Diff of /trunk/xvidcore/src/motion/x86_asm/sad_xmm.asm

-revision 262, Sun Jul  7 09:45:40 2002 UTC
+revision 652, Sun Nov 17 00:35:33 2002 UTC
 Line 1
- ;/**************************************************************************
+ ;/*****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
- ; *     xmm sum of absolute difference
+ ; *  xmm (extended mmx) sum of absolute difference
  ; *
- ; *     This program is free software; you can redistribute it and/or modify
+ ; *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
- ; *     it under the terms of the GNU General Public License as published by
+ ; *  Copyright(C) 2002 Michael Militzer <michael@xvid.org>
+ ; *  Copyright(C) 2002 Pascal Massimino <skal@planet-d.net>
+ ; *
+ ; *  This file is part of XviD, a free MPEG-4 video encoder/decoder
+ ; *
+ ; *  XviD is free software; you can redistribute it and/or modify it
+ ; *  under the terms of the GNU General Public License as published by
  ; *     the Free Software Foundation; either version 2 of the License, or
  ; *     (at your option) any later version.
  ; *
-Line 15
+Line 21
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *************************************************************************/
+ ; *  Under section 8 of the GNU General Public License, the copyright
+ ; *  holders of XVID explicitly forbid distribution in the following
- ;/**************************************************************************
+ ; *  countries:
  ; *
- ; *     History:
+ ; *    - Japan
+ ; *    - United States of America
  ; *
- ; * 23.07.2002  sad8bi_xmm; <pross@xvid.org>
+ ; *  Linking XviD statically or dynamically with other modules is making a
- ; * 04.06.2002  rewrote some funcs (XMM mainly)     -Skal-
+ ; *  combined work based on XviD.  Thus, the terms and conditions of the
- ; * 17.11.2001  bugfix and small improvement for dev16_xmm,
+ ; *  GNU General Public License cover the whole combination.
- ; *             removed terminate early in sad16_xmm (Isibaar)
+ ; *
- ; *     12.11.2001      inital version; (c)2001 peter ross <pross@cs.rmit.edu.au>
+ ; *  As a special exception, the copyright holders of XviD give you
+ ; *  permission to link XviD with independent modules that communicate with
+ ; *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
+ ; *  license terms of these independent modules, and to copy and distribute
+ ; *  the resulting combined work under terms of your choice, provided that
+ ; *  every copy of the combined work is accompanied by a complete copy of
+ ; *  the source code of XviD (the version of XviD used to produce the
+ ; *  combined work), being distributed under the terms of the GNU General
+ ; *  Public License plus this exception.  An independent module is a module
+ ; *  which is not derived from or based on XviD.
+ ; *
+ ; *  Note that people who make modified versions of XviD are not obligated
+ ; *  to grant this special exception for their modified versions; it is
+ ; *  their choice whether to do so.  The GNU General Public License gives
+ ; *  permission to release a modified version without this exception; this
+ ; *  exception also makes it possible to release a modified version which
+ ; *  carries forward this exception.
+ ; *
+ ; * $Id: sad_xmm.asm,v 1.5 2002-11-17 00:32:06 edgomez Exp $
  ; *
  ; *************************************************************************/
  bits 32
  %macro cglobal 1
          %ifdef PREFIX
                  global _%1
-Line 58
+Line 84
  ;
  ; uint32_t sad16_xmm(const uint8_t * const cur,
  ;                                       const uint8_t * const ref,
+ ;                                       const uint32_t stride,
  ;                                       const uint32_t best_sad);
- cglobal  sad8_xmm
+ ;
  ;===========================================================================
  %macro SAD_16x16_SSE 0
-Line 114
+Line 141
  ;
  ;===========================================================================
+ %macro SAD_8x8_SSE 0 ; one step: SAD of two 8-byte rows at eax vs. edx, accumulated into mm5/mm6
+     movq mm0, [eax] ; mm0 = 8 bytes at eax (first row of the pair)
+     movq mm1, [eax+ecx] ; mm1 = 8 bytes one ecx further (second row of the pair)
+     psadbw mm0, [edx] ; mm0 = sum of absolute byte differences against the row at edx
+     psadbw mm1, [edx+ecx] ; same for the second row, against edx+ecx
+     add eax, ebx ; advance first pointer by ebx (sad8_xmm sets ebx = 2*ecx, i.e. two rows)
+     add edx, ebx ; advance second pointer by the same amount
+     paddusw mm5,mm0 ; accumulate first-row SAD into mm5 (unsigned saturating word add)
+     paddusw mm6,mm1 ; accumulate second-row SAD into mm6
+ %endmacro
+ align 16
+ sad8_xmm: ; sum of absolute differences over 8 rows of 8 bytes; result returned in eax
+     mov eax, [esp+ 4] ; eax = 1st stack argument (Src1)
+     mov edx, [esp+ 8] ; edx = 2nd stack argument (Src2)
+     mov ecx, [esp+12] ; ecx = 3rd stack argument (Stride), read before the push moves esp
+     push ebx ; save ebx; it is used as scratch below and restored before ret
+     lea ebx, [ecx+ecx] ; ebx = 2*stride, the per-step pointer advance used by SAD_8x8_SSE
+     pxor mm5, mm5 ; accum1 = 0 (collects the first row of each pair)
+     pxor mm6, mm6 ; accum2 = 0 (collects the second row of each pair)
+     SAD_8x8_SSE ; rows 0-1
+     SAD_8x8_SSE ; rows 2-3
+     SAD_8x8_SSE ; rows 4-5
+     movq mm0, [eax] ; rows 6-7: same work as the macro, inlined without the pointer advance
+     movq mm1, [eax+ecx]
+     psadbw mm0, [edx]
+     psadbw mm1, [edx+ecx]
+     pop ebx ; restore the caller's ebx
+     paddusw mm5,mm0 ; fold the last row pair into the accumulators
+     paddusw mm6,mm1
+     paddusw mm6,mm5 ; mm6 = accum1 + accum2 = total SAD
+     movd eax, mm6 ; return value = low 32 bits of mm6
+     ret
+ ;===========================================================================
+ ;
+ ; uint32_t sad16bi_xmm(const uint8_t * const cur,
+ ;                                       const uint8_t * const ref1,
+ ;                                       const uint8_t * const ref2,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
  %macro SADBI_16x16_SSE 0
      movq mm0, [eax]
      movq mm1, [eax+8]
-Line 179
+Line 260
  ;
  ;===========================================================================
- ;===========================================================================
+ %macro SADBI_8x8_XMM 0
- ;
- ; uint32_t sad8_xmm(const uint8_t * const cur,
- ;                                       const uint8_t * const ref,
- ;                                       const uint32_t stride);
- ;
- ;===========================================================================
- %macro SAD_8x8_SSE 0
      movq mm0, [eax]
      movq mm1, [eax+ecx]
- %macro MEAN_16x16_SSE 0
-     psadbw mm0, [edx]
-     psadbw mm1, [edx+ecx]
-     add eax, ebx
-     add edx, ebx
+    movq mm2, [edx]
+    movq mm3, [edx+ecx]
+    pavgb mm2, [ebx]
+    lea edx, [edx+2*ecx]
+    pavgb mm3, [ebx+ecx]
+    lea ebx, [ebx+2*ecx]
+    psadbw mm0, mm2
+    lea eax, [eax+2*ecx]
+    psadbw mm1, mm3
      paddusw mm5,mm0
      paddusw mm6,mm1
  %endmacro
  align 16
- sad8_xmm:
+ sad8bi_xmm:
-     mov eax, [esp+ 4] ; Src1
-     mov edx, [esp+ 8] ; Src2
-     mov ecx, [esp+12] ; Stride
      push ebx
-     lea ebx, [ecx+ecx]
+    mov eax, [esp+4+ 4] ; Src
+    mov edx, [esp+4+ 8] ; Ref1
+    mov ebx, [esp+4+12] ; Ref2
+    mov ecx, [esp+4+16] ; Stride
      pxor mm5, mm5 ; accum1
      pxor mm6, mm6 ; accum2
+ .Loop
+    SADBI_8x8_XMM
+    SADBI_8x8_XMM
+    SADBI_8x8_XMM
+    SADBI_8x8_XMM
-     SAD_8x8_SSE
+    paddusw mm6,mm5
-     SAD_8x8_SSE
+    movd eax, mm6
-     SAD_8x8_SSE
-     movq mm0, [eax]
-     movq mm1, [eax+ecx]
-     psadbw mm0, [edx]
-     psadbw mm1, [edx+ecx]
      pop ebx
+    ret
-     paddusw mm5,mm0
-     paddusw mm6,mm1
-     paddusw mm6,mm5
+ ;===========================================================================
-     movd eax, mm6
+ ;
+ ; uint32_t dev16_xmm(const uint8_t * const cur,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
-     ret
+ %macro MEAN_16x16_SSE 0
      movq mm0, [eax]
      movq mm1, [eax+8]
      psadbw mm0, mm7

 Legend:



Removed from v.262
 


changed lines


 
Added in v.652
 Legend:



Removed from v.262
 


changed lines


 
Added in v.652
-Removed from v.262
+Added in v.652

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4