Diff of /branches/dev-api-4/xvidcore/src/motion/x86_asm/sad_xmm.asm

-trunk/xvidcore/src/motion/x86_asm/sad_xmm.asm
revision 262, Sun Jul  7 09:45:40 2002 UTC
+branches/dev-api-4/xvidcore/src/motion/x86_asm/sad_xmm.asm
revision 886, Fri Feb 21 14:49:29 2003 UTC
 Line 3
  ; *     XVID MPEG-4 VIDEO CODEC
  ; *     xmm sum of absolute difference
  ; *
+ ; *     This program is an implementation of a part of one or more MPEG-4
+ ; *     Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
+ ; *     to use this software module in hardware or software products are
+ ; *     advised that its use may infringe existing patents or copyrights, and
+ ; *     any such use would be at such party's own risk.  The original
+ ; *     developer of this software module and his/her company, and subsequent
+ ; *     editors and their companies, will have no liability for use of this
+ ; *     software or modifications or derivatives thereof.
+ ; *
  ; *     This program is free software; you can redistribute it and/or modify
  ; *     it under the terms of the GNU General Public License as published by
  ; *     the Free Software Foundation; either version 2 of the License, or
-Line 32
+Line 41
  ; *************************************************************************/
  bits 32
  %macro cglobal 1
          %ifdef PREFIX
                  global _%1
-Line 58
+Line 68
  ;
  ; uint32_t sad16_xmm(const uint8_t * const cur,
  ;                                       const uint8_t * const ref,
+ ;                                       const uint32_t stride,
  ;                                       const uint32_t best_sad);
- cglobal  sad8_xmm
+ ;
  ;===========================================================================
  %macro SAD_16x16_SSE 0
-Line 114
+Line 125
  ;
  ;===========================================================================
+ %macro SAD_8x8_SSE 0
+     movq mm0, [eax] ; 8 bytes from the first row of the current row pair at eax
+     movq mm1, [eax+ecx] ; 8 bytes from the second row (sad8_xmm loads ecx = stride)
+     psadbw mm0, [edx] ; mm0 = sum of absolute byte differences against the row at edx
+     psadbw mm1, [edx+ecx] ; mm1 = same for the second row of the pair
+     add eax, ebx ; step both pointers by ebx (sad8_xmm sets ebx = 2*ecx, i.e. two rows)
+     add edx, ebx
+     paddusw mm5,mm0 ; mm5 accumulates the first-row sums
+     paddusw mm6,mm1 ; mm6 accumulates the second-row sums
+ %endmacro
+ align 16
+ sad8_xmm: ; sums absolute byte differences over 8 rows of 8 bytes from Src1/Src2; result in eax
+     mov eax, [esp+ 4] ; Src1
+     mov edx, [esp+ 8] ; Src2
+     mov ecx, [esp+12] ; Stride
+     push ebx ; ebx is used as scratch below and restored before ret
+     lea ebx, [ecx+ecx] ; ebx = 2*stride, the pointer step used by SAD_8x8_SSE
+     pxor mm5, mm5 ; accum1
+     pxor mm6, mm6 ; accum2
+     SAD_8x8_SSE ; rows 0-1
+     SAD_8x8_SSE ; rows 2-3
+     SAD_8x8_SSE ; rows 4-5
+     movq mm0, [eax] ; rows 6-7 are done inline: same loads/psadbw as the macro, without the pointer advance
+     movq mm1, [eax+ecx]
+     psadbw mm0, [edx]
+     psadbw mm1, [edx+ecx]
+     pop ebx ; restore ebx; placed between the SIMD instructions rather than at the very end
+     paddusw mm5,mm0
+     paddusw mm6,mm1
+     paddusw mm6,mm5 ; fold the two partial accumulators together
+     movd eax, mm6 ; return value = low 32 bits of the combined sum
+     ret ; NOTE(review): no emms is issued in this routine - presumably left to the caller; confirm
+ ;===========================================================================
+ ;
+ ; uint32_t sad16bi_xmm(const uint8_t * const cur,
+ ;                                       const uint8_t * const ref1,
+ ;                                       const uint8_t * const ref2,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
  %macro SADBI_16x16_SSE 0
      movq mm0, [eax]
      movq mm1, [eax+8]
-Line 179
+Line 244
  ;
  ;===========================================================================
- ;===========================================================================
+ %macro SADBI_8x8_XMM 0
- ;
- ; uint32_t sad8_xmm(const uint8_t * const cur,
- ;                                       const uint8_t * const ref,
- ;                                       const uint32_t stride);
- ;
- ;===========================================================================
- %macro SAD_8x8_SSE 0
      movq mm0, [eax] ; 8 bytes from the first row of the current row pair at eax
      movq mm1, [eax+ecx] ; 8 bytes from the second row (sad8bi_xmm loads ecx = stride)
- %macro MEAN_16x16_SSE 0
-     psadbw mm0, [edx]
-     psadbw mm1, [edx+ecx]
-     add eax, ebx
-     add edx, ebx
+    movq mm2, [edx] ; matching two rows from the block at edx
+    movq mm3, [edx+ecx]
+    pavgb mm2, [ebx] ; mm2 = rounded byte-wise average of the edx row and the ebx row
+    lea edx, [edx+2*ecx] ; pointer advances (two rows each) are interleaved with the SIMD ops
+    pavgb mm3, [ebx+ecx] ; same average for the second row of the pair
+    lea ebx, [ebx+2*ecx]
+    psadbw mm0, mm2 ; sum of absolute differences between the eax row and the averaged row
+    lea eax, [eax+2*ecx]
+    psadbw mm1, mm3
      paddusw mm5,mm0 ; mm5 accumulates the first-row sums
      paddusw mm6,mm1 ; mm6 accumulates the second-row sums
  %endmacro
  align 16
- sad8_xmm:
+ sad8bi_xmm: ; sums absolute differences between Src and the byte average of Ref1/Ref2 over 8 rows of 8 bytes; result in eax
-     mov eax, [esp+ 4] ; Src1
-     mov edx, [esp+ 8] ; Src2
-     mov ecx, [esp+12] ; Stride
      push ebx ; ebx holds the Ref2 pointer below; restored before ret
-     lea ebx, [ecx+ecx]
+    mov eax, [esp+4+ 4] ; Src (the extra +4 in each offset accounts for the ebx just pushed)
+    mov edx, [esp+4+ 8] ; Ref1
+    mov ebx, [esp+4+12] ; Ref2
+    mov ecx, [esp+4+16] ; Stride
      pxor mm5, mm5 ; accum1
      pxor mm6, mm6 ; accum2
+ .Loop ; NOTE(review): label has no trailing colon and no visible jump targets it - confirm it is still needed
+    SADBI_8x8_XMM ; rows 0-1
+    SADBI_8x8_XMM ; rows 2-3
+    SADBI_8x8_XMM ; rows 4-5
+    SADBI_8x8_XMM ; rows 6-7
-     SAD_8x8_SSE
+    paddusw mm6,mm5 ; fold the two partial accumulators together
-     SAD_8x8_SSE
+    movd eax, mm6 ; return value = low 32 bits of the combined sum
-     SAD_8x8_SSE
-     movq mm0, [eax]
-     movq mm1, [eax+ecx]
-     psadbw mm0, [edx]
-     psadbw mm1, [edx+ecx]
      pop ebx
+    ret ; NOTE(review): no emms is issued in this routine - presumably left to the caller; confirm
-     paddusw mm5,mm0
-     paddusw mm6,mm1
-     paddusw mm6,mm5
+ ;===========================================================================
-     movd eax, mm6
+ ;
+ ; uint32_t dev16_xmm(const uint8_t * const cur,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
-     ret
+ %macro MEAN_16x16_SSE 0
      movq mm0, [eax]
      movq mm1, [eax+8]
      psadbw mm0, mm7
-Line 291
+Line 357
      mov eax, [esp+ 4] ; Src
      pxor mm5, mm5 ; sums
      pxor mm6, mm6
-Line 319
+Line 386
      movd eax, mm6
      ret
+ cglobal sad16v_xmm
+ ;===========================================================================
+ ;int sad16v_xmm(const uint8_t * const cur,
+ ;               const uint8_t * const ref,
+ ;               const uint32_t stride,
+ ;               int* sad8);
+ ;===========================================================================
+ align 16
+ sad16v_xmm: ; stores four partial sums through the sad8 pointer and returns their total in eax
+     push ebx ; ebx holds the sad8 output pointer below; restored before ret
+     mov eax, [esp+4+ 4] ; Src1 (the extra +4 in each offset accounts for the ebx just pushed)
+     mov edx, [esp+4+ 8] ; Src2
+     mov ecx, [esp+4+12] ; Stride
+     mov ebx, [esp+4+16] ; sad ptr
+     pxor mm5, mm5 ; accum1
+     pxor mm6, mm6 ; accum2
+     pxor mm7, mm7 ; total
+     SAD_16x16_SSE ; first group of 8 invocations; the macro body is outside this view - presumably one 16-byte row each, left half into mm5 and right half into mm6 (confirm)
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     paddusw mm7, mm5 ; add both accumulators of the first group into the running total
+     paddusw mm7, mm6
+     movd [ebx], mm5 ; sad8[0] = accum1 of the first group
+     movd [ebx+4], mm6 ; sad8[1] = accum2 of the first group
+     pxor mm5, mm5 ; accum1
+     pxor mm6, mm6 ; accum2
+     SAD_16x16_SSE ; second group of 8 invocations, continuing from where the pointers were left
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     SAD_16x16_SSE
+     paddusw mm7, mm5 ; add both accumulators of the second group into the running total
+     paddusw mm7, mm6
+     movd [ebx+8], mm5 ; sad8[2] = accum1 of the second group
+     movd [ebx+12], mm6 ; sad8[3] = accum2 of the second group
+     movd eax, mm7 ; return value = low 32 bits of the total
+     pop ebx
+     ret ; NOTE(review): no emms is issued in this routine - presumably left to the caller; confirm
+ ;--------

 Legend:



Removed from v.262
 


changed lines


 
Added in v.886
 Legend:



Removed from v.262
 


changed lines


 
Added in v.886
-Removed from v.262
+Added in v.886

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4