Diff of /trunk/xvidcore/src/motion/x86_asm/sad_xmm.asm

-revision 262, Sun Jul  7 09:45:40 2002 UTC
+revision 458, Tue Sep 10 19:28:01 2002 UTC
 Line 1
- ;/**************************************************************************
+ ;/*****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
- ; *     xmm sum of absolute difference
+ ; *  xmm (extended mmx) sum of absolute difference
+ ; *
+ ; *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
+ ; *  Copyright(C) 2002 Michael Militzer <michael@xvid.org>
+ ; *  Copyright(C) 2002 Pascal Massimino <skal@planet-d.net>
+ ; *
+ ; *  This program is an implementation of a part of one or more MPEG-4
+ ; *  Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
+ ; *  to use this software module in hardware or software products are
+ ; *  advised that its use may infringe existing patents or copyrights, and
+ ; *  any such use would be at such party's own risk.  The original
+ ; *  developer of this software module and his/her company, and subsequent
+ ; *  editors and their companies, will have no liability for use of this
+ ; *  software or modifications or derivatives thereof.
  ; *
  ; *     This program is free software; you can redistribute it and/or modify
  ; *     it under the terms of the GNU General Public License as published by
-Line 15
+Line 28
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
- ; *
- ; *************************************************************************/
- ;/**************************************************************************
- ; *
- ; *     History:
- ; *
- ; * 23.07.2002  sad8bi_xmm; <pross@xvid.org>
- ; * 04.06.2002  rewrote some funcs (XMM mainly)     -Skal-
- ; * 17.11.2001  bugfix and small improvement for dev16_xmm,
- ; *             removed terminate early in sad16_xmm (Isibaar)
- ; *     12.11.2001      inital version; (c)2001 peter ross <pross@cs.rmit.edu.au>
  ; *
  ; *************************************************************************/
  bits 32
  %macro cglobal 1
          %ifdef PREFIX
                  global _%1
-Line 58
+Line 60
  ;
  ; uint32_t sad16_xmm(const uint8_t * const cur,
  ;                                       const uint8_t * const ref,
+ ;                                       const uint32_t stride,
  ;                                       const uint32_t best_sad);
- cglobal  sad8_xmm
+ ;
  ;===========================================================================
  %macro SAD_16x16_SSE 0
-Line 114
+Line 117
  ;
  ;===========================================================================
+ %macro SAD_8x8_SSE 0 ; one row pair: adds SAD of rows [eax],[eax+ecx] vs [edx],[edx+ecx] into mm5/mm6
+     movq mm0, [eax] ; 8 bytes of the first row from the eax buffer
+     movq mm1, [eax+ecx] ; 8 bytes of the following row (ecx bytes further on)
+     psadbw mm0, [edx] ; mm0 = sum of absolute byte differences against the edx buffer row
+     psadbw mm1, [edx+ecx] ; same for the second row
+     add eax, ebx ; step both pointers by ebx (sad8_xmm loads ebx with 2*ecx, i.e. two rows)
+     add edx, ebx
+     paddusw mm5,mm0 ; first-row and second-row sums are kept in separate accumulators
+     paddusw mm6,mm1
+ %endmacro
+ align 16
+ sad8_xmm: ; sum of absolute differences over 8 rows of 8 bytes; result returned in eax
+     mov eax, [esp+ 4] ; Src1 (arguments are read before the push, so offsets are relative to entry esp)
+     mov edx, [esp+ 8] ; Src2
+     mov ecx, [esp+12] ; Stride
+     push ebx ; ebx is used as the pointer step below; saved here and restored before ret
+     lea ebx, [ecx+ecx] ; ebx = 2*stride, the advance applied by each SAD_8x8_SSE expansion
+     pxor mm5, mm5 ; accum1
+     pxor mm6, mm6 ; accum2
+     SAD_8x8_SSE ; rows 0-1
+     SAD_8x8_SSE ; rows 2-3
+     SAD_8x8_SSE ; rows 4-5
+     movq mm0, [eax] ; rows 6-7 are written out inline: same work as the macro minus the pointer advance
+     movq mm1, [eax+ecx]
+     psadbw mm0, [edx]
+     psadbw mm1, [edx+ecx]
+     pop ebx ; ebx no longer needed; restore the caller's value
+     paddusw mm5,mm0
+     paddusw mm6,mm1
+     paddusw mm6,mm5 ; merge the two accumulators into the final total
+     movd eax, mm6 ; return value = low 32 bits of mm6
+     ret ; NOTE(review): no emms is issued here; presumably the caller clears MMX state - confirm
+ ;===========================================================================
+ ;
+ ; uint32_t sad16bi_xmm(const uint8_t * const cur,
+ ;                                       const uint8_t * const ref1,
+ ;                                       const uint8_t * const ref2,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
  %macro SADBI_16x16_SSE 0
      movq mm0, [eax]
      movq mm1, [eax+8]
-Line 179
+Line 236
  ;
  ;===========================================================================
- ;===========================================================================
+ %macro SADBI_8x8_XMM 0 ; one row pair: SAD of [eax] rows vs the byte average of [edx] and [ebx] rows, added into mm5/mm6
- ;
- ; uint32_t sad8_xmm(const uint8_t * const cur,
- ;                                       const uint8_t * const ref,
- ;                                       const uint32_t stride);
- ;
- ;===========================================================================
- %macro SAD_8x8_SSE 0
      movq mm0, [eax] ; first row of the eax buffer
      movq mm1, [eax+ecx] ; second row (ecx bytes further on)
- %macro MEAN_16x16_SSE 0
-     psadbw mm0, [edx]
-     psadbw mm1, [edx+ecx]
-     add eax, ebx
-     add edx, ebx
+    movq mm2, [edx] ; first row of the edx buffer
+    movq mm3, [edx+ecx] ; second row of the edx buffer
+    pavgb mm2, [ebx] ; mm2 = rounded per-byte average of the edx and ebx first rows
+    lea edx, [edx+2*ecx] ; advance edx by two rows; pointer updates are interleaved with the MMX ops
+    pavgb mm3, [ebx+ecx] ; same average for the second rows (ebx has not been advanced yet)
+    lea ebx, [ebx+2*ecx] ; advance ebx by two rows
+    psadbw mm0, mm2 ; SAD of the eax first row against the averaged row
+    lea eax, [eax+2*ecx] ; advance eax by two rows
+    psadbw mm1, mm3 ; SAD of the eax second row against the averaged row
      paddusw mm5,mm0 ; first-row and second-row sums go to separate accumulators
      paddusw mm6,mm1
  %endmacro
  align 16
- sad8_xmm:
+ sad8bi_xmm: ; SAD of Src against the byte average of Ref1 and Ref2 over 8 rows of 8 bytes; result in eax
-     mov eax, [esp+ 4] ; Src1
-     mov edx, [esp+ 8] ; Src2
-     mov ecx, [esp+12] ; Stride
      push ebx ; ebx holds the Ref2 pointer below; saved here and restored before ret
-     lea ebx, [ecx+ecx]
+    mov eax, [esp+4+ 4] ; Src (every offset carries an extra +4 because ebx was just pushed)
+    mov edx, [esp+4+ 8] ; Ref1
+    mov ebx, [esp+4+12] ; Ref2
+    mov ecx, [esp+4+16] ; Stride
      pxor mm5, mm5 ; accum1
      pxor mm6, mm6 ; accum2
+ .Loop ; NOTE(review): label has no trailing colon and no visible jump targets it; NASM may warn about an orphan label
+    SADBI_8x8_XMM ; rows 0-1
+    SADBI_8x8_XMM ; rows 2-3
+    SADBI_8x8_XMM ; rows 4-5
+    SADBI_8x8_XMM ; rows 6-7
-     SAD_8x8_SSE
+    paddusw mm6,mm5 ; merge the two accumulators into the final total
-     SAD_8x8_SSE
+    movd eax, mm6 ; return value = low 32 bits of mm6
-     SAD_8x8_SSE
-     movq mm0, [eax]
-     movq mm1, [eax+ecx]
-     psadbw mm0, [edx]
-     psadbw mm1, [edx+ecx]
      pop ebx ; restore the caller's ebx
+    ret ; NOTE(review): no emms is issued here; presumably the caller clears MMX state - confirm
-     paddusw mm5,mm0
-     paddusw mm6,mm1
-     paddusw mm6,mm5
+ ;===========================================================================
-     movd eax, mm6
+ ;
+ ; uint32_t dev16_xmm(const uint8_t * const cur,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
-     ret
+ %macro MEAN_16x16_SSE 0
      movq mm0, [eax]
      movq mm1, [eax+8]
      psadbw mm0, mm7

 Legend:



Removed from v.262
 


changed lines


 
Added in v.458
 Legend:



Removed from v.262
 


changed lines


 
Added in v.458
-Removed from v.262
+Added in v.458

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4