Diff of /trunk/xvidcore/src/motion/x86_asm/sad_mmx.asm

-revision 262, Sun Jul  7 09:45:40 2002 UTC
+revision 652, Sun Nov 17 00:35:33 2002 UTC
 Line 1
- ;/**************************************************************************
+ ;/*****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
  ; *     mmx sum of absolute difference
  ; *
- ; *     This program is free software; you can redistribute it and/or modify
+ ; *  Copyright(C) 2002 Peter Ross <pross@xvid.org>
- ; *     it under the terms of the GNU General Public License as published by
+ ; *
+ ; *  This file is part of XviD, a free MPEG-4 video encoder/decoder
+ ; *
+ ; *  XviD is free software; you can redistribute it and/or modify it
+ ; *  under the terms of the GNU General Public License as published by
  ; *     the Free Software Foundation; either version 2 of the License, or
  ; *     (at your option) any later version.
  ; *
-Line 15
+Line 19
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *************************************************************************/
+ ; *  Under section 8 of the GNU General Public License, the copyright
+ ; *  holders of XVID explicitly forbid distribution in the following
- ;/**************************************************************************
+ ; *  countries:
+ ; *
+ ; *    - Japan
+ ; *    - United States of America
+ ; *
+ ; *  Linking XviD statically or dynamically with other modules is making a
+ ; *  combined work based on XviD.  Thus, the terms and conditions of the
+ ; *  GNU General Public License cover the whole combination.
  ; *
- ; *     History:
+ ; *  As a special exception, the copyright holders of XviD give you
+ ; *  permission to link XviD with independent modules that communicate with
+ ; *  XviD solely through the VFW1.1 and DShow interfaces, regardless of the
+ ; *  license terms of these independent modules, and to copy and distribute
+ ; *  the resulting combined work under terms of your choice, provided that
+ ; *  every copy of the combined work is accompanied by a complete copy of
+ ; *  the source code of XviD (the version of XviD used to produce the
+ ; *  combined work), being distributed under the terms of the GNU General
+ ; *  Public License plus this exception.  An independent module is a module
+ ; *  which is not derived from or based on XviD.
  ; *
- ; * 23.07.2002  sad[16,8]bi_xmm; <pross@xvid.org>
+ ; *  Note that people who make modified versions of XviD are not obligated
- ; * 04.06.2002  cleanup -Skal-
+ ; *  to grant this special exception for their modified versions; it is
- ; *     12.11.2001      inital version; (c)2001 peter ross <pross@cs.rmit.edu.au>
+ ; *  their choice whether to do so.  The GNU General Public License gives
+ ; *  permission to release a modified version without this exception; this
+ ; *  exception also makes it possible to release a modified version which
+ ; *  carries forward this exception.
  ; *
- ; *************************************************************************/
+ ; * $Id: sad_mmx.asm,v 1.10 2002-11-17 00:32:06 edgomez Exp $
+ ; *
+ ; ****************************************************************************/
  bits 32
  %macro cglobal 1
+         %ifdef PREFIX
                  global _%1
                  %define %1 _%1
          %else
-Line 57
+Line 83
  ; uint32_t sad16_mmx(const uint8_t * const cur,
  ;                                        const uint8_t * const ref,
  ;                                        const uint32_t stride,
+ ;                                        const uint32_t best_sad);
+ ;
  ; (early termination is ignored; it slows this function down)
  ;
  ;===========================================================================
-Line 206
+Line 234
  ;
  ; uint32_t sad16bi_mmx(const uint8_t * const cur,
  ; const uint8_t * const ref1,
+ ; const uint8_t * const ref2,
+ ; const uint32_t stride);
+ ;
+ ;===========================================================================
+ ;---------------------------------------------------------------------------
+ ; SADBI_16x16_MMX offset, advance
+ ;
+ ; Handles one group of 8 pixels: forms the rounded average of ref1 and ref2,
+ ; takes its absolute difference against src, and adds the result to the
+ ; running word sums.
+ ;
+ ;   %1   byte offset applied to eax, edx and ebx for this group
+ ;   %2   when non-zero, eax, edx and ebx are each advanced by ecx
+ ;
+ ; In:    eax = src, edx = ref1, ebx = ref2, ecx = pointer step
+ ;        mm7 = filler for the byte->word unpack (callers here zero it)
+ ;        mm6 = four 16-bit partial sums
+ ; Out:   mm6 updated
+ ; Clobb: mm0-mm5; additionally eax/edx/ebx/flags when %2 is non-zero
+ ; Note:  [mmx_one] is defined outside this view -- presumably four words
+ ;        holding 1; confirm against its definition.
+ ;---------------------------------------------------------------------------
+ %macro SADBI_16x16_MMX 2
+         movq        mm0, [edx + %1]         ; 8 bytes of ref1
+         movq        mm2, [ebx + %1]         ; 8 bytes of ref2
+         movq        mm1, mm0
+         movq        mm3, mm2
+ %if %2 != 0
+         add         edx, ecx                ; ref1 -> next row
+ %endif
+         punpcklbw   mm0, mm7                ; mm0:mm1 = ref1 widened to words
+         punpckhbw   mm1, mm7
+         punpcklbw   mm2, mm7                ; mm2:mm3 = ref2 widened to words
+         punpckhbw   mm3, mm7
+ %if %2 != 0
+         add         ebx, ecx                ; ref2 -> next row
+ %endif
+         paddusw     mm0, mm2                ; mm0:mm1 = ref1 + ref2
+         paddusw     mm1, mm3
+         paddusw     mm0, [mmx_one]          ; rounding term before the halving
+         paddusw     mm1, [mmx_one]
+         psrlw       mm0, 1                  ; mm0:mm1 = (ref1 + ref2 + 1) >> 1
+         psrlw       mm1, 1
+         movq        mm2, [eax + %1]         ; 8 bytes of src
+         movq        mm3, mm2
+         punpcklbw   mm2, mm7                ; mm2:mm3 = src widened to words
+         punpckhbw   mm3, mm7
+ %if %2 != 0
+         add         eax, ecx                ; src -> next row
+ %endif
+         movq        mm4, mm0                ; keep the average for the reverse subtract
+         movq        mm5, mm1
+         psubusw     mm0, mm2                ; max(avg - src, 0)
+         psubusw     mm1, mm3
+         psubusw     mm2, mm4                ; max(src - avg, 0)
+         psubusw     mm3, mm5
+         por         mm0, mm2                ; one side is zero, so OR gives |avg - src|
+         por         mm1, mm3
+         paddusw     mm6, mm0                ; accumulate both halves into mm6
+         paddusw     mm6, mm1
+ %endmacro
+ ;---------------------------------------------------------------------------
+ ; sad16bi_mmx: sum of absolute differences over 16 rows of 16 pixels
+ ; between cur and the rounded average of ref1 and ref2.
+ ;
+ ; ABI:   32-bit, all arguments on the stack
+ ; In:    (offsets after the push below)
+ ;        [esp+ 8] = cur, [esp+12] = ref1, [esp+16] = ref2, [esp+20] = stride
+ ; Out:   eax = accumulated sum
+ ; Clobb: ecx, edx, mm0-mm7, flags; ebx is saved and restored
+ ; Note:  no emms is issued in this routine.
+ ;---------------------------------------------------------------------------
+ align 16
+ sad16bi_mmx:
+         push        ebx                     ; ebx is used as the ref2 pointer
+         mov         eax, [esp + 4 + 4]      ; cur
+         mov         edx, [esp + 4 + 8]      ; ref1
+         mov         ebx, [esp + 4 + 12]     ; ref2
+         mov         ecx, [esp + 4 + 16]     ; stride
+         pxor        mm6, mm6                ; mm6 = four 16-bit running sums
+         pxor        mm7, mm7                ; mm7 = 0 for the byte->word unpack
+ .Loop:                                      ; symbol kept; nothing in this routine branches here
+ %rep 16                                     ; fully unrolled, one pass per row
+         SADBI_16x16_MMX 0, 0                ; pixels 0..7 of the row
+         SADBI_16x16_MMX 8, 1                ; pixels 8..15, then step to the next row
+ %endrep
+         pmaddwd     mm6, [mmx_one]          ; fold word sums into two dwords (mmx_one defined elsewhere)
+         movq        mm7, mm6
+         psrlq       mm7, 32                 ; bring the upper dword down
+         paddd       mm6, mm7                ; low dword = total
+         movd        eax, mm6                ; return value
+         pop         ebx
+         ret
+ ;===========================================================================
+ ;
+ ; uint32_t sad8bi_mmx(const uint8_t * const cur,
+ ; const uint8_t * const ref1,
+ ; const uint8_t * const ref2,
+ ; const uint32_t stride);
+ ;
+ ;===========================================================================
+ ;---------------------------------------------------------------------------
+ ; sad8bi_mmx: sum of absolute differences over 8 rows of 8 pixels between
+ ; cur and the rounded average of ref1 and ref2.
+ ;
+ ; ABI:   32-bit, all arguments on the stack
+ ; In:    (offsets after the push below)
+ ;        [esp+ 8] = cur, [esp+12] = ref1, [esp+16] = ref2, [esp+20] = stride
+ ; Out:   eax = accumulated sum
+ ; Clobb: ecx, edx, mm0-mm7, flags; ebx is saved and restored
+ ; Note:  no emms is issued in this routine.
+ ;---------------------------------------------------------------------------
+ align 16
+ sad8bi_mmx:
+         push        ebx                     ; ebx is used as the ref2 pointer
+         mov         eax, [esp + 4 + 4]      ; cur
+         mov         edx, [esp + 4 + 8]      ; ref1
+         mov         ebx, [esp + 4 + 12]     ; ref2
+         mov         ecx, [esp + 4 + 16]     ; stride
+         pxor        mm6, mm6                ; mm6 = four 16-bit running sums
+         pxor        mm7, mm7                ; mm7 = 0 for the byte->word unpack
+ .Loop:                                      ; symbol kept; nothing in this routine branches here
+ %rep 8                                      ; fully unrolled, one pass per row
+         SADBI_16x16_MMX 0, 1                ; 8 pixels, then step to the next row
+ %endrep
+         pmaddwd     mm6, [mmx_one]          ; fold word sums into two dwords (mmx_one defined elsewhere)
+         movq        mm7, mm6
+         psrlq       mm7, 32                 ; bring the upper dword down
+         paddd       mm6, mm7                ; low dword = total
+         movd        eax, mm6                ; return value
+         pop         ebx
+         ret
+ ;===========================================================================
+ ;
+ ; uint32_t dev16_mmx(const uint8_t * const cur,
+ ;                                       const uint32_t stride);
+ ;
+ ;===========================================================================
+ %macro MEAN_16x16_MMX 0
      movq mm0, [eax]
      movq mm2, [eax+8]
      lea eax,[eax+ecx]
-Line 326
+Line 510
      movd eax, mm6
      ret

 Legend:



Removed from v.262
 


changed lines


 
Added in v.652
 Legend:



Removed from v.262
 


changed lines


 
Added in v.652
-Removed from v.262
+Added in v.652

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4