Diff of /trunk/xvidcore/src/image/x86_asm/interpolate8x8_3dne.asm

-revision 851, Sat Feb 15 15:22:19 2003 UTC
+revision 1540, Sun Aug 29 10:02:38 2004 UTC
 Line 1
- ;/**************************************************************************
+ ;/*****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
- ; *     xmm 8x8 block-based halfpel interpolation
+ ; *  - 3dne pipeline optimized  8x8 block-based halfpel interpolation -
  ; *
- ; *     This program is an implementation of a part of one or more MPEG-4
+ ; *  Copyright(C) 2002 Jaan Kalda
- ; *     Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
- ; *     to use this software module in hardware or software products are
- ; *     advised that its use may infringe existing patents or copyrights, and
- ; *     any such use would be at such party's own risk.  The original
- ; *     developer of this software module and his/her company, and subsequent
- ; *     editors and their companies, will have no liability for use of this
- ; *     software or modifications or derivatives thereof.
  ; *
  ; *     This program is free software; you can redistribute it and/or modify
  ; *     it under the terms of the GNU General Public License as published by
-Line 24
+Line 17
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *************************************************************************/
+ ; ****************************************************************************/
- ; these 3dne functions are compatible with iSSE, but are optimized specifically for
+ ; these 3dne functions are compatible with iSSE, but are optimized specifically
- ; K7 pipelines
+ ; for K7 pipelines
- ;
- ;------------------------------------------------------------------------------
- ; 09.12.2002  Athlon optimizations contributed by Jaan Kalda
- ;------------------------------------------------------------------------------
- bits 32
+ BITS 32
  %macro cglobal 1
          %ifdef PREFIX
+                 %ifdef MARK_FUNCS
+                         global _%1:function %1.endfunc-%1
+                         %define %1 _%1:function %1.endfunc-%1
+                 %else
                  global _%1
                  %define %1 _%1
+                 %endif
+         %else
+                 %ifdef MARK_FUNCS
+                         global %1:function %1.endfunc-%1
          %else
                  global %1
          %endif
+         %endif
  %endmacro
- %macro nop4 0
- DB 08Dh,074h,026h,0
+ ;=============================================================================
- %endmacro
+ ; Read only data
+ ;=============================================================================
  %ifdef FORMAT_COFF
- section .data data
+ SECTION .rodata
  %else
- section .data data align=16
+ SECTION .rodata align=16
  %endif
+ ALIGN 16
- align 16
+ mmx_one:
- mmx_one
  times 8 db 1
- align 8
+ ALIGN 8
  mm_minusone:
  dd -1,-1
- section .text
+ ;=============================================================================
+ ; Macros
+ ;=============================================================================
+ %macro nop4 0
+ DB 08Dh,074h,026h,0
+ %endmacro
+ ;=============================================================================
+ ; Code
+ ;=============================================================================
+ SECTION .text
  cglobal interpolate8x8_halfpel_h_3dne
  cglobal interpolate8x8_halfpel_v_3dne
  cglobal interpolate8x8_halfpel_hv_3dne
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void interpolate8x8_halfpel_h_3dne(uint8_t * const dst,
  ;                                               const uint8_t * const src,
  ;                                               const uint32_t stride,
  ;                                               const uint32_t rounding);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  %macro COPY_H_SSE_RND0 1
  %if (%1)
-Line 112
+Line 123
          movq [ecx+edx], mm1
  %endmacro
- align 16
+ ALIGN 16
  interpolate8x8_halfpel_h_3dne:
    mov eax, [esp+ 8] ; Src
-Line 143
+Line 154
    lea ecx,[ecx+2*edx]
    COPY_H_SSE_RND1
    ret
+ .endfunc
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void interpolate8x8_halfpel_v_3dne(uint8_t * const dst,
  ;                                               const uint8_t * const src,
  ;                                               const uint32_t stride,
  ;                                               const uint32_t rounding);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  interpolate8x8_halfpel_v_3dne:
    mov eax, [esp+ 8] ; Src
-Line 201
+Line 213
    movq [ecx+edx],mm7
    ret
- align 8
+ ALIGN 8
  .rounding1
    pcmpeqb mm0,mm0
    psubusb mm0,[eax]
-Line 266
+Line 278
    movq [ecx], mm4
    movq [ecx+edx], mm5
    ret
- ;===========================================================================
+ .endfunc
+ ;-----------------------------------------------------------------------------
  ;
  ; void interpolate8x8_halfpel_hv_3dne(uint8_t * const dst,
  ;                                               const uint8_t * const src,
-Line 274
+Line 288
  ;                                               const uint32_t rounding);
  ;
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ; The trick is to correct the result of 'pavgb' with some combination of the
  ; lsb's of the 4 input values i,j,k,l, and their intermediate 'pavgb' (s and t).
-Line 360
+Line 374
      movq [ecx+edx], mm0
  %endmacro
- align 16
+ ALIGN 16
  interpolate8x8_halfpel_hv_3dne:
    mov eax, [esp+ 8] ; Src
    mov edx, [esp+12] ; stride
-Line 386
+Line 400
    COPY_HV_SSE_RND0
    ret
- align 16
+ ALIGN 16
  .rounding1
    COPY_HV_SSE_RND1
      lea ecx,[ecx+2*edx]
-Line 396
+Line 410
      lea ecx,[ecx+2*edx]
    COPY_HV_SSE_RND1
    ret
+ .endfunc

 Legend:
-Removed from v.851
 changed lines
+Added in v.1540

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4