Diff of /trunk/xvidcore/src/utils/x86_asm/mem_transfer_3dne.asm

-revision 1381, Thu Mar 18 16:11:28 2004 UTC
+revision 1382, Mon Mar 22 22:36:25 2004 UTC
 Line 1
- ;/**************************************************************************
+ ;/****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
- ; *     mmx 8bit<->16bit transfers
+ ; *  - 8<->16 bit transfer functions -
  ; *
- ; *     This program is an implementation of a part of one or more MPEG-4
+ ; *  Copyright (C) 2002 Jaan Kalda
- ; *     Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
- ; *     to use this software module in hardware or software products are
- ; *     advised that its use may infringe existing patents or copyrights, and
- ; *     any such use would be at such party's own risk.  The original
- ; *     developer of this software module and his/her company, and subsequent
- ; *     editors and their companies, will have no liability for use of this
- ; *     software or modifications or derivatives thereof.
  ; *
  ; *     This program is free software; you can redistribute it and/or modify
  ; *     it under the terms of the GNU General Public License as published by
-Line 24
+Line 17
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *************************************************************************/
+ ; * $Id: mem_transfer_3dne.asm,v 1.3 2004-03-22 22:36:24 edgomez Exp $
+ ; *
- ; these 3dne functions are compatible with iSSE, but are optimized specifically for
+ ; ***************************************************************************/
- ; K7 pipelines
- ;
- ;------------------------------------------------------------------------------
- ; 09.12.2002  Athlon optimizations contributed by Jaan Kalda
- ;------------------------------------------------------------------------------
- bits 32
- %ifdef FORMAT_COFF
- section .data data
- %else
- section .data data align=16
- %endif
- align 8
+ ; these 3dne functions are compatible with iSSE, but are optimized specifically
- mm_zero:
+ ; for K7 pipelines
- dd 0,0
+ BITS 32
  %macro cglobal 1
          %ifdef PREFIX
-Line 57
+Line 36
                  global %1
          %endif
  %endmacro
+ ;=============================================================================
+ ; Read only data
+ ;=============================================================================
+ %ifdef FORMAT_COFF
+ SECTION .rodata data
+ %else
+ SECTION .rodata data align=16
+ %endif
+ ALIGN 8
+ mm_zero:
+         dd 0,0
+ ;=============================================================================
+ ; Macros
+ ;=============================================================================
  %macro nop4 0
- DB 08Dh,074h,026h,0
+         db 08Dh, 074h, 026h, 0
  %endmacro
- section .text
+ ;=============================================================================
+ ; Code
+ ;=============================================================================
+ SECTION .text
  cglobal transfer_8to16copy_3dne
  cglobal transfer_16to8copy_3dne
-Line 71
+Line 72
  cglobal transfer_16to8add_3dne
  cglobal transfer8x8_copy_3dne
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void transfer_8to16copy_3dne(int16_t * const dst,
  ;                                                       const uint8_t * const src,
  ;                                                       uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  transfer_8to16copy_3dne:
    mov eax, [esp+ 8] ; Src
-Line 141
+Line 142
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void transfer_16to8copy_3dne(uint8_t * const dst,
  ;                                                       const int16_t * const src,
  ;                                                       uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- align 16
+ ALIGN 16
  transfer_16to8copy_3dne:
    mov eax, [esp+ 8] ; Src
-Line 185
+Line 186
    movq [ecx+2*edx], mm6
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void transfer_8to16sub_3dne(int16_t * const dct,
  ;                               uint8_t * const cur,
  ;                               const uint8_t * const ref,
  ;                               const uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
- ;/**************************************************************************
- ; *
- ; *     History:
- ; *
- ; * 27.12.2001  renamed from 'compensate' to 'transfer_8to16sub'
- ; * 02.12.2001  loop unrolled, code runs 10% faster now (Isibaar)
- ; * 30.11.2001  16 pixels are processed per iteration (Isibaar)
- ; * 30.11.2001  .text missing
- ; *     06.11.2001      inital version; (c)2001 peter ross <pross@cs.rmit.edu.au>
- ; *
- ; *************************************************************************/
  ; when the second argument == 1, the reference (ebx) block is written to the current (eax) block
  %macro COPY_8_TO_16_SUB 2
-Line 217
+Line 207
    punpckhbw mm1, mm7
    punpckhbw mm6, mm7
    punpcklbw mm4, mm7
- align 8
+ ALIGN 8
    movq mm2, [byte eax+edx]
    punpcklbw mm0, mm7
    movq mm3, [byte eax+edx]
-Line 248
+Line 238
    movq [edi+%1*32+24], mm3
  %endmacro
- align 16
+ ALIGN 16
  transfer_8to16sub_3dne:
    mov eax, [esp + 8] ; Cur
    mov ecx, [esp +12] ; Ref
-Line 257
+Line 247
    mov edi, [esp+4+ 4] ; Dst
    pxor mm7, mm7
    nop
- align 4
+ ALIGN 4
    COPY_8_TO_16_SUB 0, 1
    COPY_8_TO_16_SUB 1, 1
    COPY_8_TO_16_SUB 2, 1
-Line 265
+Line 255
    mov edi,ecx
    ret
- align 16
+ ALIGN 16
  transfer_8to16subro_3dne:
    mov eax, [esp + 8] ; Cur
    mov ecx, [esp +12] ; Ref
-Line 274
+Line 264
    mov edi, [esp+4+ 4] ; Dst
    pxor mm7, mm7
    nop
- align 4
+ ALIGN 4
    COPY_8_TO_16_SUB 0, 0
    COPY_8_TO_16_SUB 1, 0
    COPY_8_TO_16_SUB 2, 0
-Line 283
+Line 273
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void transfer_8to16sub2_3dne(int16_t * const dct,
  ;                               uint8_t * const cur,
-Line 291
+Line 281
  ;                               const uint8_t * ref2,
  ;                               const uint32_t stride)
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  %macro COPY_8_TO_16_SUB2_SSE 1
   db 0Fh, 6Fh, 44h, 20h, 00  ;movq mm0, [byte eax]      ; cur
-Line 305
+Line 295
    movq mm4, [byte ebx]      ; ref1
    pavgb mm4, [byte esi]     ; ref2
+   movq [eax], mm4
    movq mm5, [ebx+edx]  ; ref
    pavgb mm5, [esi+edx] ; ref2
+   movq [eax+edx], mm5
    movq mm6, mm4
    punpcklbw mm4, mm7
    punpckhbw mm6, mm7
-Line 332
+Line 324
    movq [ecx+%1*32+24], mm3
  %endmacro
- align 16
+ ALIGN 16
  transfer_8to16sub2_3dne:
    mov edx, [esp +20] ; Stride
    mov ecx, [esp  + 4] ; Dst
-Line 352
+Line 344
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void transfer_16to8add_3dne(uint8_t * const dst,
  ;                                               const int16_t * const src,
  ;                                               uint32_t stride);
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  %macro COPY_16_TO_8_ADD 1
    db 0Fh, 6Fh, 44h, 21h, 00 ;movq mm0, [byte ecx]
-Line 381
+Line 373
  %endmacro
- align 16
+ ALIGN 16
  transfer_16to8add_3dne:
    mov ecx, [esp+ 4] ; Dst
    mov edx, [esp+12] ; Stride
-Line 398
+Line 390
    COPY_16_TO_8_ADD 3
    ret
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  ;
  ; void transfer8x8_copy_3dne(uint8_t * const dst,
  ;                                       const uint8_t * const src,
  ;                                       const uint32_t stride);
  ;
  ;
- ;===========================================================================
+ ;-----------------------------------------------------------------------------
  %macro COPY_8_TO_8 0
    movq mm0, [byte  eax]
-Line 415
+Line 407
    movq [ecx+edx], mm1
  %endmacro
- align 16
+ ALIGN 16
  transfer8x8_copy_3dne:
    mov eax, [esp+ 8] ; Src
    mov edx, [esp+12] ; Stride

 Legend:



Removed from v.1381
 


changed lines


 
Added in v.1382
 Legend:



Removed from v.1381
 


changed lines


 
Added in v.1382
-Removed from v.1381
+Added in v.1382

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4