Diff of /branches/release-1_2-branch/xvidcore/src/bitstream/x86_asm/cbp_sse2.asm

-trunk/xvidcore/src/bitstream/x86_asm/cbp_sse2.asm
revision 851, Sat Feb 15 15:22:19 2003 UTC
+branches/release-1_2-branch/xvidcore/src/bitstream/x86_asm/cbp_sse2.asm
revision 1820, Fri Nov 28 16:54:45 2008 UTC
 Line 1
- ;/**************************************************************************
+ ;/****************************************************************************
  ; *
  ; *     XVID MPEG-4 VIDEO CODEC
- ; *     sse2 cbp calc
+ ; *  - SSE2 CBP computation -
  ; *
- ; *     This program is an implementation of a part of one or more MPEG-4
+ ; *  Copyright (C) 2002 Daniel Smith <danielsmith@astroboymail.com>
- ; *     Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
+ ; *                2002 Pascal Massimino <skal@planet-d.net>
- ; *     to use this software module in hardware or software products are
- ; *     advised that its use may infringe existing patents or copyrights, and
- ; *     any such use would be at such party's own risk.  The original
- ; *     developer of this software module and his/her company, and subsequent
- ; *     editors and their companies, will have no liability for use of this
- ; *     software or modifications or derivatives thereof.
  ; *
  ; *     This program is free software; you can redistribute it and/or modify
  ; *     it under the terms of the GNU General Public License as published by
-Line 24
+Line 18
  ; *
  ; *     You should have received a copy of the GNU General Public License
  ; *     along with this program; if not, write to the Free Software
- ; *     Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  ; *
- ; *************************************************************************/
+ ; * $Id: cbp_sse2.asm,v 1.10 2008-11-26 01:04:34 Isibaar Exp $
- ;/**************************************************************************
- ; *
- ; *     History:
  ; *
- ; * 14.06.2002  cleanup -Skal-
+ ; ***************************************************************************/
- ; * 24.04.2002  had to use sse2's movdqu instead of movdqa (???)
- ; * 17.04.2002  initial version (c) 2002 Daniel Smith
- ; *
- ; *************************************************************************/
- bits 32
- section .data
- %macro cglobal 1
-         %ifdef PREFIX
-                 global _%1
-                 %define %1 _%1
-         %else
-                 global %1
-         %endif
- %endmacro
- align 16
+ ;=============================================================================
+ ; Macros
+ ;=============================================================================
- ignore_dc       dw              0, -1, -1, -1, -1, -1, -1, -1
+ %include "nasm.inc"
- section .text
+ %macro LOOP_SSE2 2
+   movdqa xmm0, [%2+(%1)*128]   ; first 16 bytes of 128-byte block %1 (holds the DC word)
- cglobal calc_cbp_sse2
- ;===========================================================================
- ;
- ; uint32_t calc_cbp_sse2(const int16_t coeff[6][64]);
- ;
- ; not enabled - slower than mmx?
- ;
- ;===========================================================================
- %macro LOOP_SSE2 1
-     movdqa      xmm0, [edx+(%1)*128]
          pand    xmm0, xmm7     ; xmm7 = ignore_dc mask (set by caller): zero the DC word
-         movdqa  xmm1, [edx+(%1)*128+16]
+   movdqa xmm1, [%2+(%1)*128+16]
-         por             xmm0, [edx+(%1)*128+32]
+   por xmm0, [%2+(%1)*128+32]
-         por             xmm1, [edx+(%1)*128+48]
+   por xmm1, [%2+(%1)*128+48]
-         por             xmm0, [edx+(%1)*128+64]
+   por xmm0, [%2+(%1)*128+64]
-         por             xmm1, [edx+(%1)*128+80]
+   por xmm1, [%2+(%1)*128+80]
-         por             xmm0, [edx+(%1)*128+96]
+   por xmm0, [%2+(%1)*128+96]
-         por             xmm1, [edx+(%1)*128+112]
+   por xmm1, [%2+(%1)*128+112]
          por             xmm0, xmm1     ; xmm0 = OR of all 128 bytes of the block (DC word masked out)
          psadbw  xmm0, xmm6     ; xmm6 = 0 (set by caller): one byte-sum per 64-bit half of xmm0
          movhlps xmm1, xmm0     ; copy high qword of xmm0 into low qword of xmm1
          por             xmm0, xmm1     ; OR the two per-half sums together
          movd    ecx, xmm0      ; ecx != 0 iff any non-DC coefficient byte was non-zero
-         test    ecx, ecx
+   test _ECX, _ECX              ; leave ZF for the caller's jz (ZF=1: block has no non-DC data)
  %endmacro
- align 16
+ ;=============================================================================
+ ; Data (Read Only)
+ ;=============================================================================
+ DATA
+ ALIGN SECTION_ALIGN
+ ignore_dc:
+   dw 0, -1, -1, -1, -1, -1, -1, -1
+ ;=============================================================================
+ ; Code
+ ;=============================================================================
+ SECTION .rotext align=SECTION_ALIGN
+ ;-----------------------------------------------------------------------------
+ ; uint32_t calc_cbp_sse2(const int16_t coeff[6*64]);
+ ;-----------------------------------------------------------------------------
+ ALIGN SECTION_ALIGN
+ cglobal calc_cbp_sse2
  calc_cbp_sse2:
-     mov     edx, [esp+4]        ; coeff[]
+   mov _EDX, prm1           ; _EDX = coeff[] (first argument)
-     xor         eax, eax                    ; cbp = 0
+   xor _EAX, _EAX           ; cbp = 0 (accumulated in the return register)
          movdqu  xmm7, [ignore_dc]       ; mask to ignore dc value (word 0 = 0, words 1..7 = -1)
          pxor    xmm6, xmm6          ; zero operand used by psadbw inside LOOP_SSE2
-   LOOP_SSE2 0
+   LOOP_SSE2 0, _EDX
-         test ecx, ecx
          jz              .blk2          ; ZF from LOOP_SSE2: skip if block 0 has no non-DC data
-     or eax, (1<<5)
+   or _EAX, (1<<5)          ; block 0 -> cbp bit 5
- .blk2
-   LOOP_SSE2 1
+ .blk2:
-         test ecx, ecx
+   LOOP_SSE2 1, _EDX
          jz              .blk3
-   or eax, (1<<4)
+   or _EAX, (1<<4)          ; block 1 -> cbp bit 4
- .blk3
-   LOOP_SSE2 2
+ .blk3:
-         test ecx, ecx
+   LOOP_SSE2 2, _EDX
          jz              .blk4
-   or eax, (1<<3)
+   or _EAX, (1<<3)          ; block 2 -> cbp bit 3
- .blk4
-   LOOP_SSE2 3
+ .blk4:
-         test ecx, ecx
+   LOOP_SSE2 3, _EDX
          jz              .blk5
-   or eax, (1<<2)
+   or _EAX, (1<<2)          ; block 3 -> cbp bit 2
- .blk5
-   LOOP_SSE2 4
+ .blk5:
-         test ecx, ecx
+   LOOP_SSE2 4, _EDX
          jz              .blk6
-   or eax, (1<<1)
+   or _EAX, (1<<1)          ; block 4 -> cbp bit 1
- .blk6
-   LOOP_SSE2 5
+ .blk6:
-         test ecx, ecx
+   LOOP_SSE2 5, _EDX
          jz              .finished
-   or eax, (1<<0)
+   or _EAX, (1<<0)          ; block 5 -> cbp bit 0
- .finished
+ .finished:
    ret                      ; return cbp: bit (5-n) set when block n has non-zero non-DC data
+ ENDFUNC
+ %ifidn __OUTPUT_FORMAT__,elf
+ section ".note.GNU-stack" noalloc noexec nowrite progbits
+ %endif

 Legend:
-Removed from v.851
  changed lines
+Added in v.1820

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4