--- trunk/xvidcore/src/bitstream/x86_asm/cbp_mmx.asm 2004/08/22 11:46:10 1535
+++ trunk/xvidcore/src/bitstream/x86_asm/cbp_mmx.asm 2008/11/26 01:04:34 1795
@@ -3,7 +3,8 @@
 ; * XVID MPEG-4 VIDEO CODEC
 ; * - MMX CBP computation -
 ; *
-; * Copyright (C) 2001-2003 Peter Ross
+; * Copyright (C) 2005 Carlo Bramini
+; *               2001-2003 Peter Ross
 ; *               2002-2003 Pascal Massimino
 ; *
 ; * This program is free software ; you can redistribute it and/or modify
@@ -20,54 +21,34 @@
 ; * along with this program ; if not, write to the Free Software
 ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 ; *
-; * $Id: cbp_mmx.asm,v 1.11 2004-08-22 11:46:09 edgomez Exp $
+; * $Id: cbp_mmx.asm,v 1.17 2008-11-26 01:04:34 Isibaar Exp $
 ; *
 ; ***************************************************************************/
 
-BITS 32
-
 ;=============================================================================
 ; Macros
 ;=============================================================================
 
-%macro cglobal 1
-  %ifdef PREFIX
-    %ifdef MARK_FUNCS
-      global _%1:function
-      %define %1 _%1:function
-    %else
-      global _%1
-      %define %1 _%1
-    %endif
-  %else
-    %ifdef MARK_FUNCS
-      global %1:function
-    %else
-      global %1
-    %endif
-  %endif
-%endmacro
+%include "nasm.inc"
 
 ;=============================================================================
 ; Local data
 ;=============================================================================
 
-%ifdef FORMAT_COFF
-SECTION .rodata
-%else
-SECTION .rodata align=16
-%endif
+DATA
 
-ALIGN 16
+ALIGN SECTION_ALIGN
+mult_mask:
+  db 0x10,0x20,0x04,0x08,0x01,0x02,0x00,0x00
 
 ignore_dc:
-  dw 0, -1, -1, -1, -1, -1, -1, -1
+  dw 0, -1, -1, -1
 
 ;=============================================================================
 ; Code
 ;=============================================================================
 
-SECTION .text
+SECTION .rotext align=SECTION_ALIGN
 
 cglobal calc_cbp_mmx
 
@@ -75,60 +56,78 @@
 ; uint32_t calc_cbp_mmx(const int16_t coeff[6][64]);
 ;-----------------------------------------------------------------------------
 
-ALIGN 16
-calc_cbp_mmx:
-  push ebx
-  push esi
+%macro MAKE_LOAD 2
+  por mm0, [%2-128*1+%1*8]
+  por mm1, [%2+128*0+%1*8]
+  por mm2, [%2+128*1+%1*8]
+  por mm3, [%2+128*2+%1*8]
+  por mm4, [%2+128*3+%1*8]
+  por mm5, [%2+128*4+%1*8]
+%endmacro
 
-  mov esi, [esp + 8 + 4] ; coeff
-  xor eax, eax ; cbp = 0
-  mov edx, (1 << 5)
+ALIGN SECTION_ALIGN
+calc_cbp_mmx:
+  mov _EAX, prm1 ; coeff
   movq mm7, [ignore_dc]
-
-.loop
-  movq mm0, [esi]
-  movq mm1, [esi+8]
+  pxor mm6, mm6 ; used only for comparing
+  movq mm0, [_EAX+128*0]
+  movq mm1, [_EAX+128*1]
+  movq mm2, [_EAX+128*2]
+  movq mm3, [_EAX+128*3]
+  movq mm4, [_EAX+128*4]
+  movq mm5, [_EAX+128*5]
+  add _EAX, 8+128
   pand mm0, mm7
+  pand mm1, mm7
+  pand mm2, mm7
+  pand mm3, mm7
+  pand mm4, mm7
+  pand mm5, mm7
+
+  MAKE_LOAD 0, _EAX
+  MAKE_LOAD 1, _EAX
+  MAKE_LOAD 2, _EAX
+  MAKE_LOAD 3, _EAX
+  MAKE_LOAD 4, _EAX
+  MAKE_LOAD 5, _EAX
+  MAKE_LOAD 6, _EAX
+  MAKE_LOAD 7, _EAX
+  MAKE_LOAD 8, _EAX
+  MAKE_LOAD 9, _EAX
+  MAKE_LOAD 10, _EAX
+  MAKE_LOAD 11, _EAX
+  MAKE_LOAD 12, _EAX
+  MAKE_LOAD 13, _EAX
+  MAKE_LOAD 14, _EAX
+
+  movq mm7, [mult_mask]
+  packssdw mm0, mm1
+  packssdw mm2, mm3
+  packssdw mm4, mm5
+  packssdw mm0, mm2
+  packssdw mm4, mm6
+  pcmpeqw mm0, mm6
+  pcmpeqw mm4, mm6
+  pcmpeqw mm0, mm6
+  pcmpeqw mm4, mm6
+  psrlw mm0, 15
+  psrlw mm4, 15
+  packuswb mm0, mm4
+  pmaddwd mm0, mm7
 
-  por mm0, [esi+16]
-  por mm1, [esi+24]
-
-  por mm0, [esi+32]
-  por mm1, [esi+40]
-
-  por mm0, [esi+48]
-  por mm1, [esi+56]
-
-  por mm0, [esi+64]
-  por mm1, [esi+72]
-
-  por mm0, [esi+80]
-  por mm1, [esi+88]
-
-  por mm0, [esi+96]
-  por mm1, [esi+104]
-
-  por mm0, [esi+112]
-  por mm1, [esi+120]
-
-  por mm0, mm1
   movq mm1, mm0
   psrlq mm1, 32
-  lea esi, [esi + 128]
+  paddusb mm0, mm1
 
-  por mm0, mm1
-  movd ebx, mm0
-
-  test ebx, ebx
-  jz .next
-  or eax, edx ; cbp |= 1 << (5-i)
+  movd eax, mm0
+  shr _EAX, 8
+  and _EAX, 0x3F
+  ret
+ENDFUNC
 
-.next
-  shr edx,1
-  jnc .loop
 
-  pop esi
-  pop ebx
+%ifidn __OUTPUT_FORMAT__,elf
+section ".note.GNU-stack" noalloc noexec nowrite progbits
+%endif
 
-  ret