--- trunk/xvidcore/src/plugins/x86_asm/plugin_ssim-a.asm 2008/11/14 15:43:28 1794 +++ trunk/xvidcore/src/plugins/x86_asm/plugin_ssim-a.asm 2008/11/26 01:04:34 1795 @@ -24,40 +24,20 @@ BITS 32 -%macro cglobal 1 - %ifdef PREFIX - %ifdef MARK_FUNCS - global _%1:function %1.endfunc-%1 - %define %1 _%1:function %1.endfunc-%1 - %define ENDFUNC .endfunc - %else - global _%1 - %define %1 _%1 - %define ENDFUNC - %endif - %else - %ifdef MARK_FUNCS - global %1:function %1.endfunc-%1 - %define ENDFUNC .endfunc - %else - global %1 - %define ENDFUNC - %endif - %endif -%endmacro +%include "nasm.inc" %macro ACC_ROW 2 - movq %1,[ ecx] - movq %2,[ecx+edx] + movq %1,[ TMP0] + movq %2,[TMP0+TMP1] psadbw %1,mm0 psadbw %2,mm0 - lea ecx, [ecx+2*edx] + lea TMP0, [TMP0+2*TMP1] paddw %1, %2 %endmacro %macro CONSIM_1x8_SSE2 0 - movdqu xmm0,[ecx] - movdqu xmm1,[edx] + movdqu xmm0,[TMP0] + movdqu xmm1,[TMP1] ;unpack to words punpcklbw xmm0,xmm2 @@ -76,8 +56,8 @@ %endmacro %macro CONSIM_1x8_MMX 0 - movq mm0,[ecx];orig - movq mm1,[edx];comp + movq mm0,[TMP0];orig + movq mm1,[TMP1];comp ;unpack low half of qw to words punpcklbw mm0,mm2 @@ -94,8 +74,8 @@ pmaddwd mm1,mm0 paddd mm7,mm1 - movq mm0,[ecx];orig - movq mm1,[edx];comp + movq mm0,[TMP0];orig + movq mm1,[TMP1];comp ;unpack high half of qw to words punpckhbw mm0,mm2 @@ -114,39 +94,39 @@ %endmacro %macro CONSIM_WRITEOUT 3 - mov eax,[esp + 16];lumo - mul eax; lumo^2 - add eax, 32 - shr eax,6; 64*lum0^2 - movd ecx,%1 - sub ecx,eax - - mov edx,[esp + 24]; pdevo - mov [edx],ecx - - mov eax,[esp + 20];lumc - mul eax; lumc^2 - add eax, 32 - shr eax,6; 64*lumc^2 - movd ecx,%2 - sub ecx,eax - - mov edx,[esp + 28]; pdevc - mov [edx],ecx - - mov eax,[esp + 16];lumo - mul dword [esp + 20]; lumo*lumc, should fit in eax - add eax, 32 - shr eax,6; 64*lumo*lumc - movd ecx,%3 - sub ecx,eax + mov _EAX,prm4;lumo + mul _EAX; lumo^2 + add _EAX, 32 + shr _EAX,6; 64*lum0^2 + movd TMP0d,%1 + sub TMP0,_EAX + + mov TMP1,prm6; pdevo + mov [TMP1],TMP0 + + mov eax,prm5d ;lumc + mul _EAX; lumc^2 + add _EAX, 32 + shr _EAX,6; 64*lumc^2 + movd TMP0d,%2 + sub TMP0,_EAX + + mov TMP1,prm7; pdevc + mov [TMP1],TMP0 + + mov _EAX,prm4;lumo + mul prm5d; lumo*lumc, should fit in _EAX + add _EAX, 32 + shr _EAX,6; 64*lumo*lumc + movd TMP0d,%3 + sub TMP0,_EAX - mov edx,[esp + 32]; pcorr - mov [edx],ecx + mov TMP1,prm8; pcorr + mov [TMP1],TMP0 %endmacro -SECTION .text +SECTION .rotext align=SECTION_ALIGN cglobal lum_8x8_mmx cglobal consim_sse2 @@ -154,10 +134,10 @@ ;int lum_8x8_c(uint8_t* ptr, uint32_t stride) -ALIGN 16 +ALIGN SECTION_ALIGN lum_8x8_mmx: - mov ecx, [esp + 4] ;ptr - mov edx, [esp + 8];stride + mov TMP0, prm1 ;ptr + mov TMP1, prm2 ;stride pxor mm0,mm0 @@ -177,11 +157,11 @@ ret ENDFUNC -ALIGN 16 +ALIGN SECTION_ALIGN consim_sse2: - mov ecx,[esp+4] ;ptro - mov edx,[esp+8] ;ptrc - mov eax,[esp+12];stride + mov TMP0,prm1 ;ptro + mov TMP1,prm2 ;ptrc + mov _EAX, prm3 ;stride pxor xmm2,xmm2;null vektor pxor xmm5,xmm5;devo @@ -197,26 +177,26 @@ pshufd xmm7,xmm7,00000000b CONSIM_1x8_SSE2 - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_SSE2 - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_SSE2 - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_SSE2 - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_SSE2 - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_SSE2 - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_SSE2 - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_SSE2 ;accumulate xmm5-7 @@ -243,37 +223,37 @@ -ALIGN 16 +ALIGN SECTION_ALIGN consim_mmx: - mov ecx,[esp+4] ;ptro - mov edx,[esp+8] ;ptrc - mov eax,[esp+12];stride + mov TMP0,prm1 ;ptro + mov TMP1,prm2 ;ptrc + mov _EAX,prm3;stride pxor mm2,mm2;null pxor mm5,mm5;devo pxor mm6,mm6;devc pxor mm7,mm7;corr CONSIM_1x8_MMX - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_MMX - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_MMX - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_MMX - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_MMX - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_MMX - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_MMX - add ecx,eax - add edx,eax + add TMP0,_EAX + add TMP1,_EAX CONSIM_1x8_MMX movq mm0,mm5