19 |
; * along with this program ; if not, write to the Free Software |
; * along with this program ; if not, write to the Free Software |
20 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
; * |
; * |
22 |
; * $Id: quantize_h263_3dne.asm,v 1.10 2008-12-04 14:41:50 Isibaar Exp $ |
; * $Id: quantize_h263_3dne.asm,v 1.11 2008-12-04 18:30:36 Isibaar Exp $ |
23 |
; * |
; * |
24 |
; *************************************************************************/ |
; *************************************************************************/ |
25 |
; |
; |
343 |
%endif |
%endif |
344 |
|
|
345 |
push _EBX |
push _EBX |
346 |
mov _EBX, mmzero |
lea _EBX, [mmzero] |
347 |
push _EDI |
push _EDI |
348 |
jz near .q1loop |
jz near .q1loop |
349 |
|
|
351 |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
352 |
; NB -- there are 3 pushes in the function preamble and one more |
; NB -- there are 3 pushes in the function preamble and one more |
353 |
; in "quant_intra 0", thus an added offset of 4*PTR_SIZE bytes (16 bytes when PTR_SIZE is 4) |
; in "quant_intra 0", thus an added offset of 4*PTR_SIZE bytes (16 bytes when PTR_SIZE is 4) |
354 |
XVID_MOVSX _EAX, word [byte _ECX] ; DC |
movsx _EAX, word [byte _ECX] ; DC |
355 |
|
|
356 |
quant_intra 1 |
quant_intra 1 |
357 |
mov _EDI, _EAX |
mov _EDI, _EAX |
398 |
.q1loop: |
.q1loop: |
399 |
quant_intra1 0 |
quant_intra1 0 |
400 |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
mov _EBP, [_ESP + (4+4)*PTR_SIZE] ; dcscalar |
401 |
XVID_MOVSX _EAX, word [byte _ECX] ; DC |
movsx _EAX, word [byte _ECX] ; DC |
402 |
|
|
403 |
quant_intra1 1 |
quant_intra1 1 |
404 |
mov _EDI, _EAX |
mov _EDI, _EAX |
752 |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
753 |
%endif |
%endif |
754 |
push _EBP |
push _EBP |
755 |
mov _EBX, mmx_2047 |
lea _EBX, [mmx_2047] |
756 |
XVID_MOVSX _EBP, word [_ECX] |
movsx _EBP, word [_ECX] |
757 |
%ifdef ARCH_IS_X86_64 |
%ifdef ARCH_IS_X86_64 |
758 |
lea r9, [mmx_add] |
lea r9, [mmx_add] |
759 |
lea _EAX, [r9 + _EAX*8 - 8] ; quant or quant-1 |
lea _EAX, [r9 + _EAX*8 - 8] ; quant or quant-1 |
761 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
762 |
%endif |
%endif |
763 |
push _ESI |
push _ESI |
764 |
mov _ESI, mmzero |
lea _ESI, [mmzero] |
765 |
pxor mm7, mm7 |
pxor mm7, mm7 |
766 |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
767 |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
875 |
%else |
%else |
876 |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
lea _EDI, [mmx_mul + _EAX*8 - 8] ; 2*quant |
877 |
%endif |
%endif |
878 |
mov _EBX, mmx_2047 |
lea _EBX, [mmx_2047] |
879 |
pxor mm7, mm7 |
pxor mm7, mm7 |
880 |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
movq mm3, [_ECX+120] ;B2 ; c = coeff[i] |
881 |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
pcmpeqw mm7, [_ECX+120] ;B6 (c ==0) ? -1 : 0 (1st) |
886 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
lea _EAX, [mmx_add + _EAX*8 - 8] ; quant or quant-1 |
887 |
%endif |
%endif |
888 |
psubw mm2, mm3 ;-c ;B3 (1st dep) |
psubw mm2, mm3 ;-c ;B3 (1st dep) |
889 |
mov _ESI, mmzero |
lea _ESI, [mmzero] |
890 |
pmaxsw mm2, mm3 ;|c| ;B4 (2nd) |
pmaxsw mm2, mm3 ;|c| ;B4 (2nd) |
891 |
pmullw mm2, [_EDI] ;*= 2Q ;B8 (3rd+) |
pmullw mm2, [_EDI] ;*= 2Q ;B8 (3rd+) |
892 |
psraw mm3, 15 ; sign(c) ;B7 (2nd) |
psraw mm3, 15 ; sign(c) ;B7 (2nd) |