21 |
; * along with this program ; if not, write to the Free Software |
; * along with this program ; if not, write to the Free Software |
22 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
; * |
; * |
24 |
; * $Id: quantize_h263_mmx.asm,v 1.11 2008-11-26 23:35:50 Isibaar Exp $ |
; * $Id: quantize_h263_mmx.asm,v 1.15 2008-12-04 18:30:36 Isibaar Exp $ |
25 |
; * |
; * |
26 |
; ****************************************************************************/ |
; ****************************************************************************/ |
27 |
|
|
90 |
; Code |
; Code |
91 |
;============================================================================= |
;============================================================================= |
92 |
|
|
93 |
SECTION .rotext align=SECTION_ALIGN |
TEXT |
94 |
|
|
95 |
cglobal quant_h263_intra_mmx |
cglobal quant_h263_intra_mmx |
96 |
cglobal quant_h263_intra_sse2 |
cglobal quant_h263_intra_sse2 |
247 |
|
|
248 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
249 |
quant_h263_intra_sse2: |
quant_h263_intra_sse2: |
250 |
|
PUSH_XMM6_XMM7 |
251 |
mov _EAX, prm2 ; data |
mov _EAX, prm2 ; data |
252 |
|
|
253 |
movsx _EAX, word [_EAX] ; data[0] |
movsx _EAX, word [_EAX] ; data[0] |
363 |
mov TMP1, prm1 ; coeff |
mov TMP1, prm1 ; coeff |
364 |
mov [TMP1],ax |
mov [TMP1],ax |
365 |
xor _EAX,_EAX ; return 0 |
xor _EAX,_EAX ; return 0 |
366 |
|
POP_XMM6_XMM7 |
367 |
ret |
ret |
368 |
ENDFUNC |
ENDFUNC |
369 |
|
|
483 |
|
|
484 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
485 |
quant_h263_inter_sse2: |
quant_h263_inter_sse2: |
486 |
|
PUSH_XMM6_XMM7 |
487 |
|
|
488 |
mov TMP1, prm1 ; coeff |
mov TMP1, prm1 ; coeff |
489 |
mov _EAX, prm3 ; quant |
mov _EAX, prm3 ; quant |
491 |
pxor xmm5, xmm5 ; sum |
pxor xmm5, xmm5 ; sum |
492 |
|
|
493 |
lea TMP0, [mmx_sub] |
lea TMP0, [mmx_sub] |
494 |
movq mm0, [TMP0 + _EAX*8 - 8] ; sub |
movq xmm6, [TMP0 + _EAX*8 - 8] ; sub |
|
movq2dq xmm6, mm0 ; load into low 8 bytes |
|
495 |
movlhps xmm6, xmm6 ; duplicate into high 8 bytes |
movlhps xmm6, xmm6 ; duplicate into high 8 bytes |
496 |
|
|
497 |
cmp al, 1 |
cmp al, 1 |
499 |
|
|
500 |
.qes2_not1: |
.qes2_not1: |
501 |
lea TMP0, [mmx_div] |
lea TMP0, [mmx_div] |
502 |
movq mm0, [TMP0 + _EAX*8 - 8] ; divider |
movq xmm7, [TMP0 + _EAX*8 - 8] ; divider |
503 |
|
|
504 |
xor TMP0, TMP0 |
xor TMP0, TMP0 |
505 |
mov _EAX, prm2 ; data |
mov _EAX, prm2 ; data |
506 |
|
|
|
movq2dq xmm7, mm0 |
|
507 |
movlhps xmm7, xmm7 |
movlhps xmm7, xmm7 |
508 |
|
|
509 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
536 |
jnz .qes2_loop |
jnz .qes2_loop |
537 |
|
|
538 |
.qes2_done: |
.qes2_done: |
539 |
movdqu xmm6, [plus_one] |
movdqa xmm6, [plus_one] |
540 |
pmaddwd xmm5, xmm6 |
pmaddwd xmm5, xmm6 |
541 |
movhlps xmm6, xmm5 |
movhlps xmm6, xmm5 |
542 |
paddd xmm5, xmm6 |
paddd xmm5, xmm6 |
548 |
|
|
549 |
movd eax, mm0 ; return sum |
movd eax, mm0 ; return sum |
550 |
|
|
551 |
|
POP_XMM6_XMM7 |
552 |
ret |
ret |
553 |
|
|
554 |
.qes2_q1_routine: |
.qes2_q1_routine: |
771 |
|
|
772 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
773 |
dequant_h263_intra_sse2: |
dequant_h263_intra_sse2: |
774 |
|
PUSH_XMM6_XMM7 |
775 |
|
|
776 |
mov TMP0, prm3 ; quant |
mov TMP0, prm3 ; quant |
777 |
mov _EAX, prm2 ; coeff |
mov _EAX, prm2 ; coeff |
845 |
mov [TMP1], ax |
mov [TMP1], ax |
846 |
|
|
847 |
xor _EAX, _EAX ; return 0 |
xor _EAX, _EAX ; return 0 |
848 |
|
|
849 |
|
POP_XMM6_XMM7 |
850 |
ret |
ret |
851 |
ENDFUNC |
ENDFUNC |
852 |
|
|
1004 |
|
|
1005 |
ALIGN SECTION_ALIGN |
ALIGN SECTION_ALIGN |
1006 |
dequant_h263_inter_sse2: |
dequant_h263_inter_sse2: |
1007 |
|
PUSH_XMM6_XMM7 |
1008 |
|
|
1009 |
mov TMP0, prm3 ; quant |
mov TMP0, prm3 ; quant |
1010 |
mov _EAX, prm2 ; coeff |
mov _EAX, prm2 ; coeff |
1063 |
jne .loop |
jne .loop |
1064 |
|
|
1065 |
xor _EAX, _EAX ; return 0 |
xor _EAX, _EAX ; return 0 |
1066 |
|
|
1067 |
|
POP_XMM6_XMM7 |
1068 |
ret |
ret |
1069 |
ENDFUNC |
ENDFUNC |
1070 |
|
|