64 |
%endif |
%endif |
65 |
%endmacro |
%endmacro |
66 |
|
|
67 |
|
; cextern <symbol>
; Declares <symbol> as an external reference.
;   - If PREFIX is defined: the extern is declared with a leading underscore
;     (_<symbol>), and <symbol> is %define'd to that underscored name so that
;     later uses of the plain name expand to the prefixed one.
;   - Otherwise: <symbol> is declared extern exactly as written.
%macro cextern 1 |
68 |
|
%ifdef PREFIX |
69 |
|
extern _%1 |
70 |
|
%define %1 _%1 |
71 |
|
%else |
72 |
|
extern %1 |
73 |
|
%endif |
74 |
|
%endmacro |
75 |
|
|
76 |
mmx_one times 4 dw 1 |
mmx_one times 4 dw 1 |
77 |
|
|
78 |
;=========================================================================== |
;=========================================================================== |
122 |
|
|
123 |
;=========================================================================== |
;=========================================================================== |
124 |
; |
; |
125 |
; default intra matrix |
; intra matrix |
126 |
; |
; |
127 |
;=========================================================================== |
;=========================================================================== |
128 |
|
|
129 |
mmx_intra_matrix |
cextern intra_matrix |
130 |
dw 8, 17, 18, 19 |
cextern intra_matrix_fix |
|
dw 21, 23, 25, 27 |
|
|
dw 17, 18, 19, 21 |
|
|
dw 23, 25, 27, 28 |
|
|
dw 20, 21, 22, 23 |
|
|
dw 24, 26, 28, 30 |
|
|
dw 21, 22, 23, 24 |
|
|
dw 26, 28, 30, 32 |
|
|
dw 22, 23, 24, 26 |
|
|
dw 28, 30, 32, 35 |
|
|
dw 23, 24, 26, 28 |
|
|
dw 30, 32, 35, 38 |
|
|
dw 25, 26, 28, 30 |
|
|
dw 32, 35, 38, 41 |
|
|
dw 27, 28, 30, 32 |
|
|
dw 35, 38, 41, 45 |
|
|
|
|
|
; MMX_FIX a, b, c, d
; Emits four 16-bit words, one per argument in order, each holding
; (1 << 16) / arg + 1, evaluated at assembly time.
; NOTE(review): presumably a reciprocal of the matrix entry scaled by 2^16,
; intended for the pmulhw that follows loads from the *_matrix_fix tables —
; confirm against the quantizer routines.
%macro MMX_FIX 4 |
|
|
dw (1 << 16) / (%1) + 1, (1 << 16) / (%2) + 1, (1 << 16) / (%3) + 1, (1 << 16) / (%4) + 1 |
|
|
%endmacro |
|
|
|
|
|
mmx_intra_matrix_fix |
|
|
MMX_FIX 8, 17, 18, 19 |
|
|
MMX_FIX 21, 23, 25, 27 |
|
|
MMX_FIX 17, 18, 19, 21 |
|
|
MMX_FIX 23, 25, 27, 28 |
|
|
MMX_FIX 20, 21, 22, 23 |
|
|
MMX_FIX 24, 26, 28, 30 |
|
|
MMX_FIX 21, 22, 23, 24 |
|
|
MMX_FIX 26, 28, 30, 32 |
|
|
MMX_FIX 22, 23, 24, 26 |
|
|
MMX_FIX 28, 30, 32, 35 |
|
|
MMX_FIX 23, 24, 26, 28 |
|
|
MMX_FIX 30, 32, 35, 38 |
|
|
MMX_FIX 25, 26, 28, 30 |
|
|
MMX_FIX 32, 35, 38, 41 |
|
|
MMX_FIX 27, 28, 30, 32 |
|
|
MMX_FIX 35, 38, 41, 45 |
|
|
|
|
131 |
|
|
132 |
;=========================================================================== |
;=========================================================================== |
133 |
; |
; |
134 |
; default inter matrix |
; inter matrix |
135 |
; |
; |
136 |
;=========================================================================== |
;=========================================================================== |
137 |
|
|
138 |
mmx_inter_matrix |
cextern inter_matrix |
139 |
dw 16,17,18,19 |
cextern inter_matrix_fix |
140 |
dw 20,21,22,23 |
|
|
dw 17,18,19,20 |
|
|
dw 21,22,23,24 |
|
|
dw 18,19,20,21 |
|
|
dw 22,23,24,25 |
|
|
dw 19,20,21,22 |
|
|
dw 23,24,26,27 |
|
|
dw 20,21,22,23 |
|
|
dw 25,26,27,28 |
|
|
dw 21,22,23,24 |
|
|
dw 26,27,28,30 |
|
|
dw 22,23,24,26 |
|
|
dw 27,28,30,31 |
|
|
dw 23,24,25,27 |
|
|
dw 28,30,31,33 |
|
|
|
|
|
|
|
|
mmx_inter_matrix_fix |
|
|
MMX_FIX 16,17,18,19 |
|
|
MMX_FIX 20,21,22,23 |
|
|
MMX_FIX 17,18,19,20 |
|
|
MMX_FIX 21,22,23,24 |
|
|
MMX_FIX 18,19,20,21 |
|
|
MMX_FIX 22,23,24,25 |
|
|
MMX_FIX 19,20,21,22 |
|
|
MMX_FIX 23,24,26,27 |
|
|
MMX_FIX 20,21,22,23 |
|
|
MMX_FIX 25,26,27,28 |
|
|
MMX_FIX 21,22,23,24 |
|
|
MMX_FIX 26,27,28,30 |
|
|
MMX_FIX 22,23,24,26 |
|
|
MMX_FIX 27,28,30,31 |
|
|
MMX_FIX 23,24,25,27 |
|
|
MMX_FIX 28,30,31,33 |
|
141 |
|
|
142 |
%define VM18P 3 |
%define VM18P 3 |
143 |
%define VM18Q 4 |
%define VM18Q 4 |
144 |
|
|
145 |
|
|
146 |
;=========================================================================== |
;=========================================================================== |
147 |
; |
; |
148 |
; quantd table |
; quantd table |
189 |
|
|
190 |
;=========================================================================== |
;=========================================================================== |
191 |
; |
; |
|
; multiply by matrix table |
|
|
; |
|
|
;=========================================================================== |
|
|
|
|
|
; MMX_MUL a, b, c, d
; Emits the four arguments as consecutive 16-bit words (one dw per argument,
; in argument order), i.e. one 8-byte row of a multiplier table.
%macro MMX_MUL 4 |
|
|
dw %1 |
|
|
dw %2 |
|
|
dw %3 |
|
|
dw %4 |
|
|
%endmacro |
|
|
|
|
|
default_inter_matrix_mul |
|
|
MMX_MUL 16,17,18,19 |
|
|
MMX_MUL 20,21,22,23 |
|
|
MMX_MUL 17,18,19,20 |
|
|
MMX_MUL 21,22,23,24 |
|
|
MMX_MUL 18,19,20,21 |
|
|
MMX_MUL 22,23,24,25 |
|
|
MMX_MUL 19,20,21,22 |
|
|
MMX_MUL 23,24,26,27 |
|
|
MMX_MUL 20,21,22,23 |
|
|
MMX_MUL 25,26,27,28 |
|
|
MMX_MUL 21,22,23,24 |
|
|
MMX_MUL 26,27,28,30 |
|
|
MMX_MUL 22,23,24,26 |
|
|
MMX_MUL 27,28,30,31 |
|
|
MMX_MUL 23,24,25,27 |
|
|
MMX_MUL 28,30,31,33 |
|
|
|
|
|
|
|
|
default_intra_matrix_mul |
|
|
MMX_MUL 8,17,18,19 |
|
|
MMX_MUL 21,23,25,27 |
|
|
MMX_MUL 17,18,19,21 |
|
|
MMX_MUL 23,25,27,28 |
|
|
MMX_MUL 20,21,22,23 |
|
|
MMX_MUL 24,26,28,30 |
|
|
MMX_MUL 21,22,23,24 |
|
|
MMX_MUL 26,28,30,32 |
|
|
MMX_MUL 22,23,24,26 |
|
|
MMX_MUL 28,30,32,35 |
|
|
MMX_MUL 23,24,26,28 |
|
|
MMX_MUL 30,32,35,38 |
|
|
MMX_MUL 25,26,28,30 |
|
|
MMX_MUL 32,35,38,41 |
|
|
MMX_MUL 27,28,30,32 |
|
|
MMX_MUL 35,38,41,45 |
|
|
|
|
|
|
|
|
;=========================================================================== |
|
|
; |
|
192 |
; multiply by 2Q table |
; multiply by 2Q table |
193 |
; |
; |
194 |
;=========================================================================== |
;=========================================================================== |
293 |
psllw mm0, 4 ; level << 4 |
psllw mm0, 4 ; level << 4 |
294 |
psllw mm3, 4 ; |
psllw mm3, 4 ; |
295 |
|
|
296 |
movq mm2, [mmx_intra_matrix + 8*ecx] |
movq mm2, [intra_matrix + 8*ecx] |
297 |
psrlw mm2, 1 ; intra_matrix[i]>>1 |
psrlw mm2, 1 ; intra_matrix[i]>>1 |
298 |
paddw mm0, mm2 |
paddw mm0, mm2 |
299 |
|
|
300 |
movq mm2, [mmx_intra_matrix_fix + ecx*8] |
movq mm2, [intra_matrix_fix + ecx*8] |
301 |
pmulhw mm0, mm2 ; (level<<4 + intra_matrix[i]>>1) / intra_matrix[i] |
pmulhw mm0, mm2 ; (level<<4 + intra_matrix[i]>>1) / intra_matrix[i] |
302 |
|
|
303 |
movq mm2, [mmx_intra_matrix + 8*ecx + 8] |
movq mm2, [intra_matrix + 8*ecx + 8] |
304 |
psrlw mm2, 1 |
psrlw mm2, 1 |
305 |
paddw mm3, mm2 |
paddw mm3, mm2 |
306 |
|
|
307 |
movq mm2, [mmx_intra_matrix_fix + ecx*8 + 8] |
movq mm2, [intra_matrix_fix + ecx*8 + 8] |
308 |
pmulhw mm3, mm2 |
pmulhw mm3, mm2 |
309 |
|
|
310 |
paddw mm0, mm5 ; + quantd |
paddw mm0, mm5 ; + quantd |
372 |
psllw mm0, 4 |
psllw mm0, 4 |
373 |
psllw mm3, 4 |
psllw mm3, 4 |
374 |
|
|
375 |
movq mm2, [mmx_intra_matrix + 8*ecx] |
movq mm2, [intra_matrix + 8*ecx] |
376 |
psrlw mm2, 1 |
psrlw mm2, 1 |
377 |
paddw mm0, mm2 |
paddw mm0, mm2 |
378 |
|
|
379 |
movq mm2, [mmx_intra_matrix_fix + ecx*8] |
movq mm2, [intra_matrix_fix + ecx*8] |
380 |
pmulhw mm0, mm2 ; (level<<4 + intra_matrix[i]>>1) / intra_matrix[i] |
pmulhw mm0, mm2 ; (level<<4 + intra_matrix[i]>>1) / intra_matrix[i] |
381 |
|
|
382 |
movq mm2, [mmx_intra_matrix + 8*ecx + 8] |
movq mm2, [intra_matrix + 8*ecx + 8] |
383 |
psrlw mm2, 1 |
psrlw mm2, 1 |
384 |
paddw mm3, mm2 |
paddw mm3, mm2 |
385 |
|
|
386 |
movq mm2, [mmx_intra_matrix_fix + ecx*8 + 8] |
movq mm2, [intra_matrix_fix + ecx*8 + 8] |
387 |
pmulhw mm3, mm2 |
pmulhw mm3, mm2 |
388 |
|
|
389 |
paddw mm0, mm5 |
paddw mm0, mm5 |
425 |
psllw mm0, 4 |
psllw mm0, 4 |
426 |
psllw mm3, 4 |
psllw mm3, 4 |
427 |
|
|
428 |
movq mm2, [mmx_intra_matrix + 8*ecx] |
movq mm2, [intra_matrix + 8*ecx] |
429 |
psrlw mm2, 1 |
psrlw mm2, 1 |
430 |
paddw mm0, mm2 |
paddw mm0, mm2 |
431 |
|
|
432 |
movq mm2, [mmx_intra_matrix_fix + ecx*8] |
movq mm2, [intra_matrix_fix + ecx*8] |
433 |
pmulhw mm0, mm2 ; (level<<4 + intra_matrix[i]>>1) / intra_matrix[i] |
pmulhw mm0, mm2 ; (level<<4 + intra_matrix[i]>>1) / intra_matrix[i] |
434 |
|
|
435 |
movq mm2, [mmx_intra_matrix + 8*ecx + 8] |
movq mm2, [intra_matrix + 8*ecx + 8] |
436 |
psrlw mm2, 1 |
psrlw mm2, 1 |
437 |
paddw mm3, mm2 |
paddw mm3, mm2 |
438 |
|
|
439 |
movq mm2, [mmx_intra_matrix_fix + ecx*8 + 8] |
movq mm2, [intra_matrix_fix + ecx*8 + 8] |
440 |
pmulhw mm3, mm2 |
pmulhw mm3, mm2 |
441 |
|
|
442 |
paddw mm0, mm5 |
paddw mm0, mm5 |
507 |
psllw mm0, 4 |
psllw mm0, 4 |
508 |
psllw mm3, 4 |
psllw mm3, 4 |
509 |
|
|
510 |
movq mm2, [mmx_inter_matrix + 8*ecx] |
movq mm2, [inter_matrix + 8*ecx] |
511 |
psrlw mm2, 1 |
psrlw mm2, 1 |
512 |
paddw mm0, mm2 |
paddw mm0, mm2 |
513 |
|
|
514 |
movq mm2, [mmx_inter_matrix_fix + ecx*8] |
movq mm2, [inter_matrix_fix + ecx*8] |
515 |
pmulhw mm0, mm2 ; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i] |
pmulhw mm0, mm2 ; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i] |
516 |
|
|
517 |
movq mm2, [mmx_inter_matrix + 8*ecx + 8] |
movq mm2, [inter_matrix + 8*ecx + 8] |
518 |
psrlw mm2, 1 |
psrlw mm2, 1 |
519 |
paddw mm3, mm2 |
paddw mm3, mm2 |
520 |
|
|
521 |
movq mm2, [mmx_inter_matrix_fix + ecx*8 + 8] |
movq mm2, [inter_matrix_fix + ecx*8 + 8] |
522 |
pmulhw mm3, mm2 |
pmulhw mm3, mm2 |
523 |
|
|
524 |
pmulhw mm0, mm7 ; mm0 = (mm0 / 2Q) >> 16 |
pmulhw mm0, mm7 ; mm0 = (mm0 / 2Q) >> 16 |
571 |
psllw mm0, 4 |
psllw mm0, 4 |
572 |
psllw mm3, 4 |
psllw mm3, 4 |
573 |
|
|
574 |
movq mm2, [mmx_inter_matrix + 8*ecx] |
movq mm2, [inter_matrix + 8*ecx] |
575 |
psrlw mm2, 1 |
psrlw mm2, 1 |
576 |
paddw mm0, mm2 |
paddw mm0, mm2 |
577 |
|
|
578 |
movq mm2, [mmx_inter_matrix_fix + ecx*8] |
movq mm2, [inter_matrix_fix + ecx*8] |
579 |
pmulhw mm0, mm2 ; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i] |
pmulhw mm0, mm2 ; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i] |
580 |
|
|
581 |
movq mm2, [mmx_inter_matrix + 8*ecx + 8] |
movq mm2, [inter_matrix + 8*ecx + 8] |
582 |
psrlw mm2, 1 |
psrlw mm2, 1 |
583 |
paddw mm3, mm2 |
paddw mm3, mm2 |
584 |
|
|
585 |
movq mm2, [mmx_inter_matrix_fix + ecx*8 + 8] |
movq mm2, [inter_matrix_fix + ecx*8 + 8] |
586 |
pmulhw mm3, mm2 |
pmulhw mm3, mm2 |
587 |
|
|
588 |
psrlw mm0, 1 ; mm0 >>= 1 (/2) |
psrlw mm0, 1 ; mm0 >>= 1 (/2) |
624 |
psllw mm0, 4 |
psllw mm0, 4 |
625 |
psllw mm3, 4 |
psllw mm3, 4 |
626 |
|
|
627 |
movq mm2, [mmx_inter_matrix + 8*ecx] |
movq mm2, [inter_matrix + 8*ecx] |
628 |
psrlw mm2, 1 |
psrlw mm2, 1 |
629 |
paddw mm0, mm2 |
paddw mm0, mm2 |
630 |
|
|
631 |
movq mm2, [mmx_inter_matrix_fix + ecx*8] |
movq mm2, [inter_matrix_fix + ecx*8] |
632 |
pmulhw mm0, mm2 ; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i] |
pmulhw mm0, mm2 ; (level<<4 + inter_matrix[i]>>1) / inter_matrix[i] |
633 |
|
|
634 |
movq mm2, [mmx_inter_matrix + 8*ecx + 8] |
movq mm2, [inter_matrix + 8*ecx + 8] |
635 |
psrlw mm2, 1 |
psrlw mm2, 1 |
636 |
paddw mm3, mm2 |
paddw mm3, mm2 |
637 |
|
|
638 |
movq mm2, [mmx_inter_matrix_fix + ecx*8 + 8] |
movq mm2, [inter_matrix_fix + ecx*8 + 8] |
639 |
pmulhw mm3, mm2 |
pmulhw mm3, mm2 |
640 |
|
|
641 |
psrlw mm0, 2 ; mm0 >>= 2 (/4) |
psrlw mm0, 2 ; mm0 >>= 2 (/4) |
697 |
|
|
698 |
pmullw mm0, mm7 ; mm0 *= quant |
pmullw mm0, mm7 ; mm0 *= quant |
699 |
|
|
700 |
movq mm3, [default_intra_matrix_mul + 8*eax] |
movq mm3, [intra_matrix + 8*eax] |
701 |
|
|
702 |
movq mm4, mm0 ; |
movq mm4, mm0 ; |
703 |
pmullw mm0, mm3 ; mm0 = low(mm0 * mm3) |
pmullw mm0, mm3 ; mm0 = low(mm0 * mm3) |
801 |
paddsw mm0, mm6 ; mm0 = 2*mm0 + 1 |
paddsw mm0, mm6 ; mm0 = 2*mm0 + 1 |
802 |
pmullw mm0, mm7 ; mm0 *= quant |
pmullw mm0, mm7 ; mm0 *= quant |
803 |
|
|
804 |
movq mm3, [default_inter_matrix_mul + 8*eax] |
movq mm3, [inter_matrix + 8*eax] |
805 |
|
|
806 |
movq mm4, mm0 |
movq mm4, mm0 |
807 |
pmullw mm0, mm3 ; mm0 = low(mm0 * mm3) |
pmullw mm0, mm3 ; mm0 = low(mm0 * mm3) |