21 |
; * along with this program; if not, write to the Free Software |
; * along with this program; if not, write to the Free Software |
22 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
; * |
; * |
24 |
; * $Id: qpel_mmx.asm,v 1.3 2008-08-19 09:06:48 Isibaar Exp $ |
; * $Id: qpel_mmx.asm,v 1.4 2008-11-11 20:46:24 Isibaar Exp $ |
25 |
; * |
; * |
26 |
; *************************************************************************/ |
; *************************************************************************/ |
27 |
|
|
47 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
48 |
global _%1:function %1.endfunc-%1 |
global _%1:function %1.endfunc-%1 |
49 |
%define %1 _%1:function %1.endfunc-%1 |
%define %1 _%1:function %1.endfunc-%1 |
50 |
|
%define ENDFUNC .endfunc |
51 |
%else |
%else |
52 |
global _%1 |
global _%1 |
53 |
%define %1 _%1 |
%define %1 _%1 |
54 |
|
%define ENDFUNC |
55 |
%endif |
%endif |
56 |
%else |
%else |
57 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
58 |
global %1:function %1.endfunc-%1 |
global %1:function %1.endfunc-%1 |
59 |
|
%define ENDFUNC .endfunc |
60 |
%else |
%else |
61 |
global %1 |
global %1 |
62 |
|
%define ENDFUNC |
63 |
%endif |
%endif |
64 |
%endif |
%endif |
65 |
%endmacro |
%endmacro |
124 |
SECTION .data align=32 |
SECTION .data align=32 |
125 |
xvid_Expand_mmx: |
xvid_Expand_mmx: |
126 |
times 256*4 dw 0 ; uint16_t xvid_Expand_mmx[256][4] |
times 256*4 dw 0 ; uint16_t xvid_Expand_mmx[256][4] |
127 |
.endfunc |
ENDFUNC |
128 |
|
|
129 |
xvid_FIR_1_0_0_0: |
xvid_FIR_1_0_0_0: |
130 |
times 256*4 dw 0 |
times 256*4 dw 0 |
131 |
.endfunc |
ENDFUNC |
132 |
|
|
133 |
xvid_FIR_3_1_0_0: |
xvid_FIR_3_1_0_0: |
134 |
times 256*4 dw 0 |
times 256*4 dw 0 |
135 |
.endfunc |
ENDFUNC |
136 |
|
|
137 |
xvid_FIR_6_3_1_0: |
xvid_FIR_6_3_1_0: |
138 |
times 256*4 dw 0 |
times 256*4 dw 0 |
139 |
.endfunc |
ENDFUNC |
140 |
|
|
141 |
xvid_FIR_14_3_2_1: |
xvid_FIR_14_3_2_1: |
142 |
times 256*4 dw 0 |
times 256*4 dw 0 |
143 |
.endfunc |
ENDFUNC |
144 |
|
|
145 |
xvid_FIR_20_6_3_1: |
xvid_FIR_20_6_3_1: |
146 |
times 256*4 dw 0 |
times 256*4 dw 0 |
147 |
.endfunc |
ENDFUNC |
148 |
|
|
149 |
xvid_FIR_20_20_6_3: |
xvid_FIR_20_20_6_3: |
150 |
times 256*4 dw 0 |
times 256*4 dw 0 |
151 |
.endfunc |
ENDFUNC |
152 |
|
|
153 |
xvid_FIR_23_19_6_3: |
xvid_FIR_23_19_6_3: |
154 |
times 256*4 dw 0 |
times 256*4 dw 0 |
155 |
.endfunc |
ENDFUNC |
156 |
|
|
157 |
xvid_FIR_7_20_20_6: |
xvid_FIR_7_20_20_6: |
158 |
times 256*4 dw 0 |
times 256*4 dw 0 |
159 |
.endfunc |
ENDFUNC |
160 |
|
|
161 |
xvid_FIR_6_20_20_6: |
xvid_FIR_6_20_20_6: |
162 |
times 256*4 dw 0 |
times 256*4 dw 0 |
163 |
.endfunc |
ENDFUNC |
164 |
|
|
165 |
xvid_FIR_6_20_20_7: |
xvid_FIR_6_20_20_7: |
166 |
times 256*4 dw 0 |
times 256*4 dw 0 |
167 |
.endfunc |
ENDFUNC |
168 |
|
|
169 |
xvid_FIR_3_6_20_20: |
xvid_FIR_3_6_20_20: |
170 |
times 256*4 dw 0 |
times 256*4 dw 0 |
171 |
.endfunc |
ENDFUNC |
172 |
|
|
173 |
xvid_FIR_3_6_19_23: |
xvid_FIR_3_6_19_23: |
174 |
times 256*4 dw 0 |
times 256*4 dw 0 |
175 |
.endfunc |
ENDFUNC |
176 |
|
|
177 |
xvid_FIR_1_3_6_20: |
xvid_FIR_1_3_6_20: |
178 |
times 256*4 dw 0 |
times 256*4 dw 0 |
179 |
.endfunc |
ENDFUNC |
180 |
|
|
181 |
xvid_FIR_1_2_3_14: |
xvid_FIR_1_2_3_14: |
182 |
times 256*4 dw 0 |
times 256*4 dw 0 |
183 |
.endfunc |
ENDFUNC |
184 |
|
|
185 |
xvid_FIR_0_1_3_6: |
xvid_FIR_0_1_3_6: |
186 |
times 256*4 dw 0 |
times 256*4 dw 0 |
187 |
.endfunc |
ENDFUNC |
188 |
|
|
189 |
xvid_FIR_0_0_1_3: |
xvid_FIR_0_0_1_3: |
190 |
times 256*4 dw 0 |
times 256*4 dw 0 |
191 |
.endfunc |
ENDFUNC |
192 |
|
|
193 |
xvid_FIR_0_0_0_1: |
xvid_FIR_0_0_0_1: |
194 |
times 256*4 dw 0 |
times 256*4 dw 0 |
195 |
.endfunc |
ENDFUNC |
196 |
|
|
197 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
198 |
|
|
209 |
times 4 dw 0 |
times 4 dw 0 |
210 |
|
|
211 |
align 16 |
align 16 |
212 |
Rounder_QP_MMX |
Rounder_QP_MMX: |
213 |
times 4 dw 16 |
times 4 dw 16 |
214 |
times 4 dw 15 |
times 4 dw 15 |
215 |
|
|
427 |
PROLOG_AVRG |
PROLOG_AVRG |
428 |
%endif |
%endif |
429 |
|
|
430 |
.Loop |
.Loop: |
431 |
|
|
432 |
; mm0..mm3 serves as a 4x4 delay line |
; mm0..mm3 serves as a 4x4 delay line |
433 |
|
|
538 |
PROLOG_AVRG |
PROLOG_AVRG |
539 |
%endif |
%endif |
540 |
|
|
541 |
.Loop |
.Loop: |
542 |
; mm0..mm3 serves as a 4x4 delay line |
; mm0..mm3 serves as a 4x4 delay line |
543 |
|
|
544 |
%ifndef USE_TABLES |
%ifndef USE_TABLES |
655 |
|
|
656 |
xvid_H_Pass_16_x86_64: |
xvid_H_Pass_16_x86_64: |
657 |
H_PASS_16 0, 0 |
H_PASS_16 0, 0 |
658 |
.endfunc |
ENDFUNC |
659 |
xvid_H_Pass_Avrg_16_x86_64: |
xvid_H_Pass_Avrg_16_x86_64: |
660 |
H_PASS_16 1, 0 |
H_PASS_16 1, 0 |
661 |
.endfunc |
ENDFUNC |
662 |
xvid_H_Pass_Avrg_Up_16_x86_64: |
xvid_H_Pass_Avrg_Up_16_x86_64: |
663 |
H_PASS_16 2, 0 |
H_PASS_16 2, 0 |
664 |
.endfunc |
ENDFUNC |
665 |
|
|
666 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
667 |
;// 8x? copy Functions |
;// 8x? copy Functions |
668 |
|
|
669 |
xvid_H_Pass_8_x86_64: |
xvid_H_Pass_8_x86_64: |
670 |
H_PASS_8 0, 0 |
H_PASS_8 0, 0 |
671 |
.endfunc |
ENDFUNC |
672 |
xvid_H_Pass_Avrg_8_x86_64: |
xvid_H_Pass_Avrg_8_x86_64: |
673 |
H_PASS_8 1, 0 |
H_PASS_8 1, 0 |
674 |
.endfunc |
ENDFUNC |
675 |
xvid_H_Pass_Avrg_Up_8_x86_64: |
xvid_H_Pass_Avrg_Up_8_x86_64: |
676 |
H_PASS_8 2, 0 |
H_PASS_8 2, 0 |
677 |
.endfunc |
ENDFUNC |
678 |
|
|
679 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
680 |
;// 16x? avrg Functions |
;// 16x? avrg Functions |
681 |
|
|
682 |
xvid_H_Pass_Add_16_x86_64: |
xvid_H_Pass_Add_16_x86_64: |
683 |
H_PASS_16 0, 1 |
H_PASS_16 0, 1 |
684 |
.endfunc |
ENDFUNC |
685 |
xvid_H_Pass_Avrg_Add_16_x86_64: |
xvid_H_Pass_Avrg_Add_16_x86_64: |
686 |
H_PASS_16 1, 1 |
H_PASS_16 1, 1 |
687 |
.endfunc |
ENDFUNC |
688 |
xvid_H_Pass_Avrg_Up_Add_16_x86_64: |
xvid_H_Pass_Avrg_Up_Add_16_x86_64: |
689 |
H_PASS_16 2, 1 |
H_PASS_16 2, 1 |
690 |
.endfunc |
ENDFUNC |
691 |
|
|
692 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
693 |
;// 8x? avrg Functions |
;// 8x? avrg Functions |
694 |
|
|
695 |
xvid_H_Pass_8_Add_x86_64: |
xvid_H_Pass_8_Add_x86_64: |
696 |
H_PASS_8 0, 1 |
H_PASS_8 0, 1 |
697 |
.endfunc |
ENDFUNC |
698 |
xvid_H_Pass_Avrg_8_Add_x86_64: |
xvid_H_Pass_Avrg_8_Add_x86_64: |
699 |
H_PASS_8 1, 1 |
H_PASS_8 1, 1 |
700 |
.endfunc |
ENDFUNC |
701 |
xvid_H_Pass_Avrg_Up_8_Add_x86_64: |
xvid_H_Pass_Avrg_Up_8_Add_x86_64: |
702 |
H_PASS_8 2, 1 |
H_PASS_8 2, 1 |
703 |
.endfunc |
ENDFUNC |
704 |
|
|
705 |
|
|
706 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
799 |
; the size (3rd argument) is meant to be a multiple of 4 |
; the size (3rd argument) is meant to be a multiple of 4 |
800 |
; mm0..mm3 serves as a 4x4 delay line |
; mm0..mm3 serves as a 4x4 delay line |
801 |
|
|
802 |
.Loop |
.Loop: |
803 |
|
|
804 |
push rdi |
push rdi |
805 |
push rsi ; esi is preserved for src-mixing |
push rsi ; esi is preserved for src-mixing |
1006 |
; we process one stripe of 4x8 pixel each time |
; we process one stripe of 4x8 pixel each time |
1007 |
; the size (3rd argument) is meant to be a multiple of 4 |
; the size (3rd argument) is meant to be a multiple of 4 |
1008 |
; mm0..mm3 serves as a 4x4 delay line |
; mm0..mm3 serves as a 4x4 delay line |
1009 |
.Loop |
.Loop: |
1010 |
|
|
1011 |
push rdi |
push rdi |
1012 |
push rsi ; esi is preserved for src-mixing |
push rsi ; esi is preserved for src-mixing |
1108 |
|
|
1109 |
xvid_V_Pass_16_x86_64: |
xvid_V_Pass_16_x86_64: |
1110 |
V_PASS_16 0, 0 |
V_PASS_16 0, 0 |
1111 |
.endfunc |
ENDFUNC |
1112 |
xvid_V_Pass_Avrg_16_x86_64: |
xvid_V_Pass_Avrg_16_x86_64: |
1113 |
V_PASS_16 1, 0 |
V_PASS_16 1, 0 |
1114 |
.endfunc |
ENDFUNC |
1115 |
xvid_V_Pass_Avrg_Up_16_x86_64: |
xvid_V_Pass_Avrg_Up_16_x86_64: |
1116 |
V_PASS_16 2, 0 |
V_PASS_16 2, 0 |
1117 |
.endfunc |
ENDFUNC |
1118 |
|
|
1119 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
1120 |
;// 8x? copy Functions |
;// 8x? copy Functions |
1121 |
|
|
1122 |
xvid_V_Pass_8_x86_64: |
xvid_V_Pass_8_x86_64: |
1123 |
V_PASS_8 0, 0 |
V_PASS_8 0, 0 |
1124 |
.endfunc |
ENDFUNC |
1125 |
xvid_V_Pass_Avrg_8_x86_64: |
xvid_V_Pass_Avrg_8_x86_64: |
1126 |
V_PASS_8 1, 0 |
V_PASS_8 1, 0 |
1127 |
.endfunc |
ENDFUNC |
1128 |
xvid_V_Pass_Avrg_Up_8_x86_64: |
xvid_V_Pass_Avrg_Up_8_x86_64: |
1129 |
V_PASS_8 2, 0 |
V_PASS_8 2, 0 |
1130 |
.endfunc |
ENDFUNC |
1131 |
|
|
1132 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
1133 |
;// 16x? avrg Functions |
;// 16x? avrg Functions |
1134 |
|
|
1135 |
xvid_V_Pass_Add_16_x86_64: |
xvid_V_Pass_Add_16_x86_64: |
1136 |
V_PASS_16 0, 1 |
V_PASS_16 0, 1 |
1137 |
.endfunc |
ENDFUNC |
1138 |
xvid_V_Pass_Avrg_Add_16_x86_64: |
xvid_V_Pass_Avrg_Add_16_x86_64: |
1139 |
V_PASS_16 1, 1 |
V_PASS_16 1, 1 |
1140 |
.endfunc |
ENDFUNC |
1141 |
xvid_V_Pass_Avrg_Up_Add_16_x86_64: |
xvid_V_Pass_Avrg_Up_Add_16_x86_64: |
1142 |
V_PASS_16 2, 1 |
V_PASS_16 2, 1 |
1143 |
.endfunc |
ENDFUNC |
1144 |
|
|
1145 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
1146 |
;// 8x? avrg Functions |
;// 8x? avrg Functions |
1147 |
|
|
1148 |
xvid_V_Pass_8_Add_x86_64: |
xvid_V_Pass_8_Add_x86_64: |
1149 |
V_PASS_8 0, 1 |
V_PASS_8 0, 1 |
1150 |
.endfunc |
ENDFUNC |
1151 |
xvid_V_Pass_Avrg_8_Add_x86_64: |
xvid_V_Pass_Avrg_8_Add_x86_64: |
1152 |
V_PASS_8 1, 1 |
V_PASS_8 1, 1 |
1153 |
.endfunc |
ENDFUNC |
1154 |
xvid_V_Pass_Avrg_Up_8_Add_x86_64: |
xvid_V_Pass_Avrg_Up_8_Add_x86_64: |
1155 |
V_PASS_8 2, 1 |
V_PASS_8 2, 1 |
1156 |
.endfunc |
ENDFUNC |
1157 |
|
|
1158 |
;////////////////////////////////////////////////////////////////////// |
;////////////////////////////////////////////////////////////////////// |
1159 |
|
|