Parent Directory
|
Revision Log
Revision 1853 - (view) (download)
1 : | Isibaar | 1853 | ;/***************************************************************************** |
2 : | ; * | ||
3 : | ; * XVID MPEG-4 VIDEO CODEC | ||
4 : | ; * - colorspace conversions - | ||
5 : | ; * | ||
6 : | ; * Copyright(C) 2002-2003 Peter Ross <pross@xvid.org> | ||
7 : | ; * 2008 Michael Militzer <michael@xvid.org> | ||
8 : | ; * | ||
9 : | ; * This program is free software ; you can redistribute it and/or modify | ||
10 : | ; * it under the terms of the GNU General Public License as published by | ||
11 : | ; * the Free Software Foundation ; either version 2 of the License, or | ||
12 : | ; * (at your option) any later version. | ||
13 : | ; * | ||
14 : | ; * This program is distributed in the hope that it will be useful, | ||
15 : | ; * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
16 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 : | ; * GNU General Public License for more details. | ||
18 : | ; * | ||
19 : | ; * You should have received a copy of the GNU General Public License | ||
20 : | ; * along with this program ; if not, write to the Free Software | ||
21 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 : | ; * | ||
23 : | ; ****************************************************************************/ | ||
24 : | |||
;------------------------------------------------------------------------------
;
; MAKE_COLORSPACE(NAME, STACK, BYTES, PIXELS, VPIXELS, FUNC, ARG1, ARG2)
;
; This macro provides an assembler width/height scroll loop
; NAME      function name
; STACK     additional stack bytes required by FUNC
; BYTES     bytes-per-pixel for the given colorspace
; PIXELS    pixels (columns) operated on per FUNC call
; VPIXELS   vpixels (rows) operated on per FUNC call
; FUNC      conversion macro name; we expect to find FUNC_INIT and FUNC macros
; ARG1      first argument passed to FUNC_INIT and FUNC
; ARG2      second argument passed to FUNC_INIT and FUNC
;
; The generated function reads ten parameters, in this order:
;   x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride,
;   width, height, vflip
;
; throughout the FUNC the registers mean:
;------------------------------------------------------------------------------

%define y_stride        _EAX
%define u_ptr           _EBX
%define v_ptr           _ECX
%define x_stride        _EDX
%define x_stride_d      edx
%define y_ptr           _ESI
%define x_ptr           _EDI
%define width           _EBP    ; remaining-column counter inside .x_loop

%macro MAKE_COLORSPACE 8
%define NAME            %1
%define STACK           %2
%define BYTES           %3
%define PIXELS          %4
%define VPIXELS         %5
%define FUNC            %6
%define ARG1            %7
%define ARG2            %8
  ; --- define function global/symbol
ALIGN SECTION_ALIGN
cglobal NAME
NAME:
  ; --- init stack ---

  push _EBX                     ; esp + localsize + 16

%ifdef ARCH_IS_X86_64

%define localsize       2*PTR_SIZE + STACK
%ifndef WINDOWS
%define pushsize        2*PTR_SIZE
%define shadow          0
%else
%define pushsize        4*PTR_SIZE
%define shadow          32 + 2*PTR_SIZE
%endif

  ; parameters that are read from the caller's stack on x86-64
%define prm_vflip       dword [_ESP + localsize + pushsize + shadow + 4*PTR_SIZE]
%define prm_height      dword [_ESP + localsize + pushsize + shadow + 3*PTR_SIZE]
%define prm_width       dword [_ESP + localsize + pushsize + shadow + 2*PTR_SIZE]
%define prm_uv_stride   dword [_ESP + localsize + pushsize + shadow + 1*PTR_SIZE]

%ifdef WINDOWS
%define prm_y_stride    dword [_ESP + localsize + pushsize + shadow + 0*PTR_SIZE]
%define prm_v_ptr       [_ESP + localsize + pushsize + shadow - 1*PTR_SIZE]

  push _ESI                     ; esp + localsize + 8
  push _EDI                     ; esp + localsize + 4

%else
%define prm_y_stride    prm6d
%define prm_v_ptr       prm5
%endif

%define prm_u_ptr       prm4
%define prm_y_ptr       prm3
%define prm_x_stride    prm2d
%define prm_x_ptr       prm1
%define _ip             _ESP + localsize + pushsize + 0

%define x_dif           TMP0

%else

%define localsize       5*PTR_SIZE + STACK
%define pushsize        4*PTR_SIZE

  ; 32-bit build: every parameter is read from the stack
%define prm_vflip       [_ESP + localsize + pushsize + 10*PTR_SIZE]
%define prm_height      [_ESP + localsize + pushsize + 9*PTR_SIZE]
%define prm_width       [_ESP + localsize + pushsize + 8*PTR_SIZE]
%define prm_uv_stride   [_ESP + localsize + pushsize + 7*PTR_SIZE]
%define prm_y_stride    [_ESP + localsize + pushsize + 6*PTR_SIZE]
%define prm_v_ptr       [_ESP + localsize + pushsize + 5*PTR_SIZE]
%define prm_u_ptr       [_ESP + localsize + pushsize + 4*PTR_SIZE]
%define prm_y_ptr       [_ESP + localsize + pushsize + 3*PTR_SIZE]
%define prm_x_stride    [_ESP + localsize + pushsize + 2*PTR_SIZE]
%define prm_x_ptr       [_ESP + localsize + pushsize + 1*PTR_SIZE]
%define _ip             _ESP + localsize + pushsize + 0

%define x_dif           dword [_ESP + localsize - 5*4]

  push _ESI                     ; esp + localsize + 8
  push _EDI                     ; esp + localsize + 4

%endif

  push _EBP                     ; esp + localsize + 0

  ; local variables, addressed downwards from the top of the local area
%define y_dif           dword [_ESP + localsize - 1*4]
%define uv_dif          dword [_ESP + localsize - 2*4]
%define fixed_width     dword [_ESP + localsize - 3*4]
%define tmp_height      dword [_ESP + localsize - 4*4]

  sub _ESP, localsize

  ; --- init variables ---

  mov eax, prm_width            ; fixed width
  add eax, 15                   ;   = width rounded up
  and eax, ~15                  ;     to a multiple of 16
  mov fixed_width, eax          ;

  mov ebx, prm_x_stride         ;
%rep BYTES
  sub _EBX, _EAX                ;
%endrep
  mov x_dif, _EBX               ; x_dif = x_stride - BYTES*fixed_width

  mov ebx, prm_y_stride         ;
  sub ebx, eax                  ;
  mov y_dif, ebx                ; y_dif = y_stride - fixed_width

  mov ebx, prm_uv_stride        ;
  mov TMP1, _EAX                ;
  shr TMP1, 1                   ;
  sub _EBX, TMP1                ;
  mov uv_dif, ebx               ; uv_dif = uv_stride - fixed_width/2

%ifdef ARCH_IS_X86_64
%ifndef WINDOWS
  mov TMP1d, prm_x_stride       ; read x_stride before _ESI is overwritten
  mov _ESI, prm_y_ptr
  mov _EDX, TMP1
%else
  mov _ESI, prm_y_ptr
  mov _EDI, prm_x_ptr
%endif
%else
  mov _ESI, prm_y_ptr           ; $esi$ = y_ptr
  mov _EDI, prm_x_ptr           ; $edi$ = x_ptr
  mov edx, prm_x_stride         ; $edx$ = x_stride
%endif

  mov ebp, prm_height           ; $ebp$ = height

  mov ebx, prm_vflip
  or _EBX, _EBX
  jz .dont_flip

  ; --- do flipping ---

  xor _EBX,_EBX
%rep BYTES
  sub _EBX, _EAX
%endrep
  sub _EBX, _EDX
  mov x_dif, _EBX               ; x_dif = -BYTES*fixed_width - x_stride

  lea _EAX, [_EBP-1]            ; $eax$ = height - 1

  ; mul writes its high half to edx, so x_stride is saved around it
%ifdef ARCH_IS_X86_64
  mov TMP1, _EDX
  mul edx
  mov _EDX, TMP1
%else
  push _EDX
  mul edx
  pop _EDX
%endif
  add _EDI, _EAX                ; $edi$ += (height-1) * x_stride

  neg _EDX                      ; x_stride = -x_stride

.dont_flip:

  ; --- begin loop ---

  mov eax, prm_y_stride         ; $eax$ = y_stride
  mov _EBX, prm_u_ptr           ; $ebx$ = u_ptr
  mov _ECX, prm_v_ptr           ; $ecx$ = v_ptr

  FUNC %+ _INIT ARG1, ARG2      ; call FUNC_INIT

.y_loop:
  mov tmp_height, ebp           ; park the row counter, ebp becomes the column counter
  mov ebp, fixed_width

.x_loop:
  FUNC ARG1, ARG2               ; call FUNC

  add _EDI, BYTES*PIXELS        ; x_ptr += BYTES*PIXELS
  add _ESI, PIXELS              ; y_ptr += PIXELS
  add _EBX, PIXELS/2            ; u_ptr += PIXELS/2
  add _ECX, PIXELS/2            ; v_ptr += PIXELS/2

  sub _EBP, PIXELS              ; $ebp$ -= PIXELS
  jg .x_loop                    ; if ($ebp$ > 0) goto .x_loop

  mov ebp, tmp_height
  add _EDI, x_dif               ; x_ptr += x_dif + (VPIXELS-1)*x_stride
%ifdef ARCH_IS_X86_64
  ; NOTE(review): y_dif is loaded with a 32-bit move before a pointer-sized add;
  ; if TMP1d is the low dword of TMP1 a negative y_dif is not sign-extended.
  ; Confirm callers always pass y_stride >= fixed_width.
  mov TMP1d, y_dif
  add _ESI, TMP1                ; y_ptr += y_dif + (VPIXELS-1)*y_stride
%else
  add _ESI, y_dif               ; y_ptr += y_dif + (VPIXELS-1)*y_stride
%endif

%rep VPIXELS-1
  add _EDI, _EDX
  add _ESI, _EAX
%endrep

%ifdef ARCH_IS_X86_64
  ; NOTE(review): same 32-bit load pattern as y_dif above; confirm callers
  ; always pass uv_stride >= fixed_width/2.
  mov TMP1d, uv_dif
  add _EBX, TMP1                ; u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
  add _ECX, TMP1                ; v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
%else
  add _EBX, uv_dif              ; u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
  add _ECX, uv_dif              ; v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride
%endif

%rep (VPIXELS/2)-1
%ifdef ARCH_IS_X86_64
  mov TMP1d, prm_uv_stride
  add _EBX, TMP1
  add _ECX, TMP1
%else
  add _EBX, prm_uv_stride
  add _ECX, prm_uv_stride
%endif
%endrep

  sub _EBP, VPIXELS             ; $ebp$ -= VPIXELS
  jg .y_loop                    ; if ($ebp$ > 0) goto .y_loop

  ; cleanup stack & undef everything

  add _ESP, localsize

  ; restore saved registers in reverse push order
  pop _EBP
%ifndef ARCH_IS_X86_64
  pop _EDI
  pop _ESI
%else
%ifdef WINDOWS
  pop _EDI
  pop _ESI
%endif
%endif
  pop _EBX

%undef prm_vflip
%undef prm_height
%undef prm_width
%undef prm_uv_stride
%undef prm_y_stride
%undef prm_v_ptr
%undef prm_u_ptr
%undef prm_y_ptr
%undef prm_x_stride
%undef prm_x_ptr
%undef _ip
%undef x_dif
%undef y_dif
%undef uv_dif
%undef fixed_width
%undef tmp_height
  ret
ENDFUNC
%undef NAME
%undef STACK
%undef BYTES
%undef PIXELS
%undef VPIXELS
%undef FUNC
%undef ARG1
%undef ARG2
%endmacro
308 : | ;------------------------------------------------------------------------------ |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |