Parent Directory | Revision Log
Revision 1877 - (view) (download)
1 : | edgomez | 1382 | ;/**************************************************************************** |
2 : | edgomez | 851 | ; * |
3 : | edgomez | 1382 | ; * XVID MPEG-4 VIDEO CODEC |
4 : | ; * - K7 optimized SAD operators - | ||
5 : | edgomez | 851 | ; * |
6 : | edgomez | 1382 | ; * Copyright(C) 2002 Jaan Kalda |
7 : | edgomez | 851 | ; * |
8 : | edgomez | 1382 | ; * This program is free software; you can redistribute it and/or modify it |
9 : | ; * under the terms of the GNU General Public License as published by | ||
10 : | ; * the Free Software Foundation; either version 2 of the License, or | ||
11 : | ; * (at your option) any later version. | ||
12 : | edgomez | 851 | ; * |
13 : | edgomez | 1382 | ; * This program is distributed in the hope that it will be useful, |
14 : | ; * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
15 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | ; * GNU General Public License for more details. | ||
17 : | edgomez | 851 | ; * |
18 : | edgomez | 1382 | ; * You should have received a copy of the GNU General Public License |
19 : | ; * along with this program; if not, write to the Free Software | ||
20 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | edgomez | 851 | ; * |
22 : | Isibaar | 1877 | ; * $Id: sad_3dne.asm,v 1.10.2.2 2009-09-16 17:11:39 Isibaar Exp $ |
23 : | edgomez | 1382 | ; * |
24 : | ; ***************************************************************************/ | ||
25 : | edgomez | 851 | |
26 : | edgomez | 1382 | ; these 3dne functions are compatible with iSSE, but are optimized specifically |
27 : | ; for K7 pipelines | ||
28 : | edgomez | 851 | |
29 : | Isibaar | 1795 | %include "nasm.inc" |
30 : | edgomez | 1382 | |
31 : | ;============================================================================= | ||
32 : | ; Read only data | ||
33 : | ;============================================================================= | ||
34 : | |||
; Read-only constant mmx_one: four 16-bit words, each equal to 1 (8 bytes).
; NOTE(review): no routine in this listing references mmx_one - confirm
; whether it is still needed.
35 : | Isibaar | 1795 | DATA |
36 : | edgomez | 851 | |
37 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
38 : | edgomez | 1382 | mmx_one: |
39 : | times 4 dw 1 | ||
40 : | edgomez | 851 | |
41 : | edgomez | 1382 | ;============================================================================= |
42 : | ; Helper macros | ||
43 : | ;============================================================================= | ||
44 : | edgomez | 851 | |
45 : | edgomez | 1382 | ;; %1 block number (0..3) |
; SAD_16x16_SSE %1
;   Computes psadbw sums for four consecutive 16-byte rows of [_EAX] against
;   the matching rows of [TMP1], then advances both pointers by 4*TMP0.
;   Register contract as used by sad16_3dne in this file:
;     TMP0 = row stride, _EBX = 3*TMP0 (offset of the fourth row).
;   On exit: mm5 = rows 0+1 of this group, mm1 = rows 2+3 of this group.
;   The two halves are only combined at the start of the NEXT invocation
;   (or by the caller after the last one).
;   %1 == 0 : reserves 12 bytes of stack (three dword slots) for partial sums.
;   %1 != 0 : completes the previous group's total (mm1 += mm5) and stores it
;             at [_ESP + 4*(%1-1)].
46 : | %macro SAD_16x16_SSE 1 | ||
47 : | Isibaar | 1795 | movq mm7, [_EAX] |
48 : | movq mm6, [_EAX+8] | ||
49 : | psadbw mm7, [TMP1] | ||
50 : | psadbw mm6, [TMP1+8] | ||
51 : | edgomez | 1382 | %if (%1) |
; fold the previous group's mm5 into mm1 before mm5 is reloaded below
52 : | paddd mm1, mm5 | ||
53 : | %endif | ||
54 : | Isibaar | 1795 | movq mm5, [_EAX+TMP0] |
55 : | movq mm4, [_EAX+TMP0+8] | ||
56 : | psadbw mm5, [TMP1+TMP0] | ||
57 : | psadbw mm4, [TMP1+TMP0+8] | ||
58 : | movq mm3, [_EAX+2*TMP0] | ||
59 : | movq mm2, [_EAX+2*TMP0+8] | ||
60 : | psadbw mm3, [TMP1+2*TMP0] | ||
61 : | psadbw mm2, [TMP1+2*TMP0+8] | ||
62 : | edgomez | 1382 | %if (%1) |
; spill the previous group's total; mm1 is overwritten by the next load
63 : | Isibaar | 1795 | movd [_ESP+4*(%1-1)], mm1 |
64 : | edgomez | 1382 | %else |
; first group: make room for the three dword partial sums
65 : | Isibaar | 1795 | sub _ESP, byte 12 |
66 : | edgomez | 1382 | %endif |
67 : | Isibaar | 1795 | movq mm1, [_EAX+_EBX] |
68 : | movq mm0, [_EAX+_EBX+8] | ||
69 : | psadbw mm1, [TMP1+_EBX] | ||
70 : | psadbw mm0, [TMP1+_EBX+8] | ||
; step both source pointers to the next group of four rows
71 : | lea _EAX, [_EAX+4*TMP0] | ||
72 : | lea TMP1, [TMP1+4*TMP0] | ||
; combine left/right halves of each row, then pair the rows up
73 : | edgomez | 1382 | paddd mm7, mm6 |
74 : | paddd mm5, mm4 | ||
75 : | paddd mm3, mm2 | ||
76 : | paddd mm1, mm0 | ||
77 : | paddd mm5, mm7 | ||
78 : | paddd mm1, mm3 | ||
79 : | %endmacro | ||
80 : | edgomez | 851 | |
; SADBI_16x16_SSE0
;   First two rows of the 16-wide bidirectional SAD.
;   For each row: pavgb averages 16 bytes at [TMP1] with 16 bytes at [_EBX],
;   and psadbw takes the SAD of 16 bytes at [_EAX] against that average.
;   Row 0 results land in mm5 (bytes 0-7) and mm6 (bytes 8-15);
;   row 1 results are left pending in mm0 / mm1.
;   TMP1, _EAX and _EBX are each advanced by TMP0 twice.
;   NOTE(review): the "byte" keyword inside some effective addresses selects a
;   byte-sized displacement encoding; presumably this is instruction-length
;   tuning for the K7 decoders - confirm before removing.
81 : | edgomez | 1382 | %macro SADBI_16x16_SSE0 0 |
82 : | Isibaar | 1795 | movq mm2, [TMP1] |
83 : | movq mm3, [TMP1+8] | ||
84 : | edgomez | 1382 | |
85 : | Isibaar | 1795 | movq mm5, [byte _EAX] |
86 : | movq mm6, [_EAX+8] | ||
87 : | pavgb mm2, [byte _EBX] | ||
88 : | pavgb mm3, [_EBX+8] | ||
89 : | edgomez | 1382 | |
90 : | Isibaar | 1795 | add TMP1, TMP0 |
; row 0: mm5 / mm6 become the running accumulators
91 : | edgomez | 1382 | psadbw mm5, mm2 |
92 : | psadbw mm6, mm3 | ||
93 : | |||
94 : | Isibaar | 1795 | add _EAX, TMP0 |
95 : | add _EBX, TMP0 | ||
96 : | movq mm2, [byte TMP1] | ||
97 : | edgomez | 1382 | |
98 : | Isibaar | 1795 | movq mm3, [TMP1+8] |
99 : | movq mm0, [byte _EAX] | ||
100 : | edgomez | 1382 | |
101 : | Isibaar | 1795 | movq mm1, [_EAX+8] |
102 : | pavgb mm2, [byte _EBX] | ||
103 : | edgomez | 1382 | |
104 : | Isibaar | 1795 | pavgb mm3, [_EBX+8] |
105 : | add TMP1, TMP0 | ||
106 : | add _EAX, TMP0 | ||
107 : | edgomez | 1382 | |
108 : | Isibaar | 1795 | add _EBX, TMP0 |
; row 1: left pending in mm0 / mm1; the next SADBI_16x16_SSE adds it in
109 : | edgomez | 1382 | psadbw mm0, mm2 |
110 : | psadbw mm1, mm3 | ||
111 : | |||
112 : | %endmacro | ||
113 : | |||
; SADBI_16x16_SSE
;   One further row of the 16-wide bidirectional SAD.
;   First adds the pending row result (mm0 / mm1) into the accumulators
;   mm5 / mm6 with saturating paddusw, then computes the next row
;   (average of [TMP1] and [_EBX], SAD against [_EAX]) into mm0 / mm1.
;   Advances TMP1, _EAX and _EBX by TMP0.
;   The result of the last row stays pending in mm0 / mm1, so the caller has
;   to add it after the final invocation.
114 : | %macro SADBI_16x16_SSE 0 | ||
115 : | Isibaar | 1795 | movq mm2, [byte TMP1] |
116 : | movq mm3, [TMP1+8] | ||
; accumulate the previous row before mm0 / mm1 are reloaded
117 : | edgomez | 1382 | paddusw mm5, mm0 |
118 : | paddusw mm6, mm1 | ||
119 : | Isibaar | 1795 | movq mm0, [_EAX] |
120 : | movq mm1, [_EAX+8] | ||
121 : | pavgb mm2, [_EBX] | ||
122 : | pavgb mm3, [_EBX+8] | ||
123 : | add TMP1, TMP0 | ||
124 : | add _EAX, TMP0 | ||
125 : | add _EBX, TMP0 | ||
126 : | edgomez | 1382 | psadbw mm0, mm2 |
127 : | psadbw mm1, mm3 | ||
128 : | %endmacro | ||
129 : | |||
; SADBI_8x8_3dne
;   Two 8-byte rows of a bidirectional SAD per invocation.
;   Averages the rows at [TMP1] / [TMP1+TMP0] with those at [_EAX] /
;   [_EAX+TMP0], then takes the SAD of the rows at [_EBX] / [_EBX+TMP0]
;   against the averages into mm0 / mm1.
;   Before reloading mm0 / mm1 it adds their previous contents into
;   mm5 / mm6 (saturating). All three pointers advance by 2*TMP0.
;   NOTE(review): this macro is not invoked anywhere in this listing;
;   sad8bi_3dne contains the same instruction sequence written out inline.
130 : | %macro SADBI_8x8_3dne 0 | ||
131 : | Isibaar | 1795 | movq mm2, [TMP1] |
132 : | movq mm3, [TMP1+TMP0] | ||
133 : | pavgb mm2, [_EAX] | ||
134 : | pavgb mm3, [_EAX+TMP0] | ||
135 : | lea TMP1, [TMP1+2*TMP0] | ||
136 : | lea _EAX, [_EAX+2*TMP0] | ||
137 : | edgomez | 1382 | paddusw mm5, mm0 |
138 : | paddusw mm6, mm1 | ||
139 : | Isibaar | 1795 | movq mm0, [_EBX] |
140 : | movq mm1, [_EBX+TMP0] | ||
141 : | lea _EBX, [_EBX+2*TMP0] | ||
142 : | edgomez | 1382 | psadbw mm0, mm2 |
143 : | psadbw mm1, mm3 | ||
144 : | %endmacro | ||
145 : | |||
; ABS_16x16_SSE %1
;   Building block of dev16_3dne: psadbw of 16-byte rows at [_EAX] against the
;   8-byte value in mm4, accumulated in six registers:
;     mm7 / mm6 = left / right half of rows 0, 3, 6, ...
;     mm5 / mm3 = left / right half of rows 1, 4, 7, ...
;     mm2 / mm1 = left / right half of rows 2, 5, 8, ...
;   Register contract as set up by dev16_3dne: TMP0 = stride, TMP1 = 3*stride.
;   %1 == 0 : initialises the six accumulators from rows 0..2, pre-loads the
;             left half of row 3 into mm0 and advances _EAX by TMP1.
;   %1 == 1 : consumes the pending mm0, processes the rest of three rows,
;             advances _EAX by TMP1 and pre-loads the next left half into mm0.
;   %1 == 2 : consumes the pending mm0 and the right half of the current row
;             (added to mm7 / mm6 only); no pointer update.
146 : | %macro ABS_16x16_SSE 1 | ||
147 : | %if (%1 == 0) | ||
148 : | Isibaar | 1795 | movq mm7, [_EAX] |
149 : | edgomez | 1382 | psadbw mm7, mm4 |
; Register-to-itself move used as filler.
; NOTE(review): in 64-bit mode a 32-bit mov zero-extends into the full
; register, so this clears the upper half of rsi - confirm that is acceptable
; for every x86-64 target this file is built for.
150 : | mov esi, esi | ||
151 : | Isibaar | 1795 | movq mm6, [_EAX+8] |
152 : | movq mm5, [_EAX+TMP0] | ||
153 : | movq mm3, [_EAX+TMP0+8] | ||
154 : | edgomez | 1382 | psadbw mm6, mm4 |
155 : | |||
156 : | Isibaar | 1795 | movq mm2, [byte _EAX+2*TMP0] |
157 : | edgomez | 1382 | psadbw mm5, mm4 |
158 : | Isibaar | 1795 | movq mm1, [_EAX+2*TMP0+8] |
159 : | edgomez | 1382 | psadbw mm3, mm4 |
160 : | |||
; pre-load the left half of row 3; its psadbw happens in the next stage
161 : | Isibaar | 1795 | movq mm0, [_EAX+TMP1+0] |
162 : | edgomez | 1382 | psadbw mm2, mm4 |
163 : | Isibaar | 1795 | add _EAX, TMP1 |
164 : | edgomez | 1382 | psadbw mm1, mm4 |
165 : | %endif | ||
166 : | %if (%1 == 1) | ||
; mm0 holds the left half of the current row, loaded by the previous stage
167 : | psadbw mm0, mm4 | ||
168 : | paddd mm7, mm0 | ||
169 : | Isibaar | 1795 | movq mm0, [_EAX+8] |
170 : | edgomez | 1382 | psadbw mm0, mm4 |
171 : | paddd mm6, mm0 | ||
172 : | |||
173 : | Isibaar | 1795 | movq mm0, [byte _EAX+TMP0] |
174 : | edgomez | 1382 | psadbw mm0, mm4 |
175 : | |||
176 : | paddd mm5, mm0 | ||
177 : | Isibaar | 1795 | movq mm0, [_EAX+TMP0+8] |
178 : | edgomez | 1382 | |
179 : | psadbw mm0, mm4 | ||
180 : | paddd mm3, mm0 | ||
181 : | Isibaar | 1795 | movq mm0, [_EAX+2*TMP0] |
182 : | edgomez | 1382 | psadbw mm0, mm4 |
183 : | paddd mm2, mm0 | ||
184 : | |||
185 : | Isibaar | 1795 | movq mm0, [_EAX+2*TMP0+8] |
186 : | add _EAX, TMP1 | ||
187 : | edgomez | 1382 | psadbw mm0, mm4 |
188 : | paddd mm1, mm0 | ||
; pre-load the left half of the next row for the following stage
189 : | Isibaar | 1795 | movq mm0, [_EAX] |
190 : | edgomez | 1382 | %endif |
191 : | %if (%1 == 2) | ||
; final row: only the mm7 / mm6 accumulators receive a contribution
192 : | psadbw mm0, mm4 | ||
193 : | paddd mm7, mm0 | ||
194 : | Isibaar | 1795 | movq mm0, [_EAX+8] |
195 : | edgomez | 1382 | psadbw mm0, mm4 |
196 : | paddd mm6, mm0 | ||
197 : | %endif | ||
198 : | %endmacro | ||
199 : | |||
200 : | ;============================================================================= | ||
201 : | ; Code | ||
202 : | ;============================================================================= | ||
203 : | |||
; Code section and the five entry points defined in this file.
; NOTE(review): TEXT and cglobal are macros from nasm.inc; presumably cglobal
; exports each label with the platform's symbol decoration - confirm there.
204 : | Isibaar | 1865 | TEXT |
205 : | edgomez | 1382 | |
206 : | cglobal sad16_3dne | ||
207 : | cglobal sad8_3dne | ||
208 : | cglobal sad16bi_3dne | ||
209 : | cglobal sad8bi_3dne | ||
210 : | cglobal dev16_3dne | ||
211 : | |||
212 : | ;----------------------------------------------------------------------------- | ||
213 : | edgomez | 851 | ; |
214 : | ; uint32_t sad16_3dne(const uint8_t * const cur, | ||
215 : | edgomez | 1382 | ; const uint8_t * const ref, |
216 : | ; const uint32_t stride, | ||
217 : | ; const uint32_t best_sad); | ||
218 : | edgomez | 851 | ; |
219 : | edgomez | 1382 | ;----------------------------------------------------------------------------- |
220 : | |||
221 : | edgomez | 851 | ; optimization: 21% faster |
222 : | edgomez | 1382 | |
223 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
; sad16_3dne: sum of absolute differences over a 16x16 block.
;   prm1 -> _EAX (cur), prm2 -> TMP1 (ref), prm3 -> TMP0 (stride).
;   The fourth prototype argument (best_sad) is never read by this routine.
;   Result is returned in eax. _EBX is saved and restored.
;   NOTE(review): no emms is executed here; callers presumably clear the MMX
;   state themselves - confirm.
224 : | edgomez | 851 | sad16_3dne: |
; NOTE(review): the arguments are fetched before the push; if the prmN macros
; expand to _ESP-relative operands on 32-bit builds this order matters.
225 : | Isibaar | 1795 | mov _EAX, prm1 ; Src1 |
226 : | mov TMP1, prm2 ; Src2 | ||
227 : | mov TMP0, prm3 ; Stride | ||
228 : | edgomez | 851 | |
229 : | Isibaar | 1795 | push _EBX |
; _EBX = 3*stride, the offset of the fourth row inside each group
230 : | lea _EBX, [2*TMP0+TMP0] | ||
231 : | |||
; Four groups of four rows. Group 0 reserves 12 bytes of stack; the totals of
; groups 0..2 are spilled there, group 3 stays in mm1 / mm5.
232 : | edgomez | 1382 | SAD_16x16_SSE 0 |
233 : | SAD_16x16_SSE 1 | ||
234 : | SAD_16x16_SSE 2 | ||
235 : | SAD_16x16_SSE 3 | ||
236 : | edgomez | 851 | |
; finish group 3, then add the three spilled group totals
237 : | edgomez | 1382 | paddd mm1, mm5 |
238 : | movd eax, mm1 | ||
239 : | Isibaar | 1795 | add eax, dword [_ESP] |
240 : | add eax, dword [_ESP+4] | ||
241 : | add eax, dword [_ESP+8] | ||
; the pushed _EBX sits directly above the 12-byte scratch area
242 : | mov _EBX, [_ESP+12] | ||
; release the scratch area and the saved-register slot in one step
243 : | add _ESP, byte PTR_SIZE+12 | ||
244 : | edgomez | 851 | |
245 : | edgomez | 1382 | ret |
246 : | Isibaar | 1793 | ENDFUNC |
247 : | edgomez | 1382 | |
248 : | |||
249 : | ;----------------------------------------------------------------------------- | ||
250 : | edgomez | 851 | ; |
251 : | ; uint32_t sad8_3dne(const uint8_t * const cur, | ||
252 : | ; const uint8_t * const ref, | ||
253 : | ; const uint32_t stride); | ||
254 : | ; | ||
255 : | edgomez | 1382 | ;----------------------------------------------------------------------------- |
256 : | |||
257 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
; sad8_3dne: sum of absolute differences over an 8x8 block.
;   prm1 -> _EAX (cur), prm2 -> TMP1 (ref), prm3 -> TMP0 (stride).
;   Each of the eight rows is handled by one movq / psadbw pair; the trailing
;   ";n" comments give the row index. Result is returned in eax.
;   _EBX is saved and restored.
258 : | edgomez | 851 | sad8_3dne: |
259 : | |||
260 : | Isibaar | 1795 | mov _EAX, prm1 ; Src1 |
261 : | mov TMP0, prm3 ; Stride | ||
262 : | mov TMP1, prm2 ; Src2 | ||
263 : | push _EBX | ||
; _EBX = 3*stride
264 : | lea _EBX, [TMP0+2*TMP0] | ||
265 : | edgomez | 851 | |
266 : | Isibaar | 1795 | movq mm0, [byte _EAX] ;0 |
267 : | psadbw mm0, [byte TMP1] | ||
268 : | movq mm1, [_EAX+TMP0] ;1 | ||
269 : | psadbw mm1, [TMP1+TMP0] | ||
270 : | edgomez | 851 | |
271 : | Isibaar | 1795 | movq mm2, [_EAX+2*TMP0] ;2 |
272 : | psadbw mm2, [TMP1+2*TMP0] | ||
273 : | movq mm3, [_EAX+_EBX] ;3 | ||
274 : | psadbw mm3, [TMP1+_EBX] | ||
275 : | edgomez | 851 | |
276 : | edgomez | 1382 | paddd mm0, mm1 |
277 : | edgomez | 851 | |
278 : | Isibaar | 1795 | movq mm4, [byte _EAX+4*TMP0];4 |
279 : | psadbw mm4, [TMP1+4*TMP0] | ||
; 2*_EBX = 6*stride
280 : | movq mm5, [_EAX+2*_EBX] ;6 | ||
281 : | psadbw mm5, [TMP1+2*_EBX] | ||
282 : | edgomez | 851 | |
283 : | edgomez | 1382 | paddd mm2, mm3 |
284 : | paddd mm0, mm2 | ||
285 : | edgomez | 851 | |
; rescale the offsets in place: _EBX becomes 7*stride, TMP0 becomes 5*stride
; (TMP0 no longer holds the plain stride after this point)
286 : | Isibaar | 1795 | lea _EBX, [_EBX+4*TMP0] ;3+4=7 |
287 : | lea TMP0, [TMP0+4*TMP0] ; 5 | ||
288 : | movq mm6, [_EAX+TMP0] ;5 | ||
289 : | psadbw mm6, [TMP1+TMP0] | ||
290 : | movq mm7, [_EAX+_EBX] ;7 | ||
291 : | psadbw mm7, [TMP1+_EBX] | ||
292 : | edgomez | 1382 | paddd mm4, mm5 |
293 : | paddd mm6, mm7 | ||
294 : | paddd mm0, mm4 | ||
; restore _EBX with mov + add (equivalent to a pop)
295 : | Isibaar | 1795 | mov _EBX, [_ESP] |
296 : | add _ESP, byte PTR_SIZE | ||
297 : | edgomez | 1382 | paddd mm0, mm6 |
298 : | movd eax, mm0 | ||
299 : | edgomez | 851 | |
300 : | edgomez | 1382 | ret |
301 : | Isibaar | 1793 | ENDFUNC |
302 : | edgomez | 851 | |
303 : | edgomez | 1382 | |
304 : | ;----------------------------------------------------------------------------- | ||
305 : | edgomez | 851 | ; |
306 : | ; uint32_t sad16bi_3dne(const uint8_t * const cur, | ||
307 : | ; const uint8_t * const ref1, | ||
308 : | ; const uint8_t * const ref2, | ||
309 : | ; const uint32_t stride); | ||
310 : | ; | ||
311 : | edgomez | 1382 | ;----------------------------------------------------------------------------- |
312 : | edgomez | 851 | ;optimization: 14% faster |
313 : | |||
314 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
; sad16bi_3dne: 16x16 SAD of cur against the byte-wise average (pavgb) of
; ref1 and ref2.
;   prm1 -> _EAX (cur), prm2 -> TMP1 (ref1), prm3 -> _EBX (ref2),
;   prm4 -> TMP0 (stride). Result is returned in eax.
;   _EBX is saved and restored.
315 : | edgomez | 1382 | sad16bi_3dne: |
316 : | Isibaar | 1795 | mov _EAX, prm1 ; Src |
317 : | mov TMP1, prm2 ; Ref1 | ||
318 : | mov TMP0, prm4 ; Stride | ||
319 : | edgomez | 851 | |
320 : | Isibaar | 1795 | push _EBX |
321 : | %ifdef ARCH_IS_X86_64 | ||
322 : | mov _EBX, prm3 | ||
323 : | %else | ||
; 32-bit path: the extra 4 accounts for the _EBX just pushed
324 : | mov _EBX, [_ESP+4+12] ; Ref2 | ||
325 : | %endif | ||
326 : | |||
; SADBI_16x16_SSE0 handles rows 0 and 1; the fourteen SADBI_16x16_SSE
; invocations below handle rows 2..15 (one row each).
327 : | edgomez | 1382 | SADBI_16x16_SSE0 |
328 : | SADBI_16x16_SSE | ||
329 : | SADBI_16x16_SSE | ||
330 : | SADBI_16x16_SSE | ||
331 : | SADBI_16x16_SSE | ||
332 : | SADBI_16x16_SSE | ||
333 : | SADBI_16x16_SSE | ||
334 : | edgomez | 851 | |
335 : | edgomez | 1382 | SADBI_16x16_SSE |
336 : | SADBI_16x16_SSE | ||
337 : | SADBI_16x16_SSE | ||
338 : | SADBI_16x16_SSE | ||
339 : | SADBI_16x16_SSE | ||
340 : | SADBI_16x16_SSE | ||
341 : | SADBI_16x16_SSE | ||
342 : | SADBI_16x16_SSE | ||
; add the last row, which the final macro left pending in mm0 / mm1
343 : | paddusw mm5,mm0 | ||
344 : | paddusw mm6,mm1 | ||
345 : | edgomez | 851 | |
346 : | Isibaar | 1795 | pop _EBX |
; merge the left-half and right-half accumulators
347 : | edgomez | 1382 | paddusw mm6,mm5 |
348 : | movd eax, mm6 | ||
349 : | edgomez | 851 | |
350 : | edgomez | 1382 | ret |
351 : | Isibaar | 1793 | ENDFUNC |
352 : | edgomez | 851 | |
353 : | edgomez | 1382 | ;----------------------------------------------------------------------------- |
354 : | ; | ||
355 : | ; uint32_t sad8bi_3dne(const uint8_t * const cur, | ||
356 : | ; const uint8_t * const ref1, | ||
357 : | ; const uint8_t * const ref2, | ||
358 : | ; const uint32_t stride); | ||
359 : | ; | ||
360 : | ;----------------------------------------------------------------------------- | ||
361 : | edgomez | 851 | |
362 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
; sad8bi_3dne: 8x8 SAD of cur against the byte-wise average (pavgb) of
; ref1 and ref2.
;   prm3 -> _EAX (ref2), prm2 -> TMP1 (ref1), prm1 -> _EBX (cur),
;   prm4 -> TMP0 (stride). Result is returned in eax.
;   _EBX is saved and restored.
;   The body is four hand-unrolled stages of two rows each; mm5 / mm6 are the
;   accumulators for the first / second row of every pair.
363 : | edgomez | 1382 | sad8bi_3dne: |
364 : | Isibaar | 1795 | mov _EAX, prm3 ; Ref2 |
365 : | mov TMP1, prm2 ; Ref1 | ||
366 : | mov TMP0, prm4 ; Stride | ||
367 : | edgomez | 851 | |
368 : | Isibaar | 1795 | push _EBX |
369 : | %ifdef ARCH_IS_X86_64 | ||
370 : | mov _EBX, prm1 | ||
371 : | %else | ||
; 32-bit path: the extra 4 accounts for the _EBX just pushed
372 : | mov _EBX, [_ESP+4+ 4] ; Src | ||
373 : | %endif | ||
374 : | |||
; rows 0-1: results go straight into the accumulators mm5 / mm6
375 : | movq mm2, [TMP1] | ||
376 : | movq mm3, [TMP1+TMP0] | ||
377 : | pavgb mm2, [_EAX] | ||
378 : | pavgb mm3, [_EAX+TMP0] | ||
379 : | lea TMP1, [TMP1+2*TMP0] | ||
380 : | lea _EAX, [_EAX+2*TMP0] | ||
381 : | movq mm5, [_EBX] | ||
382 : | movq mm6, [_EBX+TMP0] | ||
383 : | lea _EBX, [_EBX+2*TMP0] | ||
384 : | edgomez | 1382 | psadbw mm5, mm2 |
385 : | psadbw mm6, mm3 | ||
386 : | edgomez | 851 | |
; rows 2-3: results left pending in mm0 / mm1
387 : | Isibaar | 1795 | movq mm2, [TMP1] |
388 : | movq mm3, [TMP1+TMP0] | ||
389 : | pavgb mm2, [_EAX] | ||
390 : | pavgb mm3, [_EAX+TMP0] | ||
391 : | lea TMP1, [TMP1+2*TMP0] | ||
392 : | lea _EAX, [_EAX+2*TMP0] | ||
393 : | movq mm0, [_EBX] | ||
394 : | movq mm1, [_EBX+TMP0] | ||
395 : | lea _EBX, [_EBX+2*TMP0] | ||
396 : | edgomez | 1382 | psadbw mm0, mm2 |
397 : | psadbw mm1, mm3 | ||
398 : | edgomez | 851 | |
; rows 4-5: accumulate the pending rows 2-3 first, then compute the new pair
399 : | Isibaar | 1795 | movq mm2, [TMP1] |
400 : | movq mm3, [TMP1+TMP0] | ||
401 : | pavgb mm2, [_EAX] | ||
402 : | pavgb mm3, [_EAX+TMP0] | ||
403 : | lea TMP1, [TMP1+2*TMP0] | ||
404 : | lea _EAX, [_EAX+2*TMP0] | ||
405 : | edgomez | 1382 | paddusw mm5,mm0 |
406 : | paddusw mm6,mm1 | ||
407 : | Isibaar | 1795 | movq mm0, [_EBX] |
408 : | movq mm1, [_EBX+TMP0] | ||
409 : | lea _EBX, [_EBX+2*TMP0] | ||
410 : | edgomez | 1382 | psadbw mm0, mm2 |
411 : | psadbw mm1, mm3 | ||
412 : | edgomez | 851 | |
; rows 6-7: last pair, so the pointers are not advanced any further
413 : | Isibaar | 1795 | movq mm2, [TMP1] |
414 : | movq mm3, [TMP1+TMP0] | ||
415 : | pavgb mm2, [_EAX] | ||
416 : | pavgb mm3, [_EAX+TMP0] | ||
417 : | edgomez | 1382 | paddusw mm5,mm0 |
418 : | paddusw mm6,mm1 | ||
419 : | Isibaar | 1795 | movq mm0, [_EBX] |
420 : | movq mm1, [_EBX+TMP0] | ||
421 : | edgomez | 1382 | psadbw mm0, mm2 |
422 : | psadbw mm1, mm3 | ||
423 : | paddusw mm5,mm0 | ||
424 : | paddusw mm6,mm1 | ||
425 : | edgomez | 851 | |
; merge the two accumulators and restore _EBX (mov + add instead of pop)
426 : | edgomez | 1382 | paddusw mm6,mm5 |
427 : | Isibaar | 1795 | mov _EBX,[_ESP] |
428 : | add _ESP,byte PTR_SIZE | ||
429 : | edgomez | 1382 | movd eax, mm6 |
430 : | edgomez | 851 | |
431 : | edgomez | 1382 | ret |
432 : | Isibaar | 1793 | ENDFUNC |
433 : | edgomez | 851 | |
434 : | |||
435 : | ;=========================================================================== | ||
436 : | ; | ||
437 : | ; uint32_t dev16_3dne(const uint8_t * const cur, | ||
438 : | ; const uint32_t stride); | ||
439 : | ; | ||
440 : | ;=========================================================================== | ||
441 : | ; optimization: 25 % faster | ||
442 : | |||
443 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
; dev16_3dne: sum over a 16x16 block of |pixel - mean|, in two passes.
;   prm1 -> _EAX (cur), prm2 -> TMP0 (stride); TMP1 = 3*stride.
;   Pass 1 runs the ABS_16x16_SSE stages with mm4 = 0, so every psadbw yields
;   a plain byte sum and mm7 ends up holding the sum of all 256 pixels.
;   Pass 2 repeats the same stages with mm4 = mean replicated in every byte.
;   Result is returned in eax. No stack space or callee-saved register is
;   used by the visible code.
444 : | edgomez | 1382 | dev16_3dne: |
445 : | edgomez | 851 | |
446 : | Isibaar | 1795 | mov _EAX, prm1 ; Src |
447 : | mov TMP0, prm2 ; Stride | ||
448 : | lea TMP1, [TMP0+2*TMP0] | ||
449 : | edgomez | 851 | |
; pass 1 compares against zero
450 : | edgomez | 1382 | pxor mm4, mm4 |
451 : | edgomez | 851 | |
452 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
; stage 0 covers rows 0-2, each stage 1 covers three more rows (3-14),
; stage 2 covers row 15
453 : | edgomez | 1382 | ABS_16x16_SSE 0 |
454 : | ABS_16x16_SSE 1 | ||
455 : | ABS_16x16_SSE 1 | ||
456 : | ABS_16x16_SSE 1 | ||
457 : | ABS_16x16_SSE 1 | ||
458 : | edgomez | 851 | |
; mm1, mm2, mm3 and mm5 are final here (stage 2 only touches mm7 / mm6),
; so they can be folded while stage 2 runs
459 : | edgomez | 1382 | paddd mm1, mm2 |
460 : | paddd mm3, mm5 | ||
461 : | edgomez | 851 | |
462 : | edgomez | 1382 | ABS_16x16_SSE 2 |
463 : | edgomez | 851 | |
464 : | edgomez | 1382 | paddd mm7, mm6 |
465 : | paddd mm1, mm3 | ||
; rewind the source pointer for the second pass
466 : | Isibaar | 1795 | mov _EAX, prm1 ; Src |
467 : | edgomez | 1382 | paddd mm7, mm1 |
; punpcklbw doubles every low byte, so word 1 holds the second byte of the
; sum twice; pshufw 055h copies word 1 into all four words. Every byte of mm4
; is then (sum >> 8), i.e. the sum of 256 pixels divided by 256, truncated.
468 : | punpcklbw mm7, mm7 ;xxyyaazz | ||
469 : | pshufw mm4, mm7, 055h ; mm4 contains the mean | ||
470 : | |||
471 : | |||
; NOTE(review): ABS_16x16_SSE 0 loads mm1 before reading it, so this clear
; looks redundant - confirm before removing (it may be scheduling filler).
472 : | pxor mm1, mm1 | ||
473 : | |||
; pass 2: same stage sequence, now measuring |pixel - mean|
474 : | ABS_16x16_SSE 0 | ||
475 : | ABS_16x16_SSE 1 | ||
476 : | ABS_16x16_SSE 1 | ||
477 : | ABS_16x16_SSE 1 | ||
478 : | ABS_16x16_SSE 1 | ||
479 : | |||
480 : | paddd mm1, mm2 | ||
481 : | paddd mm3, mm5 | ||
482 : | |||
483 : | ABS_16x16_SSE 2 | ||
484 : | |||
485 : | paddd mm7, mm6 | ||
486 : | paddd mm1, mm3 | ||
487 : | paddd mm7, mm1 | ||
488 : | movd eax, mm7 | ||
489 : | |||
490 : | ret | ||
491 : | Isibaar | 1793 | ENDFUNC |
492 : | edgomez | 1540 | |
493 : | Isibaar | 1877 | NON_EXEC_STACK |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |