1 |
.file "sad_altivec.c" |
.file "sad_altivec.c" |
2 |
gcc2_compiled.: |
gcc2_compiled.: |
3 |
|
.section ".rodata" |
4 |
|
.align 4 |
5 |
|
.type perms,@object |
6 |
|
.size perms,32 |
7 |
|
perms: |
8 |
|
.long 66051 |
9 |
|
.long 67438087 |
10 |
|
.long 269554195 |
11 |
|
.long 336926231 |
12 |
|
.long 134810123 |
13 |
|
.long 202182159 |
14 |
|
.long 404298267 |
15 |
|
.long 471670303 |
16 |
.section ".text" |
.section ".text" |
17 |
.align 2 |
.align 2 |
18 |
.globl sad16_altivec |
.globl sad16_altivec |
21 |
stwu %r1,-48(%r1) |
stwu %r1,-48(%r1) |
22 |
addi %r9,%r4,16 |
addi %r9,%r4,16 |
23 |
lvx %v13,0,%r4 |
lvx %v13,0,%r4 |
|
vspltisw %v15,0 |
|
24 |
lvx %v0,0,%r9 |
lvx %v0,0,%r9 |
25 |
rlwinm %r5,%r5,0,0,27 |
rlwinm %r5,%r5,0,0,27 |
26 |
lvx %v1,0,%r3 |
lvx %v1,0,%r3 |
48 |
vminub %v18,%v19,%v0 |
vminub %v18,%v19,%v0 |
49 |
lvx %v12,0,%r9 |
lvx %v12,0,%r9 |
50 |
add %r3,%r3,%r5 |
add %r3,%r3,%r5 |
51 |
vsum4ubs %v17,%v2,%v15 |
vsum4ubs %v17,%v2,%v31 |
52 |
lvx %v1,0,%r3 |
lvx %v1,0,%r3 |
53 |
addi %r9,%r1,16 |
addi %r9,%r1,16 |
54 |
vmaxub %v2,%v19,%v0 |
vmaxub %v2,%v19,%v0 |
70 |
vmaxub %v2,%v19,%v1 |
vmaxub %v2,%v19,%v1 |
71 |
vsububm %v2,%v2,%v18 |
vsububm %v2,%v2,%v18 |
72 |
vsum4ubs %v17,%v2,%v17 |
vsum4ubs %v17,%v2,%v17 |
73 |
vsumsws %v0,%v17,%v15 |
vsumsws %v0,%v17,%v31 |
74 |
vcmpgtsw. %v1,%v7,%v0 |
vcmpgtsw. %v1,%v7,%v0 |
75 |
bc 12,26,.L19 |
bc 12,26,.L23 |
76 |
addi %r9,%r4,16 |
addi %r9,%r4,16 |
77 |
lvx %v1,0,%r4 |
lvx %v1,0,%r4 |
78 |
lvx %v0,0,%r9 |
lvx %v0,0,%r9 |
117 |
vmaxub %v2,%v19,%v0 |
vmaxub %v2,%v19,%v0 |
118 |
vsububm %v2,%v2,%v18 |
vsububm %v2,%v2,%v18 |
119 |
vsum4ubs %v17,%v2,%v17 |
vsum4ubs %v17,%v2,%v17 |
120 |
vsumsws %v0,%v17,%v15 |
vsumsws %v0,%v17,%v31 |
121 |
vcmpgtsw. %v7,%v7,%v0 |
vcmpgtsw. %v7,%v7,%v0 |
122 |
bc 12,26,.L19 |
bc 12,26,.L23 |
123 |
addi %r9,%r4,16 |
addi %r9,%r4,16 |
124 |
lvx %v1,0,%r4 |
lvx %v1,0,%r4 |
125 |
lvx %v0,0,%r9 |
lvx %v0,0,%r9 |
205 |
vmaxub %v2,%v19,%v0 |
vmaxub %v2,%v19,%v0 |
206 |
vsububm %v2,%v2,%v18 |
vsububm %v2,%v2,%v18 |
207 |
vsum4ubs %v17,%v2,%v17 |
vsum4ubs %v17,%v2,%v17 |
208 |
vsumsws %v0,%v17,%v15 |
vsumsws %v0,%v17,%v31 |
209 |
.L19: |
.L23: |
210 |
vspltw %v0,%v0,3 |
vspltw %v0,%v0,3 |
211 |
addi %r0,%r1,32 |
addi %r0,%r1,32 |
212 |
stvewx %v0,0,%r0 |
stvewx %v0,0,%r0 |
215 |
blr |
blr |
216 |
.Lfe1: |
.Lfe1: |
217 |
.size sad16_altivec,.Lfe1-sad16_altivec |
.size sad16_altivec,.Lfe1-sad16_altivec |
|
.section ".rodata" |
|
|
.align 4 |
|
|
.type perms,@object |
|
|
.size perms,32 |
|
|
perms: |
|
|
.long 66051 |
|
|
.long 67438087 |
|
|
.long 269554195 |
|
|
.long 336926231 |
|
|
.long 134810123 |
|
|
.long 202182159 |
|
|
.long 404298267 |
|
|
.long 471670303 |
|
|
.section ".text" |
|
218 |
.align 2 |
.align 2 |
219 |
.globl sad8_altivec |
.globl sad8_altivec |
220 |
.type sad8_altivec,@function |
.type sad8_altivec,@function |
221 |
sad8_altivec: |
sad8_altivec: |
222 |
stwu %r1,-16(%r1) |
stwu %r1,-16(%r1) |
223 |
|
andi. %r0,%r3,8 |
224 |
|
vsldoi %v16,%v30,%v30,0 |
225 |
|
bc 4,2,.L26 |
226 |
|
vsldoi %v16,%v29,%v29,0 |
227 |
|
.L26: |
228 |
srwi %r5,%r5,4 |
srwi %r5,%r5,4 |
229 |
lvx %v6,0,%r4 |
lvx %v8,0,%r4 |
230 |
vspltisw %v2,0 |
vsldoi %v6,%v29,%v29,0 |
231 |
slwi %r8,%r5,4 |
slwi %r10,%r5,4 |
232 |
lvx %v10,0,%r3 |
lvx %v11,0,%r3 |
233 |
add %r9,%r8,%r4 |
add %r9,%r10,%r4 |
234 |
lvx %v12,%r8,%r4 |
lvx %v10,%r10,%r4 |
235 |
|
addi %r11,%r4,16 |
236 |
|
lvx %v0,%r10,%r3 |
237 |
addi %r9,%r9,16 |
addi %r9,%r9,16 |
238 |
lvx %v11,%r8,%r3 |
lvx %v12,0,%r11 |
239 |
addi %r10,%r4,16 |
lvx %v13,0,%r9 |
|
lvx %v0,0,%r9 |
|
|
lvx %v13,0,%r10 |
|
|
lis %r11,perms@ha |
|
|
la %r11,perms@l(%r11) |
|
|
rlwinm %r9,%r3,1,27,27 |
|
|
lvx %v7,0,%r11 |
|
240 |
lvsl %v1,0,%r4 |
lvsl %v1,0,%r4 |
241 |
lvx %v8,%r11,%r9 |
slwi %r0,%r5,5 |
242 |
slwi %r5,%r5,5 |
add %r4,%r4,%r0 |
243 |
add %r4,%r4,%r5 |
vperm %v18,%v11,%v0,%v16 |
244 |
vperm %v16,%v12,%v0,%v1 |
add %r9,%r10,%r4 |
245 |
vperm %v19,%v6,%v13,%v1 |
vperm %v3,%v8,%v12,%v1 |
|
add %r9,%r8,%r4 |
|
246 |
lvx %v9,0,%r4 |
lvx %v9,0,%r4 |
247 |
|
vperm %v17,%v10,%v13,%v1 |
248 |
addi %r11,%r4,16 |
addi %r11,%r4,16 |
249 |
vperm %v18,%v10,%v11,%v8 |
lvx %v12,%r10,%r4 |
|
lvx %v12,%r8,%r4 |
|
250 |
addi %r9,%r9,16 |
addi %r9,%r9,16 |
|
vperm %v19,%v19,%v16,%v7 |
|
251 |
lvx %v13,0,%r11 |
lvx %v13,0,%r11 |
252 |
lvx %v0,0,%r9 |
lvx %v0,0,%r9 |
253 |
add %r3,%r3,%r5 |
vperm %v3,%v3,%v17,%v6 |
254 |
vminub %v17,%v18,%v19 |
add %r3,%r3,%r0 |
255 |
|
add %r4,%r4,%r0 |
256 |
lvx %v10,0,%r3 |
lvx %v10,0,%r3 |
257 |
add %r4,%r4,%r5 |
lvx %v11,%r10,%r3 |
258 |
lvx %v11,%r8,%r3 |
vminub %v19,%v18,%v3 |
259 |
vmaxub %v18,%v18,%v19 |
add %r9,%r10,%r4 |
260 |
add %r9,%r8,%r4 |
vmaxub %v2,%v18,%v3 |
|
vperm %v19,%v9,%v13,%v1 |
|
261 |
addi %r11,%r4,16 |
addi %r11,%r4,16 |
262 |
vperm %v16,%v12,%v0,%v1 |
vperm %v17,%v12,%v0,%v1 |
|
vsububm %v17,%v18,%v17 |
|
263 |
addi %r9,%r9,16 |
addi %r9,%r9,16 |
264 |
|
lvx %v4,0,%r11 |
265 |
|
vperm %v3,%v9,%v13,%v1 |
266 |
|
vsububm %v2,%v2,%v19 |
267 |
|
lvx %v5,0,%r9 |
268 |
lvx %v9,0,%r4 |
lvx %v9,0,%r4 |
269 |
add %r3,%r3,%r5 |
vperm %v18,%v10,%v11,%v16 |
270 |
vperm %v18,%v10,%v11,%v8 |
add %r3,%r3,%r0 |
271 |
lvx %v13,%r8,%r4 |
lvx %v13,%r10,%r4 |
272 |
vsum4ubs %v0,%v17,%v2 |
vperm %v3,%v3,%v17,%v6 |
273 |
vperm %v19,%v19,%v16,%v7 |
vsum4ubs %v0,%v2,%v31 |
274 |
lvx %v4,0,%r9 |
add %r4,%r4,%r0 |
275 |
add %r4,%r4,%r5 |
lvx %v12,%r10,%r3 |
276 |
lvx %v3,0,%r11 |
lvx %v8,0,%r3 |
277 |
add %r9,%r8,%r4 |
vminub %v19,%v18,%v3 |
278 |
vminub %v17,%v18,%v19 |
add %r9,%r10,%r4 |
279 |
lvx %v12,%r8,%r3 |
vmaxub %v2,%v18,%v3 |
280 |
addi %r9,%r9,16 |
addi %r9,%r9,16 |
281 |
lvx %v6,0,%r3 |
lvx %v7,%r10,%r4 |
282 |
vmaxub %v18,%v18,%v19 |
vperm %v17,%v13,%v5,%v1 |
283 |
addi %r11,%r4,16 |
addi %r11,%r4,16 |
|
vperm %v16,%v13,%v4,%v1 |
|
|
lvx %v5,%r8,%r4 |
|
|
add %r3,%r3,%r5 |
|
|
vperm %v19,%v9,%v3,%v1 |
|
|
vsububm %v17,%v18,%v17 |
|
284 |
lvx %v10,0,%r9 |
lvx %v10,0,%r9 |
285 |
lvx %v9,0,%r11 |
vperm %v3,%v9,%v4,%v1 |
286 |
addi %r0,%r1,8 |
vsububm %v2,%v2,%v19 |
|
vperm %v18,%v6,%v12,%v8 |
|
287 |
lvx %v11,0,%r4 |
lvx %v11,0,%r4 |
288 |
vsum4ubs %v0,%v17,%v0 |
lvx %v9,0,%r11 |
289 |
vperm %v19,%v19,%v16,%v7 |
vperm %v18,%v8,%v12,%v16 |
290 |
lvx %v12,%r8,%r3 |
add %r3,%r3,%r0 |
291 |
|
vperm %v3,%v3,%v17,%v6 |
292 |
|
vsum4ubs %v0,%v2,%v0 |
293 |
|
lvx %v12,%r10,%r3 |
294 |
lvx %v13,0,%r3 |
lvx %v13,0,%r3 |
295 |
vperm %v16,%v5,%v10,%v1 |
vperm %v17,%v7,%v10,%v1 |
296 |
vminub %v17,%v18,%v19 |
addi %r8,%r1,8 |
297 |
vmaxub %v18,%v18,%v19 |
vminub %v19,%v18,%v3 |
298 |
vperm %v19,%v11,%v9,%v1 |
vmaxub %v2,%v18,%v3 |
299 |
vsububm %v17,%v18,%v17 |
vperm %v3,%v11,%v9,%v1 |
300 |
vperm %v18,%v13,%v12,%v8 |
vsububm %v2,%v2,%v19 |
301 |
vperm %v19,%v19,%v16,%v7 |
vperm %v18,%v13,%v12,%v16 |
302 |
vsum4ubs %v0,%v17,%v0 |
vperm %v3,%v3,%v17,%v6 |
303 |
vminub %v17,%v18,%v19 |
vsum4ubs %v0,%v2,%v0 |
304 |
vmaxub %v18,%v18,%v19 |
vminub %v19,%v18,%v3 |
305 |
vsububm %v17,%v18,%v17 |
vmaxub %v2,%v18,%v3 |
306 |
vsum4ubs %v0,%v17,%v0 |
vsububm %v2,%v2,%v19 |
307 |
vsumsws %v0,%v0,%v2 |
vsum4ubs %v0,%v2,%v0 |
308 |
|
vsumsws %v0,%v0,%v31 |
309 |
vspltw %v0,%v0,3 |
vspltw %v0,%v0,3 |
310 |
stvewx %v0,0,%r0 |
stvewx %v0,0,%r8 |
311 |
lwz %r3,8(%r1) |
lwz %r3,8(%r1) |
312 |
la %r1,16(%r1) |
la %r1,16(%r1) |
313 |
blr |
blr |
319 |
dev16_altivec: |
dev16_altivec: |
320 |
stwu %r1,-16(%r1) |
stwu %r1,-16(%r1) |
321 |
lvx %v13,0,%r3 |
lvx %v13,0,%r3 |
|
vspltisw %v15,0 |
|
322 |
rlwinm %r4,%r4,0,0,27 |
rlwinm %r4,%r4,0,0,27 |
|
add %r3,%r3,%r4 |
|
323 |
vspltisb %v1,14 |
vspltisb %v1,14 |
324 |
|
add %r3,%r3,%r4 |
325 |
lvx %v12,0,%r3 |
lvx %v12,0,%r3 |
326 |
addi %r0,%r1,8 |
addi %r0,%r1,8 |
327 |
add %r3,%r3,%r4 |
add %r3,%r3,%r4 |
328 |
vsum4ubs %v0,%v13,%v15 |
vsum4ubs %v0,%v13,%v31 |
329 |
lvx %v11,0,%r3 |
lvx %v11,0,%r3 |
330 |
add %r3,%r3,%r4 |
add %r3,%r3,%r4 |
331 |
lvx %v10,0,%r3 |
lvx %v10,0,%r3 |
367 |
vsum4ubs %v0,%v18,%v0 |
vsum4ubs %v0,%v18,%v0 |
368 |
vsum4ubs %v0,%v17,%v0 |
vsum4ubs %v0,%v17,%v0 |
369 |
vsum4ubs %v0,%v16,%v0 |
vsum4ubs %v0,%v16,%v0 |
370 |
vsumsws %v0,%v0,%v15 |
vsumsws %v0,%v0,%v31 |
371 |
vperm %v1,%v0,%v0,%v1 |
vperm %v1,%v0,%v0,%v1 |
372 |
vminub %v14,%v13,%v1 |
vminub %v15,%v13,%v1 |
373 |
vmaxub %v13,%v13,%v1 |
vmaxub %v13,%v13,%v1 |
374 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
375 |
vminub %v14,%v12,%v1 |
vminub %v15,%v12,%v1 |
376 |
vsum4ubs %v0,%v13,%v15 |
vsum4ubs %v0,%v13,%v31 |
377 |
vmaxub %v13,%v12,%v1 |
vmaxub %v13,%v12,%v1 |
378 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
379 |
vminub %v14,%v11,%v1 |
vminub %v15,%v11,%v1 |
380 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
381 |
vmaxub %v13,%v11,%v1 |
vmaxub %v13,%v11,%v1 |
382 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
383 |
vminub %v14,%v10,%v1 |
vminub %v15,%v10,%v1 |
384 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
385 |
vmaxub %v13,%v10,%v1 |
vmaxub %v13,%v10,%v1 |
386 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
387 |
vminub %v14,%v9,%v1 |
vminub %v15,%v9,%v1 |
388 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
389 |
vmaxub %v13,%v9,%v1 |
vmaxub %v13,%v9,%v1 |
390 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
391 |
vminub %v14,%v8,%v1 |
vminub %v15,%v8,%v1 |
392 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
393 |
vmaxub %v13,%v8,%v1 |
vmaxub %v13,%v8,%v1 |
394 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
395 |
vminub %v14,%v7,%v1 |
vminub %v15,%v7,%v1 |
396 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
397 |
vmaxub %v13,%v7,%v1 |
vmaxub %v13,%v7,%v1 |
398 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
399 |
vminub %v14,%v6,%v1 |
vminub %v15,%v6,%v1 |
400 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
401 |
vmaxub %v13,%v6,%v1 |
vmaxub %v13,%v6,%v1 |
402 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
403 |
vminub %v14,%v5,%v1 |
vminub %v15,%v5,%v1 |
404 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
405 |
vmaxub %v13,%v5,%v1 |
vmaxub %v13,%v5,%v1 |
406 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
407 |
vminub %v14,%v4,%v1 |
vminub %v15,%v4,%v1 |
408 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
409 |
vmaxub %v13,%v4,%v1 |
vmaxub %v13,%v4,%v1 |
410 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
411 |
vminub %v14,%v3,%v1 |
vminub %v15,%v3,%v1 |
412 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
413 |
vmaxub %v13,%v3,%v1 |
vmaxub %v13,%v3,%v1 |
414 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
415 |
vminub %v14,%v2,%v1 |
vminub %v15,%v2,%v1 |
416 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
417 |
vmaxub %v13,%v2,%v1 |
vmaxub %v13,%v2,%v1 |
418 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
419 |
vminub %v14,%v19,%v1 |
vminub %v15,%v19,%v1 |
420 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
421 |
vmaxub %v13,%v19,%v1 |
vmaxub %v13,%v19,%v1 |
422 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
423 |
vminub %v14,%v18,%v1 |
vminub %v15,%v18,%v1 |
424 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
425 |
vmaxub %v13,%v18,%v1 |
vmaxub %v13,%v18,%v1 |
426 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
427 |
vminub %v14,%v17,%v1 |
vminub %v15,%v17,%v1 |
428 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
429 |
vmaxub %v13,%v17,%v1 |
vmaxub %v13,%v17,%v1 |
430 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
431 |
vminub %v14,%v16,%v1 |
vminub %v15,%v16,%v1 |
432 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
433 |
vmaxub %v13,%v16,%v1 |
vmaxub %v13,%v16,%v1 |
434 |
vsububm %v13,%v13,%v14 |
vsububm %v13,%v13,%v15 |
435 |
vsum4ubs %v0,%v13,%v0 |
vsum4ubs %v0,%v13,%v0 |
436 |
vsumsws %v0,%v0,%v15 |
vsumsws %v0,%v0,%v31 |
437 |
vspltw %v0,%v0,3 |
vspltw %v0,%v0,3 |
438 |
stvewx %v0,0,%r0 |
stvewx %v0,0,%r0 |
439 |
lwz %r3,8(%r1) |
lwz %r3,8(%r1) |
441 |
blr |
blr |
442 |
.Lfe3: |
.Lfe3: |
443 |
.size dev16_altivec,.Lfe3-dev16_altivec |
.size dev16_altivec,.Lfe3-dev16_altivec |
444 |
|
.align 2 |
445 |
|
.globl sadInit_altivec |
446 |
|
.type sadInit_altivec,@function |
447 |
|
sadInit_altivec: |
448 |
|
lis %r9,perms@ha |
449 |
|
vspltisw %v31,0 |
450 |
|
la %r9,perms@l(%r9) |
451 |
|
addi %r11,%r9,16 |
452 |
|
lvx %v29,0,%r9 |
453 |
|
lvx %v30,0,%r11 |
454 |
|
blr |
455 |
|
.Lfe4: |
456 |
|
.size sadInit_altivec,.Lfe4-sadInit_altivec |
457 |
.ident "GCC: (GNU) 2.95.3 20010111 (BLL/AltiVec prerelease/franzo/20010111)" |
.ident "GCC: (GNU) 2.95.3 20010111 (BLL/AltiVec prerelease/franzo/20010111)" |