1 |
.file "quant_h263.1.c" |
.file "quant_h263_ia64.s" |
2 |
.pred.safe_across_calls p1-p5,p16-p63 |
.pred.safe_across_calls p1-p5,p16-p63 |
3 |
.section .rodata |
.section .rodata |
4 |
.align 4 |
.align 4 |
43 |
.global quant_intra_ia64# |
.global quant_intra_ia64# |
44 |
.proc quant_intra_ia64# |
.proc quant_intra_ia64# |
45 |
quant_intra_ia64: |
quant_intra_ia64: |
46 |
.prologue 12, 37 |
.prologue |
47 |
.save ar.pfs, r38 |
.save ar.pfs, r38 |
48 |
alloc r38 = ar.pfs, 4, 3, 2, 0 |
alloc r38 = ar.pfs, 4, 3, 2, 0 |
49 |
adds r16 = -8, r12 |
adds r16 = -8, r12 |
134 |
(p6) st2 [r15] = r14 |
(p6) st2 [r15] = r14 |
135 |
br .L12 |
br .L12 |
136 |
.L14: |
.L14: |
137 |
.pred.rel.mutex p8, p9 |
.pred.rel "mutex", p8, p9 |
138 |
setf.sig f6 = r18 |
setf.sig f6 = r18 |
139 |
add r16 = r17, r32 |
add r16 = r17, r32 |
140 |
;; |
;; |
275 |
quant_inter_ia64: |
quant_inter_ia64: |
276 |
|
|
277 |
|
|
278 |
/******************************************************** |
//******************************************************* |
279 |
* * |
//* * |
280 |
* const uint32_t mult = multipliers[quant]; * |
//* const uint32_t mult = multipliers[quant]; * |
281 |
* const uint16_t quant_m_2 = quant << 1; * |
//* const uint16_t quant_m_2 = quant << 1; * |
282 |
* const uint16_t quant_d_2 = quant >> 1; * |
//* const uint16_t quant_d_2 = quant >> 1; * |
283 |
* int sum = 0; * |
//* int sum = 0; * |
284 |
* uint32_t i; * |
//* uint32_t i; * |
285 |
* int16_t acLevel,acL; * |
//* int16_t acLevel,acL; * |
286 |
* * |
//* * |
287 |
********************************************************/ |
//*******************************************************/ |
288 |
|
|
289 |
|
|
290 |
|
|
291 |
LL=3 // LL = load latency |
LL=3 // LL = load latency |
292 |
|
//if LL is changed, you'll also have to change the .pred.rel... parts below! |
293 |
.prologue |
.prologue |
294 |
addl r14 = @ltoff(multipliers#), gp |
addl r14 = @ltoff(multipliers#), gp |
295 |
dep.z r15 = r34, 2, 32 |
dep.z r15 = r34, 2, 32 |
327 |
|
|
328 |
|
|
329 |
|
|
330 |
/******************************************************************************** |
//******************************************************************************* |
331 |
* * |
//* * |
332 |
* for (i = 0; i < 64; i++) { * |
//* for (i = 0; i < 64; i++) { * |
333 |
* acL=acLevel = data[i]; * |
//* acL=acLevel = data[i]; * |
334 |
* acLevel = ((acLevel < 0)?-acLevel:acLevel) - quant_d_2; * |
//* acLevel = ((acLevel < 0)?-acLevel:acLevel) - quant_d_2; * |
335 |
* if (acLevel < quant_m_2){ * |
//* if (acLevel < quant_m_2){ * |
336 |
* acLevel = 0; * |
//* acLevel = 0; * |
337 |
* } * |
//* } * |
338 |
* acLevel = (acLevel * mult) >> SCALEBITS; * |
//* acLevel = (acLevel * mult) >> SCALEBITS; * |
339 |
* sum += acLevel; * |
//* sum += acLevel; * |
340 |
* coeff[i] = ((acL < 0)?-acLevel:acLevel); * |
//* coeff[i] = ((acL < 0)?-acLevel:acLevel); * |
341 |
* } * |
//* } * |
342 |
* * |
//* * |
343 |
********************************************************************************/ |
//*******************************************************************************/ |
344 |
|
|
345 |
|
|
346 |
|
|
347 |
.explicit |
.explicit |
348 |
.L58: |
.L58: |
349 |
|
.pred.rel "clear", p29, p37 |
350 |
|
.pred.rel "mutex", p29, p37 |
351 |
|
|
352 |
//pipeline stage |
//pipeline stage |
353 |
{.mmi |
{.mmi |
354 |
(p[0]) ld2 ac1[0] = [r15],2 // 0 acL=acLevel = data[i]; |
(p[0]) ld2 ac1[0] = [r15],2 // 0 acL=acLevel = data[i]; |
366 |
(p[LL+3]) sxt2 ac2[2] = ac2[2] // LL+3 |
(p[LL+3]) sxt2 ac2[2] = ac2[2] // LL+3 |
367 |
} |
} |
368 |
{.mmi |
{.mmi |
369 |
|
.pred.rel "mutex", p34, p42 |
370 |
(cmp1[6]) mov ac3[0] = ac2[6] // LL+7 ac3 = acLevel; |
(cmp1[6]) mov ac3[0] = ac2[6] // LL+7 ac3 = acLevel; |
371 |
(cmp1neg[6]) sub ac3[0] = r0, ac2[6] // LL+7 ac3 = -acLevel; |
(cmp1neg[6]) sub ac3[0] = r0, ac2[6] // LL+7 ac3 = -acLevel; |
372 |
(p[LL+6]) pmpyshr2.u ac2[5] = r29, ac2[5], 16 // LL+6 acLevel = (acLevel * mult) >> SCALEBITS; |
(p[LL+6]) pmpyshr2.u ac2[5] = r29, ac2[5], 16 // LL+6 acLevel = (acLevel * mult) >> SCALEBITS; |
377 |
br.ctop.sptk.few .L58 |
br.ctop.sptk.few .L58 |
378 |
;; |
;; |
379 |
} |
} |
380 |
|
|
381 |
|
.pred.rel "clear", p29, p37 |
382 |
.default |
.default |
383 |
mov ar.ec = r17 |
mov ar.ec = r17 |
384 |
;; |
;; |
403 |
dequant_inter_ia64: |
dequant_inter_ia64: |
404 |
|
|
405 |
//*********************************************************************** |
//*********************************************************************** |
406 |
// * |
//* * |
407 |
// const uint16_t quant_m_2 = quant << 1; * |
//* const uint16_t quant_m_2 = quant << 1; * |
408 |
// const uint16_t quant_add = (quant & 1 ? quant : quant - 1); * |
//* const uint16_t quant_add = (quant & 1 ? quant : quant - 1); * |
409 |
// uint32_t i; * |
//* uint32_t i; * |
410 |
// * |
//* * |
411 |
//*********************************************************************** * |
//*********************************************************************** |
412 |
|
|
413 |
|
|
414 |
|
|
438 |
mov pr.rot = 1 << 16 |
mov pr.rot = 1 << 16 |
439 |
;; |
;; |
440 |
|
|
441 |
/******************************************************************************** |
//******************************************************************************* |
442 |
* * |
//* * |
443 |
*for (i = 0; i < 64; i++) { * |
//*for (i = 0; i < 64; i++) { * |
444 |
* int16_t acLevel = coeff[i]; * |
//* int16_t acLevel = coeff[i]; * |
445 |
* * |
//* * |
446 |
* if (acLevel == 0) * |
//* if (acLevel == 0) * |
447 |
* { * |
//* { * |
448 |
* data[i] = 0; * |
//* data[i] = 0; * |
449 |
* } * |
//* } * |
450 |
* else if (acLevel < 0) * |
//* else if (acLevel < 0) * |
451 |
* { * |
//* { * |
452 |
* acLevel = acLevel * quant_m_2 - quant_add; * |
//* acLevel = acLevel * quant_m_2 - quant_add; * |
453 |
* data[i] = (acLevel >= -2048 ? acLevel : -2048); * |
//* data[i] = (acLevel >= -2048 ? acLevel : -2048); * |
454 |
* } * |
//* } * |
455 |
* else // if (acLevel > 0) * |
//* else // if (acLevel > 0) * |
456 |
* { * |
//* { * |
457 |
* acLevel = acLevel * quant_m_2 + quant_add; * |
//* acLevel = acLevel * quant_m_2 + quant_add; * |
458 |
* data[i] = (acLevel <= 2047 ? acLevel : 2047); * |
//* data[i] = (acLevel <= 2047 ? acLevel : 2047); * |
459 |
* } * |
//* } * |
460 |
* } * |
//* } * |
461 |
* * |
//* * |
462 |
********************************************************************************/ |
//*******************************************************************************/ |
463 |
|
|
464 |
|
|
465 |
|
|
466 |
LL=2 // LL := load latency |
LL=2 // LL := load latency |
467 |
|
//if LL is changed, you'll also have to change the .pred.rel... parts below! |
468 |
|
|
469 |
|
|
470 |
.rotr ac1[LL+10], x[5], y1[3], y2[3] |
.rotr ac1[LL+10], x[5], y1[3], y2[3] |
474 |
//pipeline stage |
//pipeline stage |
475 |
|
|
476 |
.L60: |
.L60: |
477 |
|
.pred.rel "clear", p36 |
478 |
|
.pred.rel "mutex", p47, p49 |
479 |
|
.pred.rel "mutex", p46, p48 |
480 |
|
.pred.rel "mutex", p40, p45 |
481 |
|
.pred.rel "mutex", p39, p44 |
482 |
|
.pred.rel "mutex", p38, p43 |
483 |
|
.pred.rel "mutex", p37, p42 |
484 |
|
.pred.rel "mutex", p36, p41 |
485 |
{.mmi |
{.mmi |
486 |
(p[0])ld2 ac1[0] = [r14] ,2 // 0 acLevel = coeff[i]; |
(p[0])ld2 ac1[0] = [r14] ,2 // 0 acLevel = coeff[i]; |
487 |
(p[LL+1])cmp4.ne p6, cmp1neg[0] = 0, ac1[LL+1] // LL+1 |
(p[LL+1])cmp4.ne p6, cmp1neg[0] = 0, ac1[LL+1] // LL+1 |
505 |
(p[LL+4])sxt2 ac1[LL+4] = ac1[LL+4] // LL+4 |
(p[LL+4])sxt2 ac1[LL+4] = ac1[LL+4] // LL+4 |
506 |
} |
} |
507 |
{.mmi |
{.mmi |
508 |
(cmp2[4]) mov y1[0] = x[3] // LL+4 |
(cmp2[4]) mov y1[0] = x[3] // LL+5 |
509 |
(cmp2[4]) mov y2[0] = ac1[LL+5] // LL+4 |
(cmp2[4]) mov y2[0] = ac1[LL+5] // LL+5 |
510 |
(p[LL+6])cmp4.le cmp3[0], cmp3neg[0] = x[4], ac1[LL+6] // LL+6 |
(p[LL+6])cmp4.le cmp3[0], cmp3neg[0] = x[4], ac1[LL+6] // LL+6 |
511 |
} |
} |
512 |
{.mmi |
{.mmi |
520 |
br.ctop.sptk.few .L60 |
br.ctop.sptk.few .L60 |
521 |
;; |
;; |
522 |
} |
} |
523 |
|
.pred.rel "clear", p36 |
524 |
.default |
.default |
525 |
mov ar.lc = r2 |
mov ar.lc = r2 |
526 |
mov ar.pfs = r9 |
mov ar.pfs = r9 |