# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
#
#
# $Id: cbp_altivec.s,v 1.2 2002-03-28 12:29:58 canard Exp $
# $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/bitstream/ppc_asm/cbp_altivec.s,v $
# $Date: 2002-03-28 12:29:58 $
# $Author: canard $
#
# This is my first PPC ASM attempt. So I might do nasty things.
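#
# For reference, a rough C equivalent of what this routine computes (a
# sketch added for readability, not part of the original source; the
# 6x64 int16 coefficient layout and the calc_cbp_c name are assumptions
# read off the code below):
#
#   #include <stdint.h>
#
#   uint32_t calc_cbp_c(const int16_t coeff[6][64])
#   {
#       uint32_t cbp = 0;
#       int i, j;
#       for (i = 0; i < 6; i++) {
#           int16_t acc = 0;
#           /* OR every coefficient except coeff[i][0], the DC value */
#           for (j = 1; j < 64; j++)
#               acc |= coeff[i][j];
#           if (acc != 0)
#               cbp |= 1 << (5 - i);  /* block 0 maps to bit 5 */
#       }
#       return cbp;
#   }
#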

.text
.global calc_cbp_altivec
calc_cbp_altivec:
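# On the PowerPC ABI the first integer argument arrives in r3, and r3
# also carries the return value, so the coeffs pointer comes in through
# r3 and the cbp result goes back out in it.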
# Set VRSAVE
li %r4,-1		# li takes a signed 16-bit immediate; -1 is 0xFFFFFFFF
mtspr 256,%r4
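# (VRSAVE is SPR 256; setting every bit tells the OS to preserve all 32
# vector registers across context switches.)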

# r9 will contain coeffs addr
mr %r9,%r3
# r3 contains the result, therefore we set it to 0
li %r3,0

# CTR is the loop counter (6 blocks)
li %r4,6
mtctr %r4

# VR9 contains 0
vxor 9,9,9

# VR10 will help us remove the first 16 bits (the DC coefficient) of the
# first row
lis %r4,.skip@ha
addi %r4,%r4,.skip@l
lvx 10,0,%r4
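# (.skip is assumed to be defined elsewhere in this file as a 128-bit
# constant whose first 16-bit element is zero and whose remaining bits
# are all ones.)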

.loop:
mr %r6,%r9
# coeffs is a matrix of 16-bit cells; load the first row of this block
lvxl 1,0,%r6
# Set the first 16 bits to 0
vand 1,1,10

addi %r6,%r6,16
lvxl 2,0,%r6

addi %r6,%r6,16
lvxl 3,0,%r6

addi %r6,%r6,16
lvxl 4,0,%r6

addi %r6,%r6,16
lvxl 5,0,%r6

addi %r6,%r6,16
lvxl 6,0,%r6

addi %r6,%r6,16
lvxl 7,0,%r6

addi %r6,%r6,16
lvxl 8,0,%r6

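# OR rows 2..8 into VR1; afterwards VR1 is zero iff every AC coefficient
# of this block is zero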
vor 1,2,1
vor 1,3,1
vor 1,4,1
vor 1,5,1
vor 1,6,1
vor 1,7,1
vor 1,8,1

# is VR1 == 0 ?
vcmpequw. 3,1,9
bt 24,.newline
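
# Fall through: at least one coefficient survived the OR, so set this
# block's bit in the result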
.cbp:
# cbp calc: set bit (CTR - 1), i.e. bit 5 for the first block down to
# bit 0 for the last
mfctr %r5
subi %r5,%r5,1
li %r4,1
slw %r4,%r4,%r5
or %r3,%r3,%r4

.newline:
# advance to the next block: 64 coefficients of 16 bits each = 128 bytes
addi %r9,%r9,128
bdnz .loop
blr
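
# Assumed C prototype for this routine (not declared in this file):
#   uint32_t calc_cbp_altivec(const int16_t coeffs[6][64]);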