Parent Directory
|
Revision Log
Revision 115 - (view) (download)
1 : | canard | 98 | /* |
2 : | |||
3 : | Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org> | ||
4 : | |||
5 : | This program is free software; you can redistribute it and/or modify | ||
6 : | it under the terms of the GNU General Public License as published by | ||
7 : | the Free Software Foundation; either version 2 of the License, or | ||
8 : | (at your option) any later version. | ||
9 : | |||
10 : | This program is distributed in the hope that it will be useful, | ||
11 : | but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
12 : | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
13 : | GNU General Public License for more details. | ||
14 : | |||
15 : | You should have received a copy of the GNU General Public License | ||
16 : | along with this program; if not, write to the Free Software | ||
17 : | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
18 : | |||
19 : | |||
20 : | canard | 115 | $Id: sad_altivec.c,v 1.2 2002-04-11 10:18:40 canard Exp $ |
21 : | canard | 98 | $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $ |
22 : | canard | 115 | $Date: 2002-04-11 10:18:40 $ |
23 : | canard | 98 | $Author: canard $ |
24 : | |||
25 : | */ | ||
26 : | |||
27 : | canard | 115 | #define G_REG |
28 : | |||
29 : | #ifdef G_REG | ||
30 : | register vector unsigned char perm0 asm ("%v29"); | ||
31 : | register vector unsigned char perm1 asm ("%v30"); | ||
32 : | register vector unsigned int zerovec asm ("%v31"); | ||
33 : | #endif | ||
34 : | |||
35 : | canard | 98 | #include <stdio.h> |
36 : | |||
37 : | #undef DEBUG | ||
38 : | |||
39 : | canard | 115 | static const vector unsigned char perms[2] = { |
40 : | (vector unsigned char)( /* Used when cur is aligned */ | ||
41 : | 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, | ||
42 : | 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 | ||
43 : | ), | ||
44 : | (vector unsigned char)( /* Used when cur is unaligned */ | ||
45 : | 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, | ||
46 : | 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f | ||
47 : | ), | ||
48 : | }; | ||
49 : | |||
50 : | #ifdef G_REG | ||
51 : | void sadInit_altivec(void) | ||
52 : | { | ||
53 : | perm0 = perms[0]; | ||
54 : | perm1 = perms[1]; | ||
55 : | zerovec = (vector unsigned int)(0); | ||
56 : | } | ||
57 : | static inline const vector unsigned char get_perm(unsigned long i) | ||
58 : | { | ||
59 : | return i ? perm1 : perm0; | ||
60 : | } | ||
61 : | #define ZERODEF | ||
62 : | #define ZEROVEC zerovec | ||
63 : | #else | ||
64 : | void sadInit_altivec(void) { } | ||
65 : | static inline const vector unsigned char get_perm(unsigned long i) | ||
66 : | { | ||
67 : | return perms[i]; | ||
68 : | } | ||
69 : | #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0) | ||
70 : | #define ZEROVEC zerovec | ||
71 : | #endif | ||
72 : | |||
73 : | |||
/*
 * SAD16 - accumulate one 16-byte row of |ref - cur| into `sad` and step both
 * pointers to the next row.
 *
 * Works on the caller's locals: cur, ref, stride (in 16-byte units), perm,
 * t1..t4 and sad.  Wrapped in do { } while (0) so that `SAD16();` is a single
 * statement and stays correct under an unbraced if/for.
 */
#define SAD16() \
	do { \
		t1 = vec_perm(ref[0], ref[1], perm);	/* realign the reference row */ \
		t2 = vec_max(t1, *cur);			/* per-byte larger value */ \
		t3 = vec_min(t1, *cur);			/* per-byte smaller value */ \
		t4 = vec_sub(t2, t3);			/* per-byte absolute difference */ \
		sad = vec_sum4s(t4, sad);		/* add groups of 4 bytes into 4 words */ \
		cur += stride; \
		ref += stride; \
	} while (0)
81 : | |||
82 : | /* | ||
83 : | * This function assumes cur and stride are 16 bytes aligned and ref is unaligned | ||
84 : | */ | ||
85 : | unsigned long | ||
86 : | sad16_altivec( const vector unsigned char * cur, | ||
87 : | const vector unsigned char * ref, | ||
88 : | unsigned long stride, | ||
89 : | const unsigned long best_sad) | ||
90 : | { | ||
91 : | vector unsigned char perm; | ||
92 : | vector unsigned char t1, t2, t3, t4 ; | ||
93 : | canard | 115 | vector unsigned int sad; |
94 : | canard | 98 | vector signed int sumdiffs, best_vec; |
95 : | unsigned long result; | ||
96 : | canard | 115 | ZERODEF; |
97 : | |||
98 : | canard | 98 | #ifdef DEBUG |
99 : | if (((unsigned long)cur) & 0xf) | ||
100 : | fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); | ||
101 : | // if (((unsigned long)ref) & 0xf) | ||
102 : | // fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref); | ||
103 : | if (stride & 0xf) | ||
104 : | fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); | ||
105 : | #endif | ||
106 : | /* initialization */ | ||
107 : | canard | 115 | sad = (vector unsigned int)(ZEROVEC); |
108 : | canard | 98 | stride >>= 4; |
109 : | perm = vec_lvsl(0, (unsigned char *)ref); | ||
110 : | *((unsigned long *)&best_vec) = best_sad; | ||
111 : | best_vec = vec_splat(best_vec, 0); | ||
112 : | |||
113 : | /* perform sum of differences between current and previous */ | ||
114 : | SAD16(); | ||
115 : | SAD16(); | ||
116 : | SAD16(); | ||
117 : | SAD16(); | ||
118 : | /* Temp sum for exit */ | ||
119 : | canard | 115 | sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
120 : | canard | 98 | if (vec_all_ge(sumdiffs, best_vec)) |
121 : | goto bail; | ||
122 : | SAD16(); | ||
123 : | SAD16(); | ||
124 : | SAD16(); | ||
125 : | SAD16(); | ||
126 : | canard | 115 | sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
127 : | canard | 98 | if (vec_all_ge(sumdiffs, best_vec)) |
128 : | goto bail; | ||
129 : | SAD16(); | ||
130 : | SAD16(); | ||
131 : | SAD16(); | ||
132 : | SAD16(); | ||
133 : | SAD16(); | ||
134 : | SAD16(); | ||
135 : | SAD16(); | ||
136 : | SAD16(); | ||
137 : | |||
138 : | /* sum all parts of difference into one 32 bit quantity */ | ||
139 : | canard | 115 | sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
140 : | canard | 98 | bail: |
141 : | /* copy vector sum into unaligned result */ | ||
142 : | sumdiffs = vec_splat( sumdiffs, 3 ); | ||
143 : | vec_ste( sumdiffs, 0, (int *)&result ); | ||
144 : | return( result ); | ||
145 : | } | ||
146 : | |||
/*
 * SAD8 - accumulate two 8-byte rows of |ref - cur| into `sad` and step both
 * pointers down by two rows.
 *
 * Works on the caller's locals: cur, ref, stride (in 16-byte units),
 * perm_cur, perm_ref1, perm_ref2, t1..t5, tp and sad.  Wrapped in
 * do { } while (0) so that `SAD8();` is a single statement and stays correct
 * under an unbraced if/for.
 */
#define SAD8() \
	do { \
		t1 = vec_perm(cur[0], cur[stride], perm_cur);		/* 8 cur bytes of row 0 + 8 of row 1 */ \
		t2 = vec_perm(ref[0], ref[1], perm_ref1);		/* realign ref row 0 */ \
		tp = vec_perm(ref[stride], ref[stride+1], perm_ref1);	/* realign ref row 1 */ \
		t2 = vec_perm(t2, tp, perm_ref2);			/* first 8 bytes of each ref row */ \
		t3 = vec_max(t1, t2);					/* per-byte larger value */ \
		t4 = vec_min(t1, t2);					/* per-byte smaller value */ \
		t5 = vec_sub(t3, t4);					/* per-byte absolute difference */ \
		sad = vec_sum4s(t5, sad);				/* add groups of 4 bytes into 4 words */ \
		cur += stride<<1; \
		ref += stride<<1; \
	} while (0)
157 : | |||
158 : | /* | ||
159 : | * This function assumes cur is 8 bytes aligned, stride is 16 bytes | ||
160 : | * aligned and ref is unaligned | ||
161 : | */ | ||
162 : | unsigned long | ||
163 : | sad8_altivec( const vector unsigned char * cur, | ||
164 : | const vector unsigned char * ref, | ||
165 : | unsigned long stride) | ||
166 : | { | ||
167 : | vector unsigned char t1, t2, t3, t4, t5, tp ; | ||
168 : | canard | 115 | vector unsigned int sad; |
169 : | canard | 98 | vector signed int sumdiffs; |
170 : | vector unsigned char perm_cur; | ||
171 : | vector unsigned char perm_ref1, perm_ref2; | ||
172 : | unsigned long result; | ||
173 : | canard | 115 | ZERODEF; |
174 : | canard | 98 | |
175 : | #ifdef DEBUG | ||
176 : | if (((unsigned long)cur) & 0x7) | ||
177 : | fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); | ||
178 : | // if (((unsigned long)ref) & 0x7) | ||
179 : | // fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref); | ||
180 : | if (stride & 0xf) | ||
181 : | fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); | ||
182 : | #endif | ||
183 : | |||
184 : | canard | 115 | perm_cur = get_perm((((unsigned long)cur)>>3) & 0x01); |
185 : | canard | 98 | perm_ref1 = vec_lvsl(0, (unsigned char *)ref); |
186 : | canard | 115 | perm_ref2 = get_perm(0); |
187 : | canard | 98 | |
188 : | /* initialization */ | ||
189 : | canard | 115 | sad = (vector unsigned int)(ZEROVEC); |
190 : | canard | 98 | stride >>= 4; |
191 : | |||
192 : | /* perform sum of differences between current and previous */ | ||
193 : | SAD8(); | ||
194 : | SAD8(); | ||
195 : | SAD8(); | ||
196 : | SAD8(); | ||
197 : | |||
198 : | /* sum all parts of difference into one 32 bit quantity */ | ||
199 : | canard | 115 | sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); |
200 : | canard | 98 | |
201 : | /* copy vector sum into unaligned result */ | ||
202 : | sumdiffs = vec_splat( sumdiffs, 3 ); | ||
203 : | vec_ste( sumdiffs, 0, (int *)&result ); | ||
204 : | return( result ); | ||
205 : | } | ||
206 : | |||
/*
 * MEAN16(i) - load row i of the block into local c<i>, add its 16 bytes to
 * the running pixel sum `mean`, and step `cur` to the next row.
 *
 * Works on the caller's locals: cur, stride (in 16-byte units), mean and
 * c0..c15.  Wrapped in do { } while (0) so the invocation is one statement.
 */
#define MEAN16(i) \
	do { \
		c##i = *cur; \
		mean = vec_sum4s(c##i, mean); \
		cur += stride; \
	} while (0)

/*
 * DEV16(i) - add |c<i> - mn| for the 16 bytes of saved row i to `dev`.
 *
 * Works on the caller's locals: c0..c15, mn, t2..t4 and dev.  Wrapped in
 * do { } while (0) so the invocation is one statement.
 */
#define DEV16(i) \
	do { \
		t2 = vec_max(c##i, mn);		/* per-byte larger value */ \
		t3 = vec_min(c##i, mn);		/* per-byte smaller value */ \
		t4 = vec_sub(t2, t3);		/* per-byte absolute difference */ \
		dev = vec_sum4s(t4, dev);	/* add groups of 4 bytes into 4 words */ \
	} while (0)
217 : | |||
218 : | unsigned long | ||
219 : | dev16_altivec( const vector unsigned char * cur, | ||
220 : | unsigned long stride) | ||
221 : | { | ||
222 : | vector unsigned char t2,t3,t4, mn; | ||
223 : | canard | 115 | vector unsigned int mean, dev; |
224 : | canard | 98 | vector signed int sumdiffs; |
225 : | vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15; | ||
226 : | unsigned long result; | ||
227 : | canard | 115 | ZERODEF; |
228 : | canard | 98 | |
229 : | canard | 115 | mean = (vector unsigned int)(ZEROVEC); |
230 : | dev = (vector unsigned int)(ZEROVEC); | ||
231 : | canard | 98 | stride >>= 4; |
232 : | |||
233 : | MEAN16(0); | ||
234 : | MEAN16(1); | ||
235 : | MEAN16(2); | ||
236 : | MEAN16(3); | ||
237 : | MEAN16(4); | ||
238 : | MEAN16(5); | ||
239 : | MEAN16(6); | ||
240 : | MEAN16(7); | ||
241 : | MEAN16(8); | ||
242 : | MEAN16(9); | ||
243 : | MEAN16(10); | ||
244 : | MEAN16(11); | ||
245 : | MEAN16(12); | ||
246 : | MEAN16(13); | ||
247 : | MEAN16(14); | ||
248 : | MEAN16(15); | ||
249 : | |||
250 : | canard | 115 | sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); |
251 : | canard | 98 | mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, |
252 : | (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14)); | ||
253 : | DEV16(0); | ||
254 : | DEV16(1); | ||
255 : | DEV16(2); | ||
256 : | DEV16(3); | ||
257 : | DEV16(4); | ||
258 : | DEV16(5); | ||
259 : | DEV16(6); | ||
260 : | DEV16(7); | ||
261 : | DEV16(8); | ||
262 : | DEV16(9); | ||
263 : | DEV16(10); | ||
264 : | DEV16(11); | ||
265 : | DEV16(12); | ||
266 : | DEV16(13); | ||
267 : | DEV16(14); | ||
268 : | DEV16(15); | ||
269 : | |||
270 : | /* sum all parts of difference into one 32 bit quantity */ | ||
271 : | canard | 115 | sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); |
272 : | canard | 98 | |
273 : | /* copy vector sum into unaligned result */ | ||
274 : | sumdiffs = vec_splat( sumdiffs, 3 ); | ||
275 : | vec_ste( sumdiffs, 0, (int *)&result ); | ||
276 : | return( result ); | ||
277 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |