[svn] / trunk / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Annotation of /trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 652 - (view) (download)

1 : chl 430 /*****************************************************************************
2 :     *
3 :     * XVID MPEG-4 VIDEO CODEC
4 :     * - altivec sum of absolute difference (C version)
5 :     *
6 :     * Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
7 :     *
8 : edgomez 652 * This file is part of XviD, a free MPEG-4 video encoder/decoder
9 : chl 430 *
10 : edgomez 652 * XviD is free software; you can redistribute it and/or modify it
11 :     * under the terms of the GNU General Public License as published by
12 : chl 430 * the Free Software Foundation; either version 2 of the License, or
13 :     * (at your option) any later version.
14 :     *
15 :     * This program is distributed in the hope that it will be useful,
16 :     * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 :     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 :     * GNU General Public License for more details.
19 :     *
20 :     * You should have received a copy of the GNU General Public License
21 :     * along with this program; if not, write to the Free Software
22 :     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 :     *
24 : edgomez 652 * Under section 8 of the GNU General Public License, the copyright
25 :     * holders of XVID explicitly forbid distribution in the following
26 :     * countries:
27 : chl 430 *
28 : edgomez 652 * - Japan
29 :     * - United States of America
30 :     *
31 :     * Linking XviD statically or dynamically with other modules is making a
32 :     * combined work based on XviD. Thus, the terms and conditions of the
33 :     * GNU General Public License cover the whole combination.
34 :     *
35 :     * As a special exception, the copyright holders of XviD give you
36 :     * permission to link XviD with independent modules that communicate with
37 :     * XviD solely through the VFW1.1 and DShow interfaces, regardless of the
38 :     * license terms of these independent modules, and to copy and distribute
39 :     * the resulting combined work under terms of your choice, provided that
40 :     * every copy of the combined work is accompanied by a complete copy of
41 :     * the source code of XviD (the version of XviD used to produce the
42 :     * combined work), being distributed under the terms of the GNU General
43 :     * Public License plus this exception. An independent module is a module
44 :     * which is not derived from or based on XviD.
45 :     *
46 :     * Note that people who make modified versions of XviD are not obligated
47 :     * to grant this special exception for their modified versions; it is
48 :     * their choice whether to do so. The GNU General Public License gives
49 :     * permission to release a modified version without this exception; this
50 :     * exception also makes it possible to release a modified version which
51 :     * carries forward this exception.
52 :     *
53 :     * $Id: sad_altivec.c,v 1.5 2002-11-17 00:32:06 edgomez Exp $
54 :     *
55 : chl 430 ****************************************************************************/
56 : canard 98
57 : canard 115 #define G_REG
58 :    
59 :     #ifdef G_REG
60 : edgomez 195 register vector unsigned char perm0 asm("%v29");
61 :     register vector unsigned char perm1 asm("%v30");
62 :     register vector unsigned int zerovec asm("%v31");
63 : canard 115 #endif
64 :    
65 : canard 98 #include <stdio.h>
66 :    
67 :     #undef DEBUG
68 :    
69 : canard 115 static const vector unsigned char perms[2] = {
70 : edgomez 195 (vector unsigned char) ( /* Used when cur is aligned */
71 :     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
72 :     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),
73 :     (vector unsigned char) ( /* Used when cur is unaligned */
74 :     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
75 :     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),
76 : canard 115 };
77 :    
78 :     #ifdef G_REG
79 : edgomez 195 void
80 :     sadInit_altivec(void)
81 : canard 115 {
82 :     perm0 = perms[0];
83 :     perm1 = perms[1];
84 : edgomez 195 zerovec = (vector unsigned int) (0);
85 : canard 115 }
86 : edgomez 195 static inline const vector unsigned char
87 :     get_perm(unsigned long i)
88 : canard 115 {
89 :     return i ? perm1 : perm0;
90 :     }
91 : edgomez 195
92 : canard 115 #define ZERODEF
93 :     #define ZEROVEC zerovec
94 :     #else
95 : edgomez 195 void
96 :     sadInit_altivec(void)
97 : canard 115 {
98 : edgomez 195 }
99 :     static inline const vector unsigned char
100 :     get_perm(unsigned long i)
101 :     {
102 : canard 115 return perms[i];
103 :     }
104 : edgomez 195
105 : canard 115 #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
106 :     #define ZEROVEC zerovec
107 :     #endif
108 :    
109 :    
110 : canard 98 #define SAD16() \
111 :     t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \
112 :     t2 = vec_max(t1, *cur); /* find largest of two */ \
113 :     t3 = vec_min(t1, *cur); /* find smaller of two */ \
114 :     t4 = vec_sub(t2, t3); /* find absolute difference */ \
115 :     sad = vec_sum4s(t4, sad); /* accumulate sum of differences */ \
116 :     cur += stride; ref += stride;
117 :    
118 :     /*
119 :     * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
120 :     */
121 :     unsigned long
122 : edgomez 195 sad16_altivec(const vector unsigned char *cur,
123 :     const vector unsigned char *ref,
124 :     unsigned long stride,
125 :     const unsigned long best_sad)
126 : canard 98 {
127 : edgomez 195 vector unsigned char perm;
128 :     vector unsigned char t1, t2, t3, t4;
129 :     vector unsigned int sad;
130 :     vector signed int sumdiffs, best_vec;
131 :     unsigned long result;
132 :    
133 :     ZERODEF;
134 :    
135 : canard 98 #ifdef DEBUG
136 : edgomez 195 if (((unsigned long) cur) & 0xf)
137 :     fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
138 : canard 98 // if (((unsigned long)ref) & 0xf)
139 : edgomez 195 // fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);
140 :     if (stride & 0xf)
141 :     fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
142 :     #endif
143 :     /* initialization */
144 :     sad = (vector unsigned int) (ZEROVEC);
145 :     stride >>= 4;
146 :     perm = vec_lvsl(0, (unsigned char *) ref);
147 :     *((unsigned long *) &best_vec) = best_sad;
148 :     best_vec = vec_splat(best_vec, 0);
149 :    
150 :     /* perform sum of differences between current and previous */
151 :     SAD16();
152 :     SAD16();
153 :     SAD16();
154 :     SAD16();
155 :     /* Temp sum for exit */
156 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
157 :     if (vec_all_ge(sumdiffs, best_vec))
158 :     goto bail;
159 :     SAD16();
160 :     SAD16();
161 :     SAD16();
162 :     SAD16();
163 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
164 :     if (vec_all_ge(sumdiffs, best_vec))
165 :     goto bail;
166 :     SAD16();
167 :     SAD16();
168 :     SAD16();
169 :     SAD16();
170 :     SAD16();
171 :     SAD16();
172 :     SAD16();
173 :     SAD16();
174 :    
175 :     /* sum all parts of difference into one 32 bit quantity */
176 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
177 :     bail:
178 :     /* copy vector sum into unaligned result */
179 :     sumdiffs = vec_splat(sumdiffs, 3);
180 :     vec_ste(sumdiffs, 0, (int *) &result);
181 :     return (result);
182 : canard 98 }
183 :    
184 :     #define SAD8() \
185 :     t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \
186 :     t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \
187 :     tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \
188 :     t2 = vec_perm(t2,tp,perm_ref2); \
189 :     t3 = vec_max(t1, t2); /* find largest of two */ \
190 :     t4 = vec_min(t1, t2); /* find smaller of two */ \
191 :     t5 = vec_sub(t3, t4); /* find absolute difference */ \
192 :     sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \
193 :     cur += stride<<1; ref += stride<<1;
194 :    
195 :     /*
196 :     * This function assumes cur is 8 bytes aligned, stride is 16 bytes
197 :     * aligned and ref is unaligned
198 :     */
199 :     unsigned long
200 : edgomez 195 sad8_altivec(const vector unsigned char *cur,
201 :     const vector unsigned char *ref,
202 :     unsigned long stride)
203 : canard 98 {
204 : edgomez 195 vector unsigned char t1, t2, t3, t4, t5, tp;
205 :     vector unsigned int sad;
206 :     vector signed int sumdiffs;
207 :     vector unsigned char perm_cur;
208 :     vector unsigned char perm_ref1, perm_ref2;
209 :     unsigned long result;
210 : canard 98
211 : edgomez 195 ZERODEF;
212 :    
213 : canard 98 #ifdef DEBUG
214 : edgomez 195 if (((unsigned long) cur) & 0x7)
215 :     fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
216 : canard 98 // if (((unsigned long)ref) & 0x7)
217 : edgomez 195 // fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);
218 :     if (stride & 0xf)
219 :     fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
220 :     #endif
221 : canard 98
222 : edgomez 195 perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
223 :     perm_ref1 = vec_lvsl(0, (unsigned char *) ref);
224 :     perm_ref2 = get_perm(0);
225 : canard 98
226 : edgomez 195 /* initialization */
227 :     sad = (vector unsigned int) (ZEROVEC);
228 :     stride >>= 4;
229 :    
230 :     /* perform sum of differences between current and previous */
231 :     SAD8();
232 :     SAD8();
233 :     SAD8();
234 :     SAD8();
235 :    
236 :     /* sum all parts of difference into one 32 bit quantity */
237 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
238 :    
239 :     /* copy vector sum into unaligned result */
240 :     sumdiffs = vec_splat(sumdiffs, 3);
241 :     vec_ste(sumdiffs, 0, (int *) &result);
242 :     return (result);
243 : canard 98 }
244 :    
245 :     #define MEAN16(i)\
246 :     c##i=*cur;\
247 :     mean = vec_sum4s(c##i,mean);\
248 :     cur += stride;
249 :    
250 :     #define DEV16(i) \
251 :     t2 = vec_max(c##i, mn); /* find largest of two */ \
252 :     t3 = vec_min(c##i, mn); /* find smaller of two */ \
253 :     t4 = vec_sub(t2, t3); /* find absolute difference */ \
254 :     dev = vec_sum4s(t4, dev);
255 :    
256 :     unsigned long
257 : edgomez 195 dev16_altivec(const vector unsigned char *cur,
258 :     unsigned long stride)
259 : canard 98 {
260 : edgomez 195 vector unsigned char t2, t3, t4, mn;
261 :     vector unsigned int mean, dev;
262 :     vector signed int sumdiffs;
263 :     vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
264 :     c13, c14, c15;
265 :     unsigned long result;
266 : canard 98
267 : edgomez 195 ZERODEF;
268 : canard 98
269 : edgomez 195 mean = (vector unsigned int) (ZEROVEC);
270 :     dev = (vector unsigned int) (ZEROVEC);
271 :     stride >>= 4;
272 : canard 98
273 : edgomez 195 MEAN16(0);
274 :     MEAN16(1);
275 :     MEAN16(2);
276 :     MEAN16(3);
277 :     MEAN16(4);
278 :     MEAN16(5);
279 :     MEAN16(6);
280 :     MEAN16(7);
281 :     MEAN16(8);
282 :     MEAN16(9);
283 :     MEAN16(10);
284 :     MEAN16(11);
285 :     MEAN16(12);
286 :     MEAN16(13);
287 :     MEAN16(14);
288 :     MEAN16(15);
289 : canard 98
290 : edgomez 195 sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
291 :     mn = vec_perm((vector unsigned char) sumdiffs,
292 :     (vector unsigned char) sumdiffs, (vector unsigned char) (14,
293 :     14,
294 :     14,
295 :     14,
296 :     14,
297 :     14,
298 :     14,
299 :     14,
300 :     14,
301 :     14,
302 :     14,
303 :     14,
304 :     14,
305 :     14,
306 :     14,
307 :     14));
308 :     DEV16(0);
309 :     DEV16(1);
310 :     DEV16(2);
311 :     DEV16(3);
312 :     DEV16(4);
313 :     DEV16(5);
314 :     DEV16(6);
315 :     DEV16(7);
316 :     DEV16(8);
317 :     DEV16(9);
318 :     DEV16(10);
319 :     DEV16(11);
320 :     DEV16(12);
321 :     DEV16(13);
322 :     DEV16(14);
323 :     DEV16(15);
324 :    
325 :     /* sum all parts of difference into one 32 bit quantity */
326 :     sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
327 :    
328 :     /* copy vector sum into unaligned result */
329 :     sumdiffs = vec_splat(sumdiffs, 3);
330 :     vec_ste(sumdiffs, 0, (int *) &result);
331 :     return (result);
332 : canard 98 }

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4