[svn] / branches / dev-api-4 / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Annotation of /branches/dev-api-4/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 430 - (view) (download)
Original Path: trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c

1 : chl 430 /*****************************************************************************
2 :     *
3 :     * XVID MPEG-4 VIDEO CODEC
4 :     * - altivec sum of absolute difference (C version)
5 :     *
6 :     * Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
7 :     *
8 :     * This program is an implementation of a part of one or more MPEG-4
9 :     * Video tools as specified in ISO/IEC 14496-2 standard. Those intending
10 :     * to use this software module in hardware or software products are
11 :     * advised that its use may infringe existing patents or copyrights, and
12 :     * any such use would be at such party's own risk. The original
13 :     * developer of this software module and his/her company, and subsequent
14 :     * editors and their companies, will have no liability for use of this
15 :     * software or modifications or derivatives thereof.
16 :     *
17 :     * This program is free software; you can redistribute it and/or modify
18 :     * it under the terms of the GNU General Public License as published by
19 :     * the Free Software Foundation; either version 2 of the License, or
20 :     * (at your option) any later version.
21 :     *
22 :     * This program is distributed in the hope that it will be useful,
23 :     * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 :     * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 :     * GNU General Public License for more details.
26 :     *
27 :     * You should have received a copy of the GNU General Public License
28 :     * along with this program; if not, write to the Free Software
29 :     * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 :     *
31 :     * $Id: sad_altivec.c,v 1.4 2002-09-06 16:59:47 chl Exp $
32 :     *
33 :     ****************************************************************************/
34 : canard 98
/*
 * Build-time switch: when defined, the permute masks and the zero vector
 * live in global register variables that sadInit_altivec() fills in;
 * otherwise they are read from the perms[] table / created per call.
 */
35 : canard 115 #define G_REG
36 :    
/*
 * Global register variables (only with G_REG): perm0/perm1 hold perms[0] and
 * perms[1], zerovec holds an all-zero vector. They are bound to the AltiVec
 * registers v29-v31 and are loaded by sadInit_altivec().
 * NOTE(review): these declarations precede the #include below, presumably
 * because global register variables have to be declared before any function
 * definition - confirm before reordering.
 */
37 :     #ifdef G_REG
38 : edgomez 195 register vector unsigned char perm0 asm("%v29");
39 :     register vector unsigned char perm1 asm("%v30");
40 :     register vector unsigned int zerovec asm("%v31");
41 : canard 115 #endif
42 :    
43 : canard 98 #include <stdio.h>
44 :    
45 :     #undef DEBUG
46 :    
/*
 * vec_perm() masks that pack one 8-byte half of each of two 16-byte vectors
 * into a single vector. Indices 0x00-0x0f address the first vec_perm()
 * operand, 0x10-0x1f the second. sad8_altivec() picks the entry from bit 3
 * of the cur address, and uses entry 0 to merge two realigned ref rows.
 */
47 : canard 115 static const vector unsigned char perms[2] = {
48 : edgomez 195 (vector unsigned char) ( /* perms[0]: low 8 bytes of both operands (cur is 16-byte aligned) */
49 :     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
50 :     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),
51 :     (vector unsigned char) ( /* perms[1]: high 8 bytes of both operands (cur is 8 bytes past a 16-byte boundary) */
52 :     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
53 :     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),
54 : canard 115 };
55 :    
56 :     #ifdef G_REG
57 : edgomez 195 void
58 :     sadInit_altivec(void)
59 : canard 115 {
60 :     perm0 = perms[0];
61 :     perm1 = perms[1];
62 : edgomez 195 zerovec = (vector unsigned int) (0);
63 : canard 115 }
64 : edgomez 195 static inline const vector unsigned char
65 :     get_perm(unsigned long i)
66 : canard 115 {
67 :     return i ? perm1 : perm0;
68 :     }
69 : edgomez 195
/*
 * G_REG build: the zero vector already lives in the global register variable
 * zerovec, so ZERODEF declares nothing and ZEROVEC names that global.
 */
70 : canard 115 #define ZERODEF
71 :     #define ZEROVEC zerovec
72 :     #else
/*
 * Non-G_REG build: kept so callers can invoke it unconditionally; there is
 * no global register state to prepare.
 */
void
sadInit_altivec(void)
{
	/* nothing to initialise */
}
77 :     static inline const vector unsigned char
78 :     get_perm(unsigned long i)
79 :     {
80 : canard 115 return perms[i];
81 :     }
82 : edgomez 195
/*
 * Non-G_REG build: ZERODEF declares a local all-zero vector named zerovec in
 * the function that expands it (it must sit with the declarations), and
 * ZEROVEC names that local.
 */
83 : canard 115 #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
84 :     #define ZEROVEC zerovec
85 :     #endif
86 :    
87 :    
/*
 * Accumulate the absolute differences of one 16-pixel row into 'sad' and
 * step cur/ref to the next row.
 *
 * Uses these names from the expanding scope: cur, ref, stride (counted in
 * 16-byte vectors), perm (vec_lvsl mask for ref), t1..t4 and sad.
 * Wrapped in do { } while (0) so that "SAD16();" is a single statement.
 */
#define SAD16() \
	do { \
		t1 = vec_perm(ref[0], ref[1], perm);	/* realign the (unaligned) reference row */ \
		t2 = vec_max(t1, *cur);					/* per-byte larger of ref and cur */ \
		t3 = vec_min(t1, *cur);					/* per-byte smaller of ref and cur */ \
		t4 = vec_sub(t2, t3);					/* max - min = absolute difference */ \
		sad = vec_sum4s(t4, sad);				/* add each group of 4 bytes into its word of sad */ \
		cur += stride; \
		ref += stride; \
	} while (0)
95 :    
96 :     /*
97 :     * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
98 :     */
99 :     unsigned long
100 : edgomez 195 sad16_altivec(const vector unsigned char *cur,
101 :     const vector unsigned char *ref,
102 :     unsigned long stride,
103 :     const unsigned long best_sad)
104 : canard 98 {
105 : edgomez 195 vector unsigned char perm;
106 :     vector unsigned char t1, t2, t3, t4;
107 :     vector unsigned int sad;
108 :     vector signed int sumdiffs, best_vec;
109 :     unsigned long result;
110 :    
111 :     ZERODEF;
112 :    
113 : canard 98 #ifdef DEBUG
114 : edgomez 195 if (((unsigned long) cur) & 0xf)
115 :     fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
116 : canard 98 // if (((unsigned long)ref) & 0xf)
117 : edgomez 195 // fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);
118 :     if (stride & 0xf)
119 :     fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
120 :     #endif
121 :     /* initialization */
122 :     sad = (vector unsigned int) (ZEROVEC);
123 :     stride >>= 4;
124 :     perm = vec_lvsl(0, (unsigned char *) ref);
125 :     *((unsigned long *) &best_vec) = best_sad;
126 :     best_vec = vec_splat(best_vec, 0);
127 :    
128 :     /* perform sum of differences between current and previous */
129 :     SAD16();
130 :     SAD16();
131 :     SAD16();
132 :     SAD16();
133 :     /* Temp sum for exit */
134 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
135 :     if (vec_all_ge(sumdiffs, best_vec))
136 :     goto bail;
137 :     SAD16();
138 :     SAD16();
139 :     SAD16();
140 :     SAD16();
141 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
142 :     if (vec_all_ge(sumdiffs, best_vec))
143 :     goto bail;
144 :     SAD16();
145 :     SAD16();
146 :     SAD16();
147 :     SAD16();
148 :     SAD16();
149 :     SAD16();
150 :     SAD16();
151 :     SAD16();
152 :    
153 :     /* sum all parts of difference into one 32 bit quantity */
154 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
155 :     bail:
156 :     /* copy vector sum into unaligned result */
157 :     sumdiffs = vec_splat(sumdiffs, 3);
158 :     vec_ste(sumdiffs, 0, (int *) &result);
159 :     return (result);
160 : canard 98 }
161 :    
/*
 * Accumulate the absolute differences of two 8-pixel rows into 'sad' and
 * step cur/ref two rows down.
 *
 * Uses these names from the expanding scope: cur, ref, stride (counted in
 * 16-byte vectors), perm_cur, perm_ref1, perm_ref2, t1..t5, tp and sad.
 * Wrapped in do { } while (0) so that "SAD8();" is a single statement.
 */
#define SAD8() \
	do { \
		t1 = vec_perm(cur[0], cur[stride], perm_cur);			/* pack 8 bytes of two cur rows into one vector */ \
		t2 = vec_perm(ref[0], ref[1], perm_ref1);				/* realign first ref row */ \
		tp = vec_perm(ref[stride], ref[stride+1], perm_ref1);	/* realign second ref row */ \
		t2 = vec_perm(t2,tp,perm_ref2);							/* pack 8 bytes of both ref rows into one vector */ \
		t3 = vec_max(t1, t2);									/* per-byte larger of cur and ref */ \
		t4 = vec_min(t1, t2);									/* per-byte smaller of cur and ref */ \
		t5 = vec_sub(t3, t4);									/* max - min = absolute difference */ \
		sad = vec_sum4s(t5, sad);								/* add each group of 4 bytes into its word of sad */ \
		cur += stride<<1; \
		ref += stride<<1; \
	} while (0)
172 :    
173 :     /*
174 :     * This function assumes cur is 8 bytes aligned, stride is 16 bytes
175 :     * aligned and ref is unaligned
176 :     */
177 :     unsigned long
178 : edgomez 195 sad8_altivec(const vector unsigned char *cur,
179 :     const vector unsigned char *ref,
180 :     unsigned long stride)
181 : canard 98 {
182 : edgomez 195 vector unsigned char t1, t2, t3, t4, t5, tp;
183 :     vector unsigned int sad;
184 :     vector signed int sumdiffs;
185 :     vector unsigned char perm_cur;
186 :     vector unsigned char perm_ref1, perm_ref2;
187 :     unsigned long result;
188 : canard 98
189 : edgomez 195 ZERODEF;
190 :    
191 : canard 98 #ifdef DEBUG
192 : edgomez 195 if (((unsigned long) cur) & 0x7)
193 :     fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
194 : canard 98 // if (((unsigned long)ref) & 0x7)
195 : edgomez 195 // fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);
196 :     if (stride & 0xf)
197 :     fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
198 :     #endif
199 : canard 98
200 : edgomez 195 perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
201 :     perm_ref1 = vec_lvsl(0, (unsigned char *) ref);
202 :     perm_ref2 = get_perm(0);
203 : canard 98
204 : edgomez 195 /* initialization */
205 :     sad = (vector unsigned int) (ZEROVEC);
206 :     stride >>= 4;
207 :    
208 :     /* perform sum of differences between current and previous */
209 :     SAD8();
210 :     SAD8();
211 :     SAD8();
212 :     SAD8();
213 :    
214 :     /* sum all parts of difference into one 32 bit quantity */
215 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
216 :    
217 :     /* copy vector sum into unaligned result */
218 :     sumdiffs = vec_splat(sumdiffs, 3);
219 :     vec_ste(sumdiffs, 0, (int *) &result);
220 :     return (result);
221 : canard 98 }
222 :    
/*
 * Load row i of the block into c<i>, add its bytes (in groups of 4) into the
 * word lanes of 'mean', and step cur to the next row.
 *
 * Uses these names from the expanding scope: cur, stride (counted in 16-byte
 * vectors), mean and the row variable c<i>.
 * Wrapped in do { } while (0) so that "MEAN16(n);" is a single statement.
 */
#define MEAN16(i) \
	do { \
		c##i = *cur; \
		mean = vec_sum4s(c##i, mean); \
		cur += stride; \
	} while (0)
227 :    
/*
 * Add the absolute differences between row c<i> and the replicated mean
 * 'mn' into the word lanes of 'dev'.
 *
 * Uses these names from the expanding scope: c<i>, mn, t2..t4 and dev.
 * Wrapped in do { } while (0) so that "DEV16(n);" is a single statement.
 */
#define DEV16(i) \
	do { \
		t2 = vec_max(c##i, mn);		/* per-byte larger of pixel and mean */ \
		t3 = vec_min(c##i, mn);		/* per-byte smaller of pixel and mean */ \
		t4 = vec_sub(t2, t3);		/* max - min = absolute difference */ \
		dev = vec_sum4s(t4, dev);	/* add each group of 4 bytes into its word of dev */ \
	} while (0)
233 :    
234 :     unsigned long
235 : edgomez 195 dev16_altivec(const vector unsigned char *cur,
236 :     unsigned long stride)
237 : canard 98 {
238 : edgomez 195 vector unsigned char t2, t3, t4, mn;
239 :     vector unsigned int mean, dev;
240 :     vector signed int sumdiffs;
241 :     vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
242 :     c13, c14, c15;
243 :     unsigned long result;
244 : canard 98
245 : edgomez 195 ZERODEF;
246 : canard 98
247 : edgomez 195 mean = (vector unsigned int) (ZEROVEC);
248 :     dev = (vector unsigned int) (ZEROVEC);
249 :     stride >>= 4;
250 : canard 98
251 : edgomez 195 MEAN16(0);
252 :     MEAN16(1);
253 :     MEAN16(2);
254 :     MEAN16(3);
255 :     MEAN16(4);
256 :     MEAN16(5);
257 :     MEAN16(6);
258 :     MEAN16(7);
259 :     MEAN16(8);
260 :     MEAN16(9);
261 :     MEAN16(10);
262 :     MEAN16(11);
263 :     MEAN16(12);
264 :     MEAN16(13);
265 :     MEAN16(14);
266 :     MEAN16(15);
267 : canard 98
268 : edgomez 195 sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
269 :     mn = vec_perm((vector unsigned char) sumdiffs,
270 :     (vector unsigned char) sumdiffs, (vector unsigned char) (14,
271 :     14,
272 :     14,
273 :     14,
274 :     14,
275 :     14,
276 :     14,
277 :     14,
278 :     14,
279 :     14,
280 :     14,
281 :     14,
282 :     14,
283 :     14,
284 :     14,
285 :     14));
286 :     DEV16(0);
287 :     DEV16(1);
288 :     DEV16(2);
289 :     DEV16(3);
290 :     DEV16(4);
291 :     DEV16(5);
292 :     DEV16(6);
293 :     DEV16(7);
294 :     DEV16(8);
295 :     DEV16(9);
296 :     DEV16(10);
297 :     DEV16(11);
298 :     DEV16(12);
299 :     DEV16(13);
300 :     DEV16(14);
301 :     DEV16(15);
302 :    
303 :     /* sum all parts of difference into one 32 bit quantity */
304 :     sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
305 :    
306 :     /* copy vector sum into unaligned result */
307 :     sumdiffs = vec_splat(sumdiffs, 3);
308 :     vec_ste(sumdiffs, 0, (int *) &result);
309 :     return (result);
310 : canard 98 }

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4