[svn] / branches / dev-api-4 / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Annotation of /branches/dev-api-4/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory | Revision Log


Revision 851 - (view) (download)
Original Path: trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c

1 : edgomez 851 /*
2 : canard 98
3 : edgomez 851 Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
4 :    
5 :     This program is free software; you can redistribute it and/or modify
6 :     it under the terms of the GNU General Public License as published by
7 :     the Free Software Foundation; either version 2 of the License, or
8 :     (at your option) any later version.
9 :    
10 :     This program is distributed in the hope that it will be useful,
11 :     but WITHOUT ANY WARRANTY; without even the implied warranty of
12 :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 :     GNU General Public License for more details.
14 :    
15 :     You should have received a copy of the GNU General Public License
16 :     along with this program; if not, write to the Free Software
17 :     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 :    
19 :    
20 :     $Id: sad_altivec.c,v 1.6 2003-02-15 15:22:19 edgomez Exp $
21 :     $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $
22 :     $Date: 2003-02-15 15:22:19 $
23 :     $Author: edgomez $
24 :    
25 :     */
26 :    
/*
 * G_REG selects the build variant in which the two permute masks and an
 * all-zero vector are declared as global register variables bound to the
 * AltiVec registers v29, v30 and v31.  These declarations only reserve the
 * registers; the values are assigned in sadInit_altivec().
 */
27 : canard 115 #define G_REG
28 :    
29 :     #ifdef G_REG
30 : edgomez 195 register vector unsigned char perm0 asm("%v29");
31 :     register vector unsigned char perm1 asm("%v30");
32 :     register vector unsigned int zerovec asm("%v31");
33 : canard 115 #endif
34 :    
35 : canard 98 #include <stdio.h>
36 :    
37 :     #undef DEBUG
38 :    
/*
 * vec_perm() masks used by the 8x8 SAD code to merge two 16-byte vectors
 * into one.  Indices 0x00-0x0f select bytes of the first vec_perm operand,
 * 0x10-0x1f bytes of the second one.
 *   perms[0]: bytes 0-7 of the first operand followed by bytes 0-7 of the second
 *   perms[1]: bytes 8-15 of the first operand followed by bytes 8-15 of the second
 */
39 : canard 115 static const vector unsigned char perms[2] = {
40 : edgomez 195 (vector unsigned char) ( /* Used when cur is aligned */
41 :     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
42 :     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),
43 :     (vector unsigned char) ( /* Used when cur is unaligned */
44 :     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
45 :     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),
46 : canard 115 };
47 :    
48 :     #ifdef G_REG
49 : edgomez 195 void
50 :     sadInit_altivec(void)
51 : canard 115 {
52 :     perm0 = perms[0];
53 :     perm1 = perms[1];
54 : edgomez 195 zerovec = (vector unsigned int) (0);
55 : canard 115 }
56 : edgomez 195 static inline const vector unsigned char
57 :     get_perm(unsigned long i)
58 : canard 115 {
59 :     return i ? perm1 : perm0;
60 :     }
61 : edgomez 195
62 : canard 115 #define ZERODEF
63 :     #define ZEROVEC zerovec
64 :     #else
65 : edgomez 195 void
66 :     sadInit_altivec(void)
67 : canard 115 {
68 : edgomez 195 }
69 :     static inline const vector unsigned char
70 :     get_perm(unsigned long i)
71 :     {
72 : canard 115 return perms[i];
73 :     }
74 : edgomez 195
75 : canard 115 #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
76 :     #define ZEROVEC zerovec
77 :     #endif
78 :    
79 :    
80 : canard 98 #define SAD16() \
81 :     t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \
82 :     t2 = vec_max(t1, *cur); /* find largest of two */ \
83 :     t3 = vec_min(t1, *cur); /* find smaller of two */ \
84 :     t4 = vec_sub(t2, t3); /* find absolute difference */ \
85 :     sad = vec_sum4s(t4, sad); /* accumulate sum of differences */ \
86 :     cur += stride; ref += stride;
87 :    
88 :     /*
89 :     * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
90 :     */
91 :     unsigned long
92 : edgomez 195 sad16_altivec(const vector unsigned char *cur,
93 :     const vector unsigned char *ref,
94 :     unsigned long stride,
95 :     const unsigned long best_sad)
96 : canard 98 {
97 : edgomez 195 vector unsigned char perm;
98 :     vector unsigned char t1, t2, t3, t4;
99 :     vector unsigned int sad;
100 :     vector signed int sumdiffs, best_vec;
101 :     unsigned long result;
102 :    
103 :     ZERODEF;
104 :    
105 : canard 98 #ifdef DEBUG
106 : edgomez 195 if (((unsigned long) cur) & 0xf)
107 :     fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
108 : canard 98 // if (((unsigned long)ref) & 0xf)
109 : edgomez 195 // fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);
110 :     if (stride & 0xf)
111 :     fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
112 :     #endif
113 :     /* initialization */
114 :     sad = (vector unsigned int) (ZEROVEC);
115 :     stride >>= 4;
116 :     perm = vec_lvsl(0, (unsigned char *) ref);
117 :     *((unsigned long *) &best_vec) = best_sad;
118 :     best_vec = vec_splat(best_vec, 0);
119 :    
120 :     /* perform sum of differences between current and previous */
121 :     SAD16();
122 :     SAD16();
123 :     SAD16();
124 :     SAD16();
125 :     /* Temp sum for exit */
126 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
127 :     if (vec_all_ge(sumdiffs, best_vec))
128 :     goto bail;
129 :     SAD16();
130 :     SAD16();
131 :     SAD16();
132 :     SAD16();
133 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
134 :     if (vec_all_ge(sumdiffs, best_vec))
135 :     goto bail;
136 :     SAD16();
137 :     SAD16();
138 :     SAD16();
139 :     SAD16();
140 :     SAD16();
141 :     SAD16();
142 :     SAD16();
143 :     SAD16();
144 :    
145 :     /* sum all parts of difference into one 32 bit quantity */
146 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
147 :     bail:
148 :     /* copy vector sum into unaligned result */
149 :     sumdiffs = vec_splat(sumdiffs, 3);
150 :     vec_ste(sumdiffs, 0, (int *) &result);
151 :     return (result);
152 : canard 98 }
153 :    
154 :     #define SAD8() \
155 :     t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \
156 :     t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \
157 :     tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \
158 :     t2 = vec_perm(t2,tp,perm_ref2); \
159 :     t3 = vec_max(t1, t2); /* find largest of two */ \
160 :     t4 = vec_min(t1, t2); /* find smaller of two */ \
161 :     t5 = vec_sub(t3, t4); /* find absolute difference */ \
162 :     sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \
163 :     cur += stride<<1; ref += stride<<1;
164 :    
165 :     /*
166 :     * This function assumes cur is 8 bytes aligned, stride is 16 bytes
167 :     * aligned and ref is unaligned
168 :     */
169 :     unsigned long
170 : edgomez 195 sad8_altivec(const vector unsigned char *cur,
171 :     const vector unsigned char *ref,
172 :     unsigned long stride)
173 : canard 98 {
174 : edgomez 195 vector unsigned char t1, t2, t3, t4, t5, tp;
175 :     vector unsigned int sad;
176 :     vector signed int sumdiffs;
177 :     vector unsigned char perm_cur;
178 :     vector unsigned char perm_ref1, perm_ref2;
179 :     unsigned long result;
180 : canard 98
181 : edgomez 195 ZERODEF;
182 :    
183 : canard 98 #ifdef DEBUG
184 : edgomez 195 if (((unsigned long) cur) & 0x7)
185 :     fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
186 : canard 98 // if (((unsigned long)ref) & 0x7)
187 : edgomez 195 // fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);
188 :     if (stride & 0xf)
189 :     fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
190 :     #endif
191 : canard 98
192 : edgomez 195 perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
193 :     perm_ref1 = vec_lvsl(0, (unsigned char *) ref);
194 :     perm_ref2 = get_perm(0);
195 : canard 98
196 : edgomez 195 /* initialization */
197 :     sad = (vector unsigned int) (ZEROVEC);
198 :     stride >>= 4;
199 :    
200 :     /* perform sum of differences between current and previous */
201 :     SAD8();
202 :     SAD8();
203 :     SAD8();
204 :     SAD8();
205 :    
206 :     /* sum all parts of difference into one 32 bit quantity */
207 :     sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
208 :    
209 :     /* copy vector sum into unaligned result */
210 :     sumdiffs = vec_splat(sumdiffs, 3);
211 :     vec_ste(sumdiffs, 0, (int *) &result);
212 :     return (result);
213 : canard 98 }
214 :    
215 :     #define MEAN16(i)\
216 :     c##i=*cur;\
217 :     mean = vec_sum4s(c##i,mean);\
218 :     cur += stride;
219 :    
220 :     #define DEV16(i) \
221 :     t2 = vec_max(c##i, mn); /* find largest of two */ \
222 :     t3 = vec_min(c##i, mn); /* find smaller of two */ \
223 :     t4 = vec_sub(t2, t3); /* find absolute difference */ \
224 :     dev = vec_sum4s(t4, dev);
225 :    
226 :     unsigned long
227 : edgomez 195 dev16_altivec(const vector unsigned char *cur,
228 :     unsigned long stride)
229 : canard 98 {
230 : edgomez 195 vector unsigned char t2, t3, t4, mn;
231 :     vector unsigned int mean, dev;
232 :     vector signed int sumdiffs;
233 :     vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
234 :     c13, c14, c15;
235 :     unsigned long result;
236 : canard 98
237 : edgomez 195 ZERODEF;
238 : canard 98
239 : edgomez 195 mean = (vector unsigned int) (ZEROVEC);
240 :     dev = (vector unsigned int) (ZEROVEC);
241 :     stride >>= 4;
242 : canard 98
243 : edgomez 195 MEAN16(0);
244 :     MEAN16(1);
245 :     MEAN16(2);
246 :     MEAN16(3);
247 :     MEAN16(4);
248 :     MEAN16(5);
249 :     MEAN16(6);
250 :     MEAN16(7);
251 :     MEAN16(8);
252 :     MEAN16(9);
253 :     MEAN16(10);
254 :     MEAN16(11);
255 :     MEAN16(12);
256 :     MEAN16(13);
257 :     MEAN16(14);
258 :     MEAN16(15);
259 : canard 98
260 : edgomez 195 sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
261 :     mn = vec_perm((vector unsigned char) sumdiffs,
262 :     (vector unsigned char) sumdiffs, (vector unsigned char) (14,
263 :     14,
264 :     14,
265 :     14,
266 :     14,
267 :     14,
268 :     14,
269 :     14,
270 :     14,
271 :     14,
272 :     14,
273 :     14,
274 :     14,
275 :     14,
276 :     14,
277 :     14));
278 :     DEV16(0);
279 :     DEV16(1);
280 :     DEV16(2);
281 :     DEV16(3);
282 :     DEV16(4);
283 :     DEV16(5);
284 :     DEV16(6);
285 :     DEV16(7);
286 :     DEV16(8);
287 :     DEV16(9);
288 :     DEV16(10);
289 :     DEV16(11);
290 :     DEV16(12);
291 :     DEV16(13);
292 :     DEV16(14);
293 :     DEV16(15);
294 :    
295 :     /* sum all parts of difference into one 32 bit quantity */
296 :     sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
297 :    
298 :     /* copy vector sum into unaligned result */
299 :     sumdiffs = vec_splat(sumdiffs, 3);
300 :     vec_ste(sumdiffs, 0, (int *) &result);
301 :     return (result);
302 : canard 98 }

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4