[svn] / trunk / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Annotation of /trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory | Revision Log


Revision 115 - (view) (download)

1 : canard 98 /*
2 :    
3 :     Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
4 :    
5 :     This program is free software; you can redistribute it and/or modify
6 :     it under the terms of the GNU General Public License as published by
7 :     the Free Software Foundation; either version 2 of the License, or
8 :     (at your option) any later version.
9 :    
10 :     This program is distributed in the hope that it will be useful,
11 :     but WITHOUT ANY WARRANTY; without even the implied warranty of
12 :     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 :     GNU General Public License for more details.
14 :    
15 :     You should have received a copy of the GNU General Public License
16 :     along with this program; if not, write to the Free Software
17 :     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 :    
19 :    
20 : canard 115 $Id: sad_altivec.c,v 1.2 2002-04-11 10:18:40 canard Exp $
21 : canard 98 $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $
22 : canard 115 $Date: 2002-04-11 10:18:40 $
23 : canard 98 $Author: canard $
24 :    
25 :     */
26 :    
27 : canard 115 #define G_REG
28 :    
29 :     #ifdef G_REG
30 :     register vector unsigned char perm0 asm ("%v29");
31 :     register vector unsigned char perm1 asm ("%v30");
32 :     register vector unsigned int zerovec asm ("%v31");
33 :     #endif
34 :    
35 : canard 98 #include <stdio.h>
36 :    
37 :     #undef DEBUG
38 :    
39 : canard 115 static const vector unsigned char perms[2] = {
40 :     (vector unsigned char)( /* Used when cur is aligned */
41 :     0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
42 :     0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
43 :     ),
44 :     (vector unsigned char)( /* Used when cur is unaligned */
45 :     0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
46 :     0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
47 :     ),
48 :     };
49 :    
50 :     #ifdef G_REG
51 :     void sadInit_altivec(void)
52 :     {
53 :     perm0 = perms[0];
54 :     perm1 = perms[1];
55 :     zerovec = (vector unsigned int)(0);
56 :     }
57 :     static inline const vector unsigned char get_perm(unsigned long i)
58 :     {
59 :     return i ? perm1 : perm0;
60 :     }
61 :     #define ZERODEF
62 :     #define ZEROVEC zerovec
63 :     #else
64 :     void sadInit_altivec(void) { }
65 :     static inline const vector unsigned char get_perm(unsigned long i)
66 :     {
67 :     return perms[i];
68 :     }
69 :     #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
70 :     #define ZEROVEC zerovec
71 :     #endif
72 :    
73 :    
/*
 * SAD16() - accumulate the absolute differences of one 16-byte row.
 *
 * Works on the caller's locals: cur, ref, stride (in vector units), perm,
 * the scratch vectors t1..t4 and the accumulator sad. Both row pointers
 * are advanced by one stride afterwards.
 */
#define SAD16() \
    do { \
        t1 = vec_perm(ref[0], ref[1], perm); /* realign the reference row */ \
        t2 = vec_max(t1, *cur);              /* per-byte maximum */ \
        t3 = vec_min(t1, *cur);              /* per-byte minimum */ \
        t4 = vec_sub(t2, t3);                /* max - min = |difference| */ \
        sad = vec_sum4s(t4, sad);            /* add into the four partial sums */ \
        cur += stride; \
        ref += stride; \
    } while (0)
81 :    
82 :     /*
83 :     * This function assumes cur and stride are 16 bytes aligned and ref is unaligned
84 :     */
85 :     unsigned long
86 :     sad16_altivec( const vector unsigned char * cur,
87 :     const vector unsigned char * ref,
88 :     unsigned long stride,
89 :     const unsigned long best_sad)
90 :     {
91 :     vector unsigned char perm;
92 :     vector unsigned char t1, t2, t3, t4 ;
93 : canard 115 vector unsigned int sad;
94 : canard 98 vector signed int sumdiffs, best_vec;
95 :     unsigned long result;
96 : canard 115 ZERODEF;
97 :    
98 : canard 98 #ifdef DEBUG
99 :     if (((unsigned long)cur) & 0xf)
100 :     fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
101 :     // if (((unsigned long)ref) & 0xf)
102 :     // fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);
103 :     if (stride & 0xf)
104 :     fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
105 :     #endif
106 :     /* initialization */
107 : canard 115 sad = (vector unsigned int)(ZEROVEC);
108 : canard 98 stride >>= 4;
109 :     perm = vec_lvsl(0, (unsigned char *)ref);
110 :     *((unsigned long *)&best_vec) = best_sad;
111 :     best_vec = vec_splat(best_vec, 0);
112 :    
113 :     /* perform sum of differences between current and previous */
114 :     SAD16();
115 :     SAD16();
116 :     SAD16();
117 :     SAD16();
118 :     /* Temp sum for exit */
119 : canard 115 sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
120 : canard 98 if (vec_all_ge(sumdiffs, best_vec))
121 :     goto bail;
122 :     SAD16();
123 :     SAD16();
124 :     SAD16();
125 :     SAD16();
126 : canard 115 sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
127 : canard 98 if (vec_all_ge(sumdiffs, best_vec))
128 :     goto bail;
129 :     SAD16();
130 :     SAD16();
131 :     SAD16();
132 :     SAD16();
133 :     SAD16();
134 :     SAD16();
135 :     SAD16();
136 :     SAD16();
137 :    
138 :     /* sum all parts of difference into one 32 bit quantity */
139 : canard 115 sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
140 : canard 98 bail:
141 :     /* copy vector sum into unaligned result */
142 :     sumdiffs = vec_splat( sumdiffs, 3 );
143 :     vec_ste( sumdiffs, 0, (int *)&result );
144 :     return( result );
145 :     }
146 :    
/*
 * SAD8() - accumulate the absolute differences of two consecutive 8-byte
 * rows, packed together into one 16-byte vector.
 *
 * Works on the caller's locals: cur, ref, stride (in vector units),
 * perm_cur, perm_ref1, perm_ref2, the scratch vectors t1..t5 and tp, and
 * the accumulator sad. Both row pointers are advanced by two strides.
 */
#define SAD8() \
    do { \
        t1 = vec_perm(cur[0], cur[stride], perm_cur);          /* two cur rows -> one vector */ \
        t2 = vec_perm(ref[0], ref[1], perm_ref1);              /* realign first ref row */ \
        tp = vec_perm(ref[stride], ref[stride+1], perm_ref1);  /* realign second ref row */ \
        t2 = vec_perm(t2, tp, perm_ref2);                      /* two ref rows -> one vector */ \
        t3 = vec_max(t1, t2);                                  /* per-byte maximum */ \
        t4 = vec_min(t1, t2);                                  /* per-byte minimum */ \
        t5 = vec_sub(t3, t4);                                  /* max - min = |difference| */ \
        sad = vec_sum4s(t5, sad);                              /* add into the four partial sums */ \
        cur += (stride << 1); \
        ref += (stride << 1); \
    } while (0)
157 :    
158 :     /*
159 :     * This function assumes cur is 8 bytes aligned, stride is 16 bytes
160 :     * aligned and ref is unaligned
161 :     */
162 :     unsigned long
163 :     sad8_altivec( const vector unsigned char * cur,
164 :     const vector unsigned char * ref,
165 :     unsigned long stride)
166 :     {
167 :     vector unsigned char t1, t2, t3, t4, t5, tp ;
168 : canard 115 vector unsigned int sad;
169 : canard 98 vector signed int sumdiffs;
170 :     vector unsigned char perm_cur;
171 :     vector unsigned char perm_ref1, perm_ref2;
172 :     unsigned long result;
173 : canard 115 ZERODEF;
174 : canard 98
175 :     #ifdef DEBUG
176 :     if (((unsigned long)cur) & 0x7)
177 :     fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
178 :     // if (((unsigned long)ref) & 0x7)
179 :     // fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);
180 :     if (stride & 0xf)
181 :     fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
182 :     #endif
183 :    
184 : canard 115 perm_cur = get_perm((((unsigned long)cur)>>3) & 0x01);
185 : canard 98 perm_ref1 = vec_lvsl(0, (unsigned char *)ref);
186 : canard 115 perm_ref2 = get_perm(0);
187 : canard 98
188 :     /* initialization */
189 : canard 115 sad = (vector unsigned int)(ZEROVEC);
190 : canard 98 stride >>= 4;
191 :    
192 :     /* perform sum of differences between current and previous */
193 :     SAD8();
194 :     SAD8();
195 :     SAD8();
196 :     SAD8();
197 :    
198 :     /* sum all parts of difference into one 32 bit quantity */
199 : canard 115 sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
200 : canard 98
201 :     /* copy vector sum into unaligned result */
202 :     sumdiffs = vec_splat( sumdiffs, 3 );
203 :     vec_ste( sumdiffs, 0, (int *)&result );
204 :     return( result );
205 :     }
206 :    
/*
 * MEAN16(i) - load row i of the block into the caller's local c<i>, add its
 * bytes to the caller's accumulator mean, and step cur to the next row
 * (stride is in vector units).
 */
#define MEAN16(i) \
    do { \
        c##i = *cur; \
        mean = vec_sum4s(c##i, mean); \
        cur += stride; \
    } while (0)
211 :    
/*
 * DEV16(i) - add the absolute differences between the saved row c<i> and
 * the caller's vector mn to the caller's accumulator dev. Uses the
 * caller's scratch vectors t2..t4.
 */
#define DEV16(i) \
    do { \
        t2 = vec_max(c##i, mn);   /* per-byte maximum */ \
        t3 = vec_min(c##i, mn);   /* per-byte minimum */ \
        t4 = vec_sub(t2, t3);     /* max - min = |difference| */ \
        dev = vec_sum4s(t4, dev); /* add into the four partial sums */ \
    } while (0)
217 :    
218 :     unsigned long
219 :     dev16_altivec( const vector unsigned char * cur,
220 :     unsigned long stride)
221 :     {
222 :     vector unsigned char t2,t3,t4, mn;
223 : canard 115 vector unsigned int mean, dev;
224 : canard 98 vector signed int sumdiffs;
225 :     vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;
226 :     unsigned long result;
227 : canard 115 ZERODEF;
228 : canard 98
229 : canard 115 mean = (vector unsigned int)(ZEROVEC);
230 :     dev = (vector unsigned int)(ZEROVEC);
231 : canard 98 stride >>= 4;
232 :    
233 :     MEAN16(0);
234 :     MEAN16(1);
235 :     MEAN16(2);
236 :     MEAN16(3);
237 :     MEAN16(4);
238 :     MEAN16(5);
239 :     MEAN16(6);
240 :     MEAN16(7);
241 :     MEAN16(8);
242 :     MEAN16(9);
243 :     MEAN16(10);
244 :     MEAN16(11);
245 :     MEAN16(12);
246 :     MEAN16(13);
247 :     MEAN16(14);
248 :     MEAN16(15);
249 :    
250 : canard 115 sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
251 : canard 98 mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,
252 :     (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));
253 :     DEV16(0);
254 :     DEV16(1);
255 :     DEV16(2);
256 :     DEV16(3);
257 :     DEV16(4);
258 :     DEV16(5);
259 :     DEV16(6);
260 :     DEV16(7);
261 :     DEV16(8);
262 :     DEV16(9);
263 :     DEV16(10);
264 :     DEV16(11);
265 :     DEV16(12);
266 :     DEV16(13);
267 :     DEV16(14);
268 :     DEV16(15);
269 :    
270 :     /* sum all parts of difference into one 32 bit quantity */
271 : canard 115 sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
272 : canard 98
273 :     /* copy vector sum into unaligned result */
274 :     sumdiffs = vec_splat( sumdiffs, 3 );
275 :     vec_ste( sumdiffs, 0, (int *)&result );
276 :     return( result );
277 :     }

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4