Annotation of /trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c

1 :	canard	98	/*
2 :
3 :			Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
4 :
5 :			This program is free software; you can redistribute it and/or modify
6 :			it under the terms of the GNU General Public License as published by
7 :			the Free Software Foundation; either version 2 of the License, or
8 :			(at your option) any later version.
9 :
10 :			This program is distributed in the hope that it will be useful,
11 :			but WITHOUT ANY WARRANTY; without even the implied warranty of
12 :			MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 :			GNU General Public License for more details.
14 :
15 :			You should have received a copy of the GNU General Public License
16 :			along with this program; if not, write to the Free Software
17 :			Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 :
19 :
20 :			$Id: sad_altivec.c,v 1.1 2002-04-03 14:17:05 canard Exp $
21 :			$Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $
22 :			$Date: 2002-04-03 14:17:05 $
23 :			$Author: canard $
24 :
25 :			*/
26 :
27 :			#include <stdio.h>
28 :
29 :			#undef DEBUG
30 :
/*
 * SAD16: add the absolute differences of one 16-byte row to `sad`.
 *
 * Expands in the body of sad16_altivec() and works on that function's
 * locals: cur, ref, stride (already in 16-byte vector units), perm,
 * t1..t4 and sad.  Both row pointers are advanced to the next row.
 *
 * Wrapped in do { } while (0) so that `SAD16();` is a single statement.
 */
#define SAD16() \
	do { \
		t1 = vec_perm(ref[0], ref[1], perm); /* realign the reference row */ \
		t2 = vec_max(t1, *cur);              /* larger of each byte pair */ \
		t3 = vec_min(t1, *cur);              /* smaller of each byte pair */ \
		t4 = vec_sub(t2, t3);                /* absolute difference */ \
		sad = vec_sum4s(t4, sad);            /* accumulate sum of differences */ \
		cur += stride; \
		ref += stride; \
	} while (0)
38 :
39 :			/*
40 :			* This function assumes cur and stride are 16 bytes aligned and ref is unaligned
41 :			*/
42 :			unsigned long
43 :			sad16_altivec( const vector unsigned char * cur,
44 :			const vector unsigned char * ref,
45 :			unsigned long stride,
46 :			const unsigned long best_sad)
47 :			{
48 :			vector unsigned char perm;
49 :			vector unsigned char t1, t2, t3, t4 ;
50 :			vector unsigned int sad, zero;
51 :			vector signed int sumdiffs, best_vec;
52 :			unsigned long result;
53 :
54 :			#ifdef DEBUG
55 :			if (((unsigned long)cur) & 0xf)
56 :			fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
57 :			// if (((unsigned long)ref) & 0xf)
58 :			// fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref);
59 :			if (stride & 0xf)
60 :			fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
61 :			#endif
62 :			/* initialization */
63 :			zero = (vector unsigned int)(0);
64 :			sad = (vector unsigned int)(0);
65 :			stride >>= 4;
66 :			perm = vec_lvsl(0, (unsigned char *)ref);
67 :			((unsigned long )&best_vec) = best_sad;
68 :			best_vec = vec_splat(best_vec, 0);
69 :
70 :			/* perform sum of differences between current and previous */
71 :			SAD16();
72 :			SAD16();
73 :			SAD16();
74 :			SAD16();
75 :			/* Temp sum for exit */
76 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
77 :			if (vec_all_ge(sumdiffs, best_vec))
78 :			goto bail;
79 :			SAD16();
80 :			SAD16();
81 :			SAD16();
82 :			SAD16();
83 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
84 :			if (vec_all_ge(sumdiffs, best_vec))
85 :			goto bail;
86 :			SAD16();
87 :			SAD16();
88 :			SAD16();
89 :			SAD16();
90 :			SAD16();
91 :			SAD16();
92 :			SAD16();
93 :			SAD16();
94 :
95 :			/* sum all parts of difference into one 32 bit quantity */
96 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
97 :			bail:
98 :			/* copy vector sum into unaligned result */
99 :			sumdiffs = vec_splat( sumdiffs, 3 );
100 :			vec_ste( sumdiffs, 0, (int *)&result );
101 :			return( result );
102 :			}
103 :
/*
 * SAD8: add the absolute differences of two 8-byte rows to `sad`.
 *
 * Expands in the body of sad8_altivec() and works on that function's
 * locals: cur, ref, stride (already in 16-byte vector units), perm_cur,
 * perm_ref1, perm_ref2, t1..t5, tp and sad.  Two rows are packed into one
 * vector per operand, and both pointers are advanced by two rows.
 *
 * Wrapped in do { } while (0) so that `SAD8();` is a single statement.
 */
#define SAD8() \
	do { \
		t1 = vec_perm(cur[0], cur[stride], perm_cur);         /* pack two current rows */ \
		t2 = vec_perm(ref[0], ref[1], perm_ref1);             /* realign first reference row */ \
		tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* realign second reference row */ \
		t2 = vec_perm(t2, tp, perm_ref2);                     /* pack two reference rows */ \
		t3 = vec_max(t1, t2);                                 /* larger of each byte pair */ \
		t4 = vec_min(t1, t2);                                 /* smaller of each byte pair */ \
		t5 = vec_sub(t3, t4);                                 /* absolute difference */ \
		sad = vec_sum4s(t5, sad);                             /* accumulate sum of differences */ \
		cur += 2 * stride; \
		ref += 2 * stride; \
	} while (0)
114 :
115 :			static const vector unsigned char perms[2] = {
116 :			(vector unsigned char)( /* Used when cur is aligned */
117 :			0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
118 :			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
119 :			),
120 :			(vector unsigned char)( /* Used when cur is unaligned */
121 :			0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
122 :			0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
123 :			),
124 :			};
125 :
126 :			/*
127 :			* This function assumes cur is 8 bytes aligned, stride is 16 bytes
128 :			* aligned and ref is unaligned
129 :			*/
130 :			unsigned long
131 :			sad8_altivec( const vector unsigned char * cur,
132 :			const vector unsigned char * ref,
133 :			unsigned long stride)
134 :			{
135 :			vector unsigned char t1, t2, t3, t4, t5, tp ;
136 :			vector unsigned int sad, zero;
137 :			vector signed int sumdiffs;
138 :			vector unsigned char perm_cur;
139 :			vector unsigned char perm_ref1, perm_ref2;
140 :			unsigned long result;
141 :
142 :			#ifdef DEBUG
143 :			if (((unsigned long)cur) & 0x7)
144 :			fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
145 :			// if (((unsigned long)ref) & 0x7)
146 :			// fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref);
147 :			if (stride & 0xf)
148 :			fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
149 :			#endif
150 :
151 :			perm_cur = perms[(((unsigned long)cur)>>3) & 0x01];
152 :			perm_ref1 = vec_lvsl(0, (unsigned char *)ref);
153 :			perm_ref2 = perms[0];
154 :
155 :			/* initialization */
156 :			zero = (vector unsigned int)(0);
157 :			sad = (vector unsigned int)(0);
158 :			stride >>= 4;
159 :
160 :			/* perform sum of differences between current and previous */
161 :			SAD8();
162 :			SAD8();
163 :			SAD8();
164 :			SAD8();
165 :
166 :			/* sum all parts of difference into one 32 bit quantity */
167 :			sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
168 :
169 :			/* copy vector sum into unaligned result */
170 :			sumdiffs = vec_splat( sumdiffs, 3 );
171 :			vec_ste( sumdiffs, 0, (int *)&result );
172 :			return( result );
173 :			}
174 :
/*
 * MEAN16(i): load one 16-byte row into c<i>, add its bytes to `mean`, and
 * advance cur to the next row.
 *
 * Expands in the body of dev16_altivec() and works on that function's
 * locals: cur, stride (already in 16-byte vector units), mean and c0..c15.
 *
 * Wrapped in do { } while (0) so that `MEAN16(i);` is a single statement.
 */
#define MEAN16(i) \
	do { \
		c##i = *cur;                    /* keep the row for the deviation pass */ \
		mean = vec_sum4s(c##i, mean);   /* accumulate sum of pixel values */ \
		cur += stride; \
	} while (0)
179 :
/*
 * DEV16(i): add the absolute differences between row c<i> and the mean
 * vector `mn` to `dev`.
 *
 * Expands in the body of dev16_altivec() and works on that function's
 * locals: c0..c15, mn, t2..t4 and dev.
 *
 * Wrapped in do { } while (0) so that `DEV16(i);` is a single statement.
 */
#define DEV16(i) \
	do { \
		t2 = vec_max(c##i, mn);     /* larger of each byte pair */ \
		t3 = vec_min(c##i, mn);     /* smaller of each byte pair */ \
		t4 = vec_sub(t2, t3);       /* absolute difference */ \
		dev = vec_sum4s(t4, dev);   /* accumulate sum of deviations */ \
	} while (0)
185 :
186 :			unsigned long
187 :			dev16_altivec( const vector unsigned char * cur,
188 :			unsigned long stride)
189 :			{
190 :			vector unsigned char t2,t3,t4, mn;
191 :			vector unsigned int mean, dev, zero;
192 :			vector signed int sumdiffs;
193 :			vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;
194 :			unsigned long result;
195 :
196 :			zero = (vector unsigned int)(0);
197 :			mean = (vector unsigned int)(0);
198 :			dev = (vector unsigned int)(0);
199 :			stride >>= 4;
200 :
201 :			MEAN16(0);
202 :			MEAN16(1);
203 :			MEAN16(2);
204 :			MEAN16(3);
205 :			MEAN16(4);
206 :			MEAN16(5);
207 :			MEAN16(6);
208 :			MEAN16(7);
209 :			MEAN16(8);
210 :			MEAN16(9);
211 :			MEAN16(10);
212 :			MEAN16(11);
213 :			MEAN16(12);
214 :			MEAN16(13);
215 :			MEAN16(14);
216 :			MEAN16(15);
217 :
218 :			sumdiffs = vec_sums((vector signed int) mean, (vector signed int) zero);
219 :			mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,
220 :			(vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));
221 :			DEV16(0);
222 :			DEV16(1);
223 :			DEV16(2);
224 :			DEV16(3);
225 :			DEV16(4);
226 :			DEV16(5);
227 :			DEV16(6);
228 :			DEV16(7);
229 :			DEV16(8);
230 :			DEV16(9);
231 :			DEV16(10);
232 :			DEV16(11);
233 :			DEV16(12);
234 :			DEV16(13);
235 :			DEV16(14);
236 :			DEV16(15);
237 :
238 :			/* sum all parts of difference into one 32 bit quantity */
239 :			sumdiffs = vec_sums((vector signed int) dev, (vector signed int) zero);
240 :
241 :			/* copy vector sum into unaligned result */
242 :			sumdiffs = vec_splat( sumdiffs, 3 );
243 :			vec_ste( sumdiffs, 0, (int *)&result );
244 :			return( result );
245 :			}

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4