Annotation of /trunk/xvidcore/src/image/ppc_asm/colorspace_altivec.c

Revision 1412 - (view) (download)

1 :	edgomez	1412	/*****************************************************************************
2 :			*
3 :			* XVID MPEG-4 VIDEO CODEC
4 :			* - Colorspace conversion functions with altivec optimization -
5 :			*
6 :			* Copyright(C) 2004 Christoph Nägeli <chn@kbw.ch>
7 :			*
8 :			* This program is free software ; you can redistribute it and/or modify
9 :			* it under the terms of the GNU General Public License as published by
10 :			* the Free Software Foundation ; either version 2 of the License, or
11 :			* (at your option) any later version.
12 :			*
13 :			* This program is distributed in the hope that it will be useful,
14 :			* but WITHOUT ANY WARRANTY ; without even the implied warranty of
15 :			* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 :			* GNU General Public License for more details.
17 :			*
18 :			* You should have received a copy of the GNU General Public License
19 :			* along with this program ; if not, write to the Free Software
20 :			* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 :			*
22 :			* $Id: colorspace_altivec.c,v 1.1 2004-04-05 20:36:36 edgomez Exp $
23 :			*
24 :			****************************************************************************/
25 :
26 :			#ifdef HAVE_ALTIVEC_H
27 :			#include <altivec.h>
28 :			#endif
29 :
30 :			#include "../../portab.h"
31 :
32 :			#undef DEBUG
33 :			#include <stdio.h>
34 :
35 :
36 :			/******** generic altivec RGB to YV12 colorspace macro ********/
37 :
/*
 * MAKE_COLORSPACE_ALTIVEC_FROM_RGB(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4)
 *
 * Defines `void NAME(...)`, which reads packed pixels from x_ptr and writes
 * the three planes y_ptr / u_ptr / v_ptr.
 *
 *   NAME     name of the generated function
 *   SIZE     bytes per packed source pixel
 *   PIXELS   pixels consumed horizontally by one FUNC expansion
 *   VPIXELS  source rows consumed by one FUNC expansion
 *   FUNC     per-block worker macro; FUNC##_ROW is expanded once per row group
 *   C1..C4   forwarded unchanged to FUNC and FUNC##_ROW
 *
 * The width is rounded up to a multiple of 16, so up to 15 pixels beyond
 * `width` are processed on every row.  When `vflip` is non-zero, x_ptr is
 * moved to the last row and x_stride is negated, so the source is walked
 * bottom-up.  All data-stream prefetches are stopped before returning.
 */
#define MAKE_COLORSPACE_ALTIVEC_FROM_RGB(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	 uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	 int y_stride, int uv_stride, \
	 int width, int height, int vflip) \
{ \
	int fixed_width = (width + 15) & ~15; \
	int x_dif = x_stride - (SIZE) * fixed_width; \
	int y_dif = y_stride - fixed_width; \
	int uv_dif = uv_stride - (fixed_width / 2); \
	int x, y; \
	unsigned prefetch_constant; \
	\
	/* not `register`: subscripting a register array is undefined in ISO C */ \
	vector unsigned int shift_consts[4]; \
	\
	vector unsigned char y_add; \
	vector unsigned char u_add; \
	vector unsigned char v_add; \
	\
	vector unsigned short vec_fix_ins[3]; \
	\
	/* local copy of the fixed-point coefficient table */ \
	vec_st(vec_ldl(0, &g_vec_fix_ins[0]), 0, &vec_fix_ins[0]); \
	vec_st(vec_ldl(0, &g_vec_fix_ins[1]), 0, &vec_fix_ins[1]); \
	vec_st(vec_ldl(0, &g_vec_fix_ins[2]), 0, &vec_fix_ins[2]); \
	\
	/* per-word shift counts 24, 16, 8 and 0; entry 3 doubles as the zero vector */ \
	shift_consts[0] = vec_add(vec_splat_u32(12), vec_splat_u32(12)); \
	shift_consts[1] = vec_add(vec_splat_u32(8), vec_splat_u32(8)); \
	shift_consts[2] = vec_splat_u32(8); \
	shift_consts[3] = vec_splat_u32(0); \
	\
	prefetch_constant = build_prefetch(16, 2, (short)x_stride); \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
	\
	/* write the offset into the first byte, then splat it to all 16 lanes */ \
	*((unsigned char*)&y_add) = Y_ADD_IN; \
	*((unsigned char*)&u_add) = U_ADD_IN; \
	*((unsigned char*)&v_add) = V_ADD_IN; \
	\
	y_add = vec_splat(y_add, 0); \
	u_add = vec_splat(u_add, 0); \
	v_add = vec_splat(v_add, 0); \
	\
	if(vflip) { \
		x_ptr += (height - 1) * x_stride; \
		x_dif = -(SIZE) * fixed_width - x_stride; \
		x_stride = -x_stride; \
	} \
	\
	for(y = 0; y < height; y += (VPIXELS)) { \
		FUNC##_ROW(SIZE,C1,C2,C3,C4); \
		for(x = 0; x < fixed_width; x += (PIXELS)) { \
			FUNC(SIZE,C1,C2,C3,C4); \
			x_ptr += (PIXELS)*(SIZE); \
			y_ptr += (PIXELS); \
			u_ptr += (PIXELS)/2; \
			v_ptr += (PIXELS)/2; \
		} \
		/* rewind to the row start and step over the rows just handled */ \
		x_ptr += x_dif + (VPIXELS-1) * x_stride; \
		y_ptr += y_dif + (VPIXELS-1) * y_stride; \
		u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride; \
		v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride; \
	} \
	vec_dssall(); \
}
103 :
104 :
105 :			/******** generic altivec YUV to YV12 colorspace macro ********/
106 :
/*
 * MAKE_COLORSPACE_ALTIVEC_FROM_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4)
 *
 * Defines `void NAME(...)`, which reads packed pixels from x_ptr and writes
 * the three planes y_ptr / u_ptr / v_ptr.
 *
 *   NAME     name of the generated function
 *   SIZE     bytes per packed source pixel
 *   PIXELS   pixels consumed horizontally by one FUNC expansion
 *   VPIXELS  source rows consumed by one FUNC expansion
 *   FUNC     per-block worker macro; FUNC##_ROW is expanded once per row group
 *   C1..C4   forwarded unchanged to FUNC and FUNC##_ROW
 *
 * The locals p0/p1, lum0/lum1, u0/u1, v0/v1 and t are scratch vectors
 * declared here for FUNC to use.  The width is rounded up to a multiple of
 * 16; `vflip` walks the source bottom-up by negating x_stride.
 */
#define MAKE_COLORSPACE_ALTIVEC_FROM_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	 uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	 int y_stride, int uv_stride, \
	 int width, int height, int vflip) \
{ \
	int fixed_width = (width + 15) & ~15; \
	int x_dif = x_stride - (SIZE)*fixed_width; \
	int y_dif = y_stride - fixed_width; \
	int uv_dif = uv_stride - (fixed_width / 2); \
	int x, y; \
	\
	unsigned prefetch_constant; \
	\
	vector unsigned int p0, p1; \
	vector unsigned char lum0, lum1; \
	vector unsigned char u0, u1; \
	vector unsigned char v0, v1; \
	vector unsigned char t; \
	\
	prefetch_constant = build_prefetch(16, 2, (short)x_stride); \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
	\
	if(vflip) { \
		x_ptr += (height - 1) * x_stride; \
		x_dif = -(SIZE)*fixed_width - x_stride; \
		x_stride = -x_stride; \
	} \
	\
	for(y = 0; y < height; y += (VPIXELS)) { \
		FUNC##_ROW(SIZE,C1,C2,C3,C4); \
		for(x = 0; x < fixed_width; x += (PIXELS)) { \
			FUNC(SIZE,C1,C2,C3,C4); \
			x_ptr += (PIXELS)*(SIZE); \
			y_ptr += (PIXELS); \
			u_ptr += (PIXELS)/2; \
			v_ptr += (PIXELS)/2; \
		} \
		/* rewind to the row start and step over the rows just handled */ \
		x_ptr += x_dif + (VPIXELS-1) * x_stride; \
		y_ptr += y_dif + (VPIXELS-1) * y_stride; \
		u_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
		v_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
	} \
	vec_dssall(); \
}
154 :
155 :
156 :			/******** generic altivec YV12 to YUV colorspace macro ********/
157 :
/*
 * MAKE_COLORSPACE_ALTIVEC_TO_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4)
 *
 * Defines `void NAME(...)`, which reads the planes y_ptr / u_ptr / v_ptr and
 * writes packed pixels to x_ptr.
 *
 *   NAME     name of the generated function
 *   SIZE     bytes per packed destination pixel
 *   PIXELS   pixels produced horizontally by one FUNC expansion
 *   VPIXELS  rows produced by one FUNC expansion
 *   FUNC     per-block worker macro; FUNC##_ROW is expanded once per row group
 *   C1..C4   forwarded unchanged to FUNC and FUNC##_ROW
 *
 * The vector locals declared here (y_vec, u_vec, v_vec, p0, p1, mask,
 * mask_stencil, t, m4, vec4) are the working set used by FUNC.  The width
 * is rounded up to a multiple of 16; `vflip` walks the packed image
 * bottom-up by negating x_stride.
 */
#define MAKE_COLORSPACE_ALTIVEC_TO_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	 uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	 int y_stride, int uv_stride, \
	 int width, int height, int vflip) \
{ \
	int fixed_width = (width + 15) & ~15; \
	int x_dif = x_stride - (SIZE)*fixed_width; \
	int y_dif = y_stride - fixed_width; \
	int uv_dif = uv_stride - (fixed_width / 2); \
	int x, y; \
	\
	vector unsigned char y_vec; \
	vector unsigned char u_vec; \
	vector unsigned char v_vec; \
	vector unsigned char p0, p1; \
	vector unsigned char mask; \
	vector unsigned char mask_stencil; \
	vector unsigned char t; \
	vector unsigned char m4; \
	vector unsigned char vec4; \
	\
	unsigned prefetch_constant_y; \
	unsigned prefetch_constant_uv; \
	\
	prefetch_constant_y = build_prefetch(16, 4, (short)y_stride); \
	prefetch_constant_uv = build_prefetch(16, 2, (short)uv_stride); \
	\
	vec_dstt(y_ptr, prefetch_constant_y, 0); \
	vec_dstt(u_ptr, prefetch_constant_uv, 1); \
	vec_dstt(v_ptr, prefetch_constant_uv, 2); \
	\
	/* mask_stencil: 0xFF in the first byte of every 4-byte group */ \
	mask_stencil = (vector unsigned char)vec_mergeh( (vector unsigned short)vec_mergeh(vec_splat_u8(-1), vec_splat_u8(0)), vec_splat_u16(0) ); \
	/* m4: byte indices 0..15 shifted right by two (0,0,0,0,1,1,1,1,...) */ \
	m4 = vec_sr(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(2)); \
	vec4 = vec_splat_u8(4); \
	\
	if(vflip) { \
		x_ptr += (height - 1) * x_stride; \
		x_dif = -(SIZE)*fixed_width - x_stride; \
		x_stride = -x_stride; \
	} \
	\
	for(y = 0; y < height; y += (VPIXELS)) { \
		FUNC##_ROW(SIZE,C1,C2,C3,C4); \
		for(x = 0; x < fixed_width; x += (PIXELS)) { \
			FUNC(SIZE,C1,C2,C3,C4); \
			x_ptr += (PIXELS)*(SIZE); \
			y_ptr += (PIXELS); \
			u_ptr += (PIXELS)/2; \
			v_ptr += (PIXELS)/2; \
		} \
		/* rewind to the row start and step over the rows just handled */ \
		x_ptr += x_dif + (VPIXELS-1) * x_stride; \
		y_ptr += y_dif + (VPIXELS-1) * y_stride; \
		u_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
		v_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
	} \
	vec_dssall(); \
}
217 :
218 :			/******** colorspace input (xxx_to_yv12) functions ********/
219 :
220 :			/* rgb -> yuv def's
221 :
222 :			the following constants are taken from the "official spec",
223 :			"Video Demystified" (ISBN 1-878707-09-4)
224 :
225 :			rgb<->yuv _is_ lossy, since most programs do the conversion differently
226 :
227 :			SCALEBITS/FIX taken from ffmpeg
228 :			*/
229 :
/*
 * RGB -> YUV weights.  Each *_R_IN / *_G_IN / *_B_IN value is the magnitude
 * of the weight applied to that colour channel; the sign of each term is
 * applied where the fixed-point table is built.  *_ADD_IN is the constant
 * offset added to the finished Y, U or V sample.
 */
#define Y_R_IN			0.257
#define Y_G_IN			0.504
#define Y_B_IN			0.098
#define Y_ADD_IN		16

#define U_R_IN			0.148
#define U_G_IN			0.291
#define U_B_IN			0.439
#define U_ADD_IN		128

#define V_R_IN			0.439
#define V_G_IN			0.368
#define V_B_IN			0.071
#define V_ADD_IN		128

/* Number of fractional bits in the fixed-point weights. */
#define SCALEBITS_IN	8
/* Convert a real weight to fixed point with SCALEBITS_IN fractional bits, rounded to nearest. */
#define FIX_IN(x)		((uint16_t) ((x) * (1L<<SCALEBITS_IN) + 0.5))
247 :
248 :
/*
 * Pack the control word passed to the vec_dst* data-stream prefetch calls.
 *
 *   block_size   placed in bits 24 and up; values above 31 are stored as 0
 *   block_count  placed in bits 16..23
 *   stride       placed in bits 0..15 as a 16-bit two's-complement value
 *
 * Returns the packed 32-bit control word.
 *
 * The stride is masked to 16 bits before it is merged.  Without the mask a
 * negative stride is sign-extended during integer promotion and overwrites
 * the block size and block count fields.
 */
static inline unsigned
build_prefetch(unsigned char block_size, unsigned char block_count, short stride)
{
	if (block_size > 31)
		block_size = 0;

	return ((unsigned)block_size << 24) |
	       ((unsigned)block_count << 16) |
	       ((unsigned)stride & 0xFFFFu);
}
257 :
258 :			const static vector unsigned short g_vec_fix_ins [3] = {
259 :			(vector unsigned short)AVV( SCALEBITS_IN, FIX_IN(Y_R_IN), FIX_IN(Y_G_IN), FIX_IN(Y_B_IN), 0, 0, 0, 0),
260 :			(vector unsigned short)AVV( SCALEBITS_IN + 2, -FIX_IN(U_R_IN), -FIX_IN(U_G_IN), FIX_IN(U_B_IN), 0, 0, 0, 0),
261 :			(vector unsigned short)AVV( SCALEBITS_IN + 2, FIX_IN(V_R_IN), -FIX_IN(V_G_IN), -FIX_IN(V_B_IN), 0, 0, 0, 0)
262 :			};
263 :
264 :			/* RGB Input */
/*
 * READ_RGB_Y_ALTIVEC(SIZE,ROW,UVID,C1,C2,C3,C4)
 *
 * Loads 32 bytes of packed pixels from source row ROW (relative to x_ptr),
 * extracts three 8-bit channels into the 16-bit vectors r, g and b, adds
 * them into the accumulators r##UVID / g##UVID / b##UVID, and merges the
 * resulting luma bytes into the vector at y_ptr + ROW*y_stride.
 *
 *   C1,C2,C3  indices into shift_consts[] selecting the right-shift that
 *             brings the R, G and B byte to the bottom of each 32-bit word
 *   SIZE,C4   accepted for signature symmetry; not used here
 *
 * Expects p0, p1, t0, t1, r, g, b, lum, mask, y_vec, shift_consts,
 * vec_fix_ins and y_add to be in scope at the expansion site.
 */
#define READ_RGB_Y_ALTIVEC(SIZE,ROW,UVID,C1,C2,C3,C4) \
	p0 = vec_ld(0, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
	p1 = vec_ld(16, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
	\
	/* mask = 0x000000FF in every 32-bit word */ \
	mask = vec_mergeh((vector unsigned char)shift_consts[3], vec_splat_u8(-1)); \
	mask = (vector unsigned char)vec_mergeh((vector unsigned short)shift_consts[3], (vector unsigned short)mask); \
	\
	t0 = vec_sr(p0, shift_consts[C1]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C1]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	r = vec_pack(t0, t1); \
	r##UVID = vec_add(r##UVID, r); \
	\
	t0 = vec_sr(p0, shift_consts[C2]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C2]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	g = vec_pack(t0, t1); \
	g##UVID = vec_add(g##UVID, g); \
	\
	t0 = vec_sr(p0, shift_consts[C3]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C3]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	b = vec_pack(t0, t1); \
	b##UVID = vec_add(b##UVID, b); \
	\
	/* lum = (r*Yr + g*Yg + b*Yb) >> shift, then narrow to bytes and add the Y offset */ \
	lum = vec_mladd(r, vec_splat(vec_fix_ins[0], 1), (vector unsigned short)shift_consts[3]); \
	lum = vec_mladd(g, vec_splat(vec_fix_ins[0], 2), lum); \
	lum = vec_mladd(b, vec_splat(vec_fix_ins[0], 3), lum); \
	lum = vec_sr(lum, vec_splat(vec_fix_ins[0], 0)); \
	y_vec = vec_pack(lum, (vector unsigned short)shift_consts[3]); \
	y_vec = vec_add(y_vec, y_add); \
	\
	/* read-modify-write: keep the destination bytes selected by mask */ \
	mask = vec_pack((vector unsigned short)shift_consts[3], vec_splat_u16(-1)); \
	mask = vec_perm(mask, mask, vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	y_vec = vec_perm(y_vec, y_vec, vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	y_vec = vec_sel(y_vec, vec_ld(0, y_ptr + (ROW)*y_stride), mask); \
	vec_st(y_vec, 0, y_ptr + (ROW)*y_stride)
305 :
/*
 * READ_RGB_UV_ALTIVEC(UV_ROW,UVID)
 *
 * Turns the channel accumulators r##UVID / g##UVID / b##UVID into chroma
 * bytes and merges them into the vectors at u_ptr + UV_ROW*uv_stride and
 * v_ptr + UV_ROW*uv_stride.
 *
 * Each accumulator is first reduced with vec_sum4s, then multiplied by the
 * weights in vec_fix_ins[1] (U) or vec_fix_ins[2] (V), shifted right by
 * element 0 of that vector, narrowed to bytes, and offset by u_add / v_add.
 *
 * Expects t3, mask, u_vec, v_vec, shift_consts, vec_fix_ins, u_add and
 * v_add to be in scope at the expansion site.
 */
#define READ_RGB_UV_ALTIVEC(UV_ROW,UVID) \
	r##UVID = (vector unsigned short)vec_sum4s((vector signed short)r##UVID, (vector signed int)shift_consts[3]); \
	g##UVID = (vector unsigned short)vec_sum4s((vector signed short)g##UVID, (vector signed int)shift_consts[3]); \
	b##UVID = (vector unsigned short)vec_sum4s((vector signed short)b##UVID, (vector signed int)shift_consts[3]); \
	\
	/* U component */ \
	t3 = vec_mulo((vector signed short)r##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 1)); \
	t3 = vec_add(t3, vec_mulo((vector signed short)g##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 2))); \
	t3 = vec_add(t3, vec_mulo((vector signed short)b##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 3))); \
	t3 = vec_sr(t3, (vector unsigned int)vec_mergeh((vector unsigned short)shift_consts[3], vec_splat(vec_fix_ins[1], 0))); \
	\
	u_vec = vec_pack(vec_pack((vector unsigned int)t3, shift_consts[3]), (vector unsigned short)shift_consts[3]); \
	u_vec = vec_add(u_vec, u_add); \
	\
	/* read-modify-write: replace only the destination bytes selected by mask */ \
	mask = vec_pack(vec_splat_u16(-1), (vector unsigned short)shift_consts[3]); \
	mask = (vector unsigned char)vec_pack((vector unsigned int)mask, shift_consts[3]); \
	mask = vec_perm(mask, mask, vec_lvsr(0, u_ptr + (UV_ROW)*uv_stride)); \
	u_vec = vec_perm(u_vec, u_vec, vec_lvsr(0, u_ptr + (UV_ROW)*uv_stride)); \
	u_vec = vec_sel(vec_ld(0, u_ptr + (UV_ROW)*uv_stride), u_vec, mask); \
	vec_st(u_vec, 0, u_ptr + (UV_ROW)*uv_stride); \
	\
	/* V component */ \
	t3 = vec_mulo((vector signed short)r##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 1)); \
	t3 = vec_add(t3, vec_mulo((vector signed short)g##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 2))); \
	t3 = vec_add(t3, vec_mulo((vector signed short)b##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 3))); \
	t3 = vec_sr(t3, (vector unsigned int)vec_mergeh((vector unsigned short)shift_consts[3], vec_splat(vec_fix_ins[2], 0))); \
	\
	v_vec = vec_pack(vec_pack((vector unsigned int)t3, shift_consts[3]), (vector unsigned short)shift_consts[3]); \
	v_vec = vec_add(v_vec, v_add); \
	\
	mask = vec_pack(vec_splat_u16(-1), (vector unsigned short)shift_consts[3]); \
	mask = (vector unsigned char)vec_pack((vector unsigned int)mask, shift_consts[3]); \
	mask = vec_perm(mask, mask, vec_lvsr(0, v_ptr + (UV_ROW) * uv_stride)); \
	v_vec = vec_perm(v_vec, v_vec, vec_lvsr(0, v_ptr + (UV_ROW) * uv_stride)); \
	v_vec = vec_sel(vec_ld(0, v_ptr + (UV_ROW) * uv_stride), v_vec, mask); \
	vec_st(v_vec, 0, v_ptr + (UV_ROW) * uv_stride)
340 :
341 :
/* Per-row-group hook for the RGB input path: nothing to do. */
#define RGB_TO_YV12_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/* nothing */

/*
 * RGB_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4)
 *
 * Worker for one block of the RGB input path: declares the scratch vectors,
 * re-arms the two source prefetch streams, converts two source rows to luma
 * (rows 0 and 1, both accumulating into r0/g0/b0) and then writes one row of
 * U and V from those accumulators.
 *
 * The second prefetch address uses 2 * x_stride instead of x_stride << 1:
 * x_stride is negative when the enclosing function runs with vflip set, and
 * left-shifting a negative value is undefined behaviour.
 */
#define RGB_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vector unsigned int p0, p1; \
	vector unsigned int t0, t1; \
	vector unsigned short r, g, b, r0, g0, b0; \
	vector unsigned short lum; \
	vector unsigned char mask; \
	vector unsigned char y_vec; \
	vector unsigned char u_vec; \
	vector unsigned char v_vec; \
	vector signed int t3; \
	\
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + 2 * x_stride, prefetch_constant, 1); \
	\
	r0 = g0 = b0 = (vector unsigned short)shift_consts[3]; \
	\
	READ_RGB_Y_ALTIVEC(SIZE, 0, 0, C1, C2, C3, C4); \
	READ_RGB_Y_ALTIVEC(SIZE, 1, 0, C1, C2, C3, C4); \
	READ_RGB_UV_ALTIVEC(0, 0)
364 :
365 :
366 :			/* YUV input */
367 :
/*
 * READ_YUYV_Y_ALTIVEC(ROW,C1,C2,C3,C4)
 *
 * Loads 32 bytes of packed 4:2:2 data from source row ROW (relative to
 * x_ptr), stores the 16 luma bytes at y_ptr + ROW*y_stride, and leaves the
 * de-interleaved chroma bytes in u##ROW and v##ROW.
 *
 *   C1, C3  byte offsets of the two luma samples inside each 4-byte group
 *   C2, C4  byte offsets of the U and V sample inside each 4-byte group
 *
 * Every permute vector is built as (byte index << 2) + offset.  Note that
 * the expansion already ends with a semicolon.
 *
 * Expects p0, p1, lum0, lum1, t, u##ROW and v##ROW to be in scope at the
 * expansion site.
 */
#define READ_YUYV_Y_ALTIVEC(ROW,C1,C2,C3,C4) \
	p0 = vec_ld(0, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
	p1 = vec_ld(16, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
	\
	/* first luma sample of every group */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C1)); \
	\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	\
	/* second luma sample of every group, interleaved with the first */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C3)); \
	\
	lum0 = vec_mergeh(lum0, (vector unsigned char)vec_perm(p0, p0, t)); \
	lum1 = vec_mergeh(lum1, (vector unsigned char)vec_perm(p1, p1, t)); \
	\
	lum0 = vec_sel(lum0, lum1, vec_pack(vec_splat_u16(0), vec_splat_u16(-1))); \
	vec_st(lum0, 0, y_ptr + (ROW)*y_stride); \
	\
	/* U samples */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C2)); \
	\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	lum1 = vec_perm(lum1, lum1, vec_lvsr(4, (unsigned char*)0)); \
	t = vec_pack(vec_pack(vec_splat_u32(0), vec_splat_u32(-1)), vec_splat_u16(-1)); \
	u##ROW = vec_sel(lum0, lum1, t); \
	\
	/* V samples */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C4)); \
	\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	lum1 = vec_perm(lum1, lum1, vec_lvsr(4, (unsigned char*)0)); \
	t = vec_pack(vec_pack(vec_splat_u32(0), vec_splat_u32(-1)), vec_splat_u16(-1)); \
	v##ROW = vec_sel(lum0, lum1, t);
408 :
/*
 * READ_YUYV_UV_ALTIVEC(UV_ROW,ROW1,ROW2,C1,C2,C3,C4)
 *
 * Averages the chroma vectors of two source rows (u##ROW1 with u##ROW2,
 * v##ROW1 with v##ROW2) and merges the result into the vectors at
 * u_ptr + UV_ROW*uv_stride and v_ptr + UV_ROW*uv_stride.  u##ROW1 and
 * v##ROW1 are overwritten.  C1..C4 are accepted for signature symmetry and
 * not used.  Note that the expansion already ends with a semicolon.
 */
#define READ_YUYV_UV_ALTIVEC(UV_ROW,ROW1,ROW2,C1,C2,C3,C4) \
	u##ROW1 = vec_avg(u##ROW1, u##ROW2); \
	/* read-modify-write: keep the destination bytes selected by t */ \
	t = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \
	t = vec_perm(t, t, vec_lvsl(0, u_ptr + (UV_ROW)*uv_stride)); \
	u##ROW1 = vec_perm(u##ROW1, u##ROW1, vec_lvsl(0, u_ptr + (UV_ROW)*uv_stride)); \
	u##ROW1 = vec_sel(u##ROW1, vec_ld(0, u_ptr + (UV_ROW)*uv_stride), t); \
	vec_st(u##ROW1, 0, u_ptr + (UV_ROW)*uv_stride); \
	\
	v##ROW1 = vec_avg(v##ROW1, v##ROW2); \
	t = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \
	t = vec_perm(t, t, vec_lvsl(0, v_ptr + (UV_ROW)*uv_stride)); \
	v##ROW1 = vec_perm(v##ROW1, v##ROW1, vec_lvsl(0, v_ptr + (UV_ROW)*uv_stride)); \
	v##ROW1 = vec_sel(v##ROW1, vec_ld(0, v_ptr + (UV_ROW)*uv_stride), t); \
	vec_st(v##ROW1, 0, v_ptr + (UV_ROW)*uv_stride);
423 :
424 :
/* Per-row-group hook for the packed-YUV input path: nothing to do. */
#define YUYV_TO_YV12_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/* nothing */

/*
 * YUYV_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4)
 *
 * Worker for one block of the packed-YUV input path: re-arms the two source
 * prefetch streams, extracts luma and chroma from source rows 0 and 1, then
 * writes one row of averaged U and V.
 *
 * The second prefetch address uses 2 * x_stride instead of x_stride << 1:
 * x_stride is negative when the enclosing function runs with vflip set, and
 * left-shifting a negative value is undefined behaviour.
 */
#define YUYV_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + 2 * x_stride, prefetch_constant, 1); \
	\
	READ_YUYV_Y_ALTIVEC (0, C1,C2,C3,C4) \
	READ_YUYV_Y_ALTIVEC (1, C1,C2,C3,C4) \
	READ_YUYV_UV_ALTIVEC(0, 0, 1, C1,C2,C3,C4)
435 :
436 :			MAKE_COLORSPACE_ALTIVEC_FROM_RGB(bgra_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 2, 1, 0, 0)
437 :			MAKE_COLORSPACE_ALTIVEC_FROM_RGB(abgr_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 3, 2, 1, 0)
438 :			MAKE_COLORSPACE_ALTIVEC_FROM_RGB(rgba_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 0, 1, 2, 0)
439 :			MAKE_COLORSPACE_ALTIVEC_FROM_RGB(argb_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 1, 2, 3, 0)
440 :
441 :			MAKE_COLORSPACE_ALTIVEC_FROM_YUV(yuyv_to_yv12_altivec_c, 2, 16, 2, YUYV_TO_YV12_ALTIVEC, 0, 1, 2, 3)
442 :			MAKE_COLORSPACE_ALTIVEC_FROM_YUV(uyvy_to_yv12_altivec_c, 2, 16, 2, YUYV_TO_YV12_ALTIVEC, 1, 0, 3, 2)
443 :
444 :
/*
 * WRITE_YUYV_ALTIVEC(ROW, UV_ROW, C1,C2,C3,C4)
 *
 * Builds 32 bytes of packed 4:2:2 output for row ROW from 16 luma bytes at
 * y_ptr + ROW*y_stride and the chroma bytes at u_ptr / v_ptr, and stores
 * them at x_ptr + ROW*x_stride.
 *
 *   C1, C3  byte offsets of the two luma samples inside each 4-byte group
 *   C2, C4  byte offsets of the U and V sample inside each 4-byte group
 *   UV_ROW  accepted but not used: chroma is always read from u_ptr / v_ptr
 *
 * The plane loads use the two-load-plus-permute idiom, so they tolerate
 * unaligned plane pointers; the final stores are plain vec_st.
 *
 * Expects p0, p1, y_vec, u_vec, v_vec, t, mask, mask_stencil, m4 and vec4
 * to be in scope at the expansion site.
 */
#define WRITE_YUYV_ALTIVEC(ROW, UV_ROW, C1,C2,C3,C4) \
	p0 = vec_splat_u8(0); \
	p1 = vec_splat_u8(0); \
	\
	y_vec = vec_perm(vec_ld(0, y_ptr + (ROW)*y_stride), vec_ld(16, y_ptr + (ROW)*y_stride), vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	/* C1: even-indexed luma bytes */ \
	t = vec_perm(y_vec, y_vec, vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1))); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C1, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \
	p1 = vec_sel(p1, vec_perm(t, t, vec_add(m4, vec4)), mask); \
	\
	/* C3: odd-indexed luma bytes */ \
	t = vec_perm(y_vec, y_vec, vec_add(vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1)), vec_splat_u8(1))); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C3, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \
	p1 = vec_sel(p1, vec_perm(t, t, vec_add(m4, vec4)), mask); \
	\
	/* C2: U bytes */ \
	u_vec = vec_perm(vec_ld(0,u_ptr), vec_ld(16, u_ptr), vec_lvsl(0, u_ptr)); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C2, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(u_vec, u_vec, m4), mask); \
	p1 = vec_sel(p1, vec_perm(u_vec, u_vec, vec_add(m4, vec4)), mask); \
	\
	/* C4: V bytes */ \
	v_vec = vec_perm(vec_ld(0, v_ptr), vec_ld(16, v_ptr), vec_lvsl(0, v_ptr)); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C4, (unsigned char*)0)); \
	\
	p0 = vec_sel(p0, vec_perm(v_vec, v_vec, m4), mask); \
	p1 = vec_sel(p1, vec_perm(v_vec, v_vec, vec_add(m4, vec4)), mask); \
	\
	vec_st(p0, 0, x_ptr + (ROW)*x_stride); \
	vec_st(p1, 16, x_ptr + (ROW)*x_stride)
480 :
481 :
/* Per-row-group hook for the packed-YUV output path: nothing to do. */
#define YV12_TO_YUYV_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/* nothing */

/*
 * YV12_TO_YUYV_ALTIVEC(SIZE,C1,C2,C3,C4)
 *
 * Worker for one block of the packed-YUV output path: re-arms the three
 * plane prefetch streams, then writes output rows 0 and 1.  Both rows pass
 * UV_ROW = 0, and WRITE_YUYV_ALTIVEC reads chroma from the same u_ptr /
 * v_ptr position for both.
 */
#define YV12_TO_YUYV_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vec_dstt(y_ptr, prefetch_constant_y, 0); \
	vec_dstt(u_ptr, prefetch_constant_uv, 1); \
	vec_dstt(v_ptr, prefetch_constant_uv, 2); \
	\
	WRITE_YUYV_ALTIVEC(0, 0, C1,C2,C3,C4); \
	WRITE_YUYV_ALTIVEC(1, 0, C1,C2,C3,C4)
492 :
493 :
494 :			MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 0, 1, 2, 3)
495 :			MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 1, 0, 3, 2)
496 :

No admin address has been configured	ViewVC Help
Powered by ViewVC 1.0.4