Parent Directory | Revision Log
Revision 1988 - (view) (download)
1 : | edgomez | 1412 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - Colorspace conversion functions with altivec optimization - | ||
5 : | * | ||
 *  Copyright(C) 2004 Christoph Nägeli <chn@kbw.ch>
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | Isibaar | 1988 | * $Id$ |
23 : | edgomez | 1412 | * |
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | #ifdef HAVE_ALTIVEC_H | ||
27 : | #include <altivec.h> | ||
28 : | #endif | ||
29 : | |||
30 : | #include "../../portab.h" | ||
31 : | edgomez | 1606 | #include "../colorspace.h" |
32 : | edgomez | 1412 | |
33 : | #undef DEBUG | ||
34 : | #include <stdio.h> | ||
35 : | |||
36 : | |||
37 : | /********** generic altivec RGB to YV12 colorspace macro **********/ | ||
38 : | |||
/*
 * MAKE_COLORSPACE_ALTIVEC_FROM_RGB
 * Expands to a packed-RGB -> planar YV12 conversion function.
 *   NAME    : name of the generated function
 *   SIZE    : bytes per source pixel (4 for the 32bit RGB formats)
 *   PIXELS  : pixels converted per inner-loop iteration
 *   VPIXELS : source rows consumed per outer-loop iteration
 *   FUNC    : per-tile conversion macro; FUNC##_ROW is expanded once per row
 *   C1..C4  : channel selectors, forwarded unchanged to FUNC
 * Width is rounded up to a multiple of 16 pixels; vflip is honoured by
 * walking the source image bottom-up with a negated stride.  vec_dstt
 * data-stream prefetches are started here and stopped with vec_dssall.
 */
#define MAKE_COLORSPACE_ALTIVEC_FROM_RGB(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	int y_stride, int uv_stride, \
	int width, int height, int vflip) \
{ \
	/* width rounded up to a whole number of 16 pixel tiles */ \
	int fixed_width = (width + 15) & ~15; \
	/* per-row pointer corrections (stride minus bytes consumed per row) */ \
	int x_dif = x_stride - (SIZE) * fixed_width; \
	int y_dif = y_stride - fixed_width; \
	int uv_dif = uv_stride - (fixed_width / 2); \
	int x, y; \
	unsigned prefetch_constant; \
\
	register vector unsigned int shift_consts[4]; \
\
	vector unsigned char y_add; \
	vector unsigned char u_add; \
	vector unsigned char v_add; \
\
	vector unsigned short vec_fix_ins[3]; \
\
	/* copy the global fixed-point coefficient table into locals */ \
	vec_st(vec_ldl(0, &g_vec_fix_ins[0]), 0, &vec_fix_ins[0]); \
	vec_st(vec_ldl(0, &g_vec_fix_ins[1]), 0, &vec_fix_ins[1]); \
	vec_st(vec_ldl(0, &g_vec_fix_ins[2]), 0, &vec_fix_ins[2]); \
\
	/* shift counts 24, 16, 8 and 0 built from splat-able 5bit immediates */ \
	shift_consts[0] = vec_add(vec_splat_u32(12), vec_splat_u32(12)); \
	shift_consts[1] = vec_add(vec_splat_u32(8), vec_splat_u32(8)); \
	shift_consts[2] = vec_splat_u32(8); \
	shift_consts[3] = vec_splat_u32(0); \
\
	/* prefetch two streams of source data, one row apart */ \
	prefetch_constant = build_prefetch(16, 2, (short)x_stride); \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
\
	/* splat the Y/U/V bias bytes across whole vectors */ \
	*((unsigned char*)&y_add) = Y_ADD_IN; \
	*((unsigned char*)&u_add) = U_ADD_IN; \
	*((unsigned char*)&v_add) = V_ADD_IN; \
\
	y_add = vec_splat(y_add, 0); \
	u_add = vec_splat(u_add, 0); \
	v_add = vec_splat(v_add, 0); \
\
	if(vflip) { \
		/* start at the last source row and walk upwards */ \
		x_ptr += (height - 1) * x_stride; \
		x_dif = -(SIZE) * fixed_width - x_stride; \
		x_stride = -x_stride; \
	} \
\
	for(y = 0; y < height; y += (VPIXELS)) { \
		FUNC##_ROW(SIZE,C1,C2,C3,C4); \
		for(x = 0; x < fixed_width; x += (PIXELS)) { \
			FUNC(SIZE,C1,C2,C3,C4); \
			x_ptr += (PIXELS)*(SIZE); \
			y_ptr += (PIXELS); \
			u_ptr += (PIXELS)/2; \
			v_ptr += (PIXELS)/2; \
		} \
		x_ptr += x_dif + (VPIXELS-1) * x_stride; \
		y_ptr += y_dif + (VPIXELS-1) * y_stride; \
		u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride; \
		v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride; \
	} \
	/* stop all data-stream prefetches */ \
	vec_dssall(); \
}
104 : | |||
105 : | |||
106 : | /********** generic altivec YUV to YV12 colorspace macro **********/ | ||
107 : | |||
/*
 * MAKE_COLORSPACE_ALTIVEC_FROM_YUV
 * Expands to a packed-YUV (e.g. YUYV/UYVY) -> planar YV12 conversion
 * function.  Parameters are as in MAKE_COLORSPACE_ALTIVEC_FROM_RGB; the
 * vector locals declared here (p0/p1, lum0/lum1, u0/u1, v0/v1, t) are
 * the working set expected by the FUNC macros (YUYV_TO_YV12_ALTIVEC).
 */
#define MAKE_COLORSPACE_ALTIVEC_FROM_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	int y_stride, int uv_stride, \
	int width, int height, int vflip) \
{ \
	/* width rounded up to a whole number of 16 pixel tiles */ \
	int fixed_width = (width + 15) & ~15; \
	int x_dif = x_stride - (SIZE)*fixed_width; \
	int y_dif = y_stride - fixed_width; \
	int uv_dif = uv_stride - (fixed_width / 2); \
	int x, y; \
\
	unsigned prefetch_constant; \
\
	vector unsigned int p0, p1; \
	vector unsigned char lum0, lum1; \
	vector unsigned char u0, u1; \
	vector unsigned char v0, v1; \
	vector unsigned char t; \
\
	/* prefetch two streams of source data, one row apart */ \
	prefetch_constant = build_prefetch(16, 2, (short)x_stride); \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
\
	if(vflip) { \
		/* start at the last source row and walk upwards */ \
		x_ptr += (height - 1) * x_stride; \
		x_dif = -(SIZE)*fixed_width - x_stride; \
		x_stride = -x_stride; \
	} \
\
	for(y = 0; y < height; y += (VPIXELS)) { \
		FUNC##_ROW(SIZE,C1,C2,C3,C4); \
		for(x = 0; x < fixed_width; x += (PIXELS)) { \
			FUNC(SIZE,C1,C2,C3,C4); \
			x_ptr += (PIXELS)*(SIZE); \
			y_ptr += (PIXELS); \
			u_ptr += (PIXELS)/2; \
			v_ptr += (PIXELS)/2; \
		} \
		x_ptr += x_dif + (VPIXELS-1) * x_stride; \
		y_ptr += y_dif + (VPIXELS-1) * y_stride; \
		u_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
		v_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
	} \
	/* stop all data-stream prefetches */ \
	vec_dssall(); \
}
155 : | |||
156 : | |||
157 : | /********** generic altivec YV12 to YUV colorspace macro **********/ | ||
158 : | |||
/*
 * MAKE_COLORSPACE_ALTIVEC_TO_YUV
 * Expands to a planar YV12 -> packed-YUV conversion function (output
 * direction).  Prefetch streams follow the three source planes; the
 * vector locals are the working set expected by WRITE_YUYV_ALTIVEC.
 * NOTE(review): the generated function stores with aligned vec_st to
 * x_ptr, hence the "_unaligned_c" instantiations below are only used
 * behind the alignment check in CHECK_COLORSPACE_ALTIVEC_TO_YUV.
 */
#define MAKE_COLORSPACE_ALTIVEC_TO_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	int y_stride, int uv_stride, \
	int width, int height, int vflip) \
{ \
	/* width rounded up to a whole number of 16 pixel tiles */ \
	int fixed_width = (width + 15) & ~15; \
	int x_dif = x_stride - (SIZE)*fixed_width; \
	int y_dif = y_stride - fixed_width; \
	int uv_dif = uv_stride - (fixed_width / 2); \
	int x, y; \
\
	vector unsigned char y_vec; \
	vector unsigned char u_vec; \
	vector unsigned char v_vec; \
	vector unsigned char p0, p1, ptmp; \
	vector unsigned char mask; \
	vector unsigned char mask_stencil; \
	vector unsigned char t; \
	vector unsigned char m4; \
	vector unsigned char vec4; \
\
	unsigned prefetch_constant_y; \
	unsigned prefetch_constant_uv; \
\
	/* prefetch the Y plane (4 blocks/row) and both chroma planes */ \
	prefetch_constant_y = build_prefetch(16, 4, (short)y_stride); \
	prefetch_constant_uv = build_prefetch(16, 2, (short)uv_stride); \
\
	vec_dstt(y_ptr, prefetch_constant_y, 0); \
	vec_dstt(u_ptr, prefetch_constant_uv, 1); \
	vec_dstt(v_ptr, prefetch_constant_uv, 2); \
\
	/* mask_stencil: 0xff in every 4th byte (one output channel slot), \
	   m4: permute indices 0,0,0,0,1,1,1,1,... (each byte repeated 4x), \
	   vec4: constant 4, used to address the second output half */ \
	mask_stencil = (vector unsigned char)vec_mergeh( (vector unsigned short)vec_mergeh(vec_splat_u8(-1), vec_splat_u8(0)), vec_splat_u16(0) ); \
	m4 = vec_sr(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(2)); \
	vec4 = vec_splat_u8(4); \
\
	if(vflip) { \
		/* start at the last destination row and walk upwards */ \
		x_ptr += (height - 1) * x_stride; \
		x_dif = -(SIZE)*fixed_width - x_stride; \
		x_stride = -x_stride; \
	} \
\
	for(y = 0; y < height; y += (VPIXELS)) { \
		FUNC##_ROW(SIZE,C1,C2,C3,C4); \
		for(x = 0; x < fixed_width; x += (PIXELS)) { \
			FUNC(SIZE,C1,C2,C3,C4); \
			x_ptr += (PIXELS)*(SIZE); \
			y_ptr += (PIXELS); \
			u_ptr += (PIXELS)/2; \
			v_ptr += (PIXELS)/2; \
		} \
		x_ptr += x_dif + (VPIXELS-1) * x_stride; \
		y_ptr += y_dif + (VPIXELS-1) * y_stride; \
		u_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
		v_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
	} \
	/* stop all data-stream prefetches */ \
	vec_dssall(); \
}
218 : | |||
219 : | /********** colorspace input (xxx_to_yv12) functions **********/ | ||
220 : | |||
221 : | /* rgb -> yuv def's | ||
222 : | |||
	the following constants are the "official spec" ones, taken from
	"Video Demystified" (ISBN 1-878707-09-4)
225 : | |||
226 : | rgb<->yuv _is_ lossy, since most programs do the conversion differently | ||
227 : | |||
228 : | SCALEBITS/FIX taken from ffmpeg | ||
229 : | */ | ||
230 : | |||
/* BT.601-style RGB -> YCbCr coefficients (studio-swing: Y in 16..235,
 * chroma in 16..240 with a 128 bias) */
#define Y_R_IN 0.257
#define Y_G_IN 0.504
#define Y_B_IN 0.098
#define Y_ADD_IN 16

#define U_R_IN 0.148
#define U_G_IN 0.291
#define U_B_IN 0.439
#define U_ADD_IN 128

#define V_R_IN 0.439
#define V_G_IN 0.368
#define V_B_IN 0.071
#define V_ADD_IN 128

/* convert a floating-point coefficient to SCALEBITS_IN-bit fixed point,
 * rounded to nearest */
#define SCALEBITS_IN 8
#define FIX_IN(x) ((uint16_t) ((x) * (1L<<SCALEBITS_IN) + 0.5))
248 : | |||
249 : | |||
/*
 * Build an AltiVec data-stream (vec_dst*) control word:
 * bits 24..31 = block size (vectors), bits 16..23 = block count,
 * bits 0..15  = signed 16bit stride between blocks.
 * The stride is masked to 16 bits: a negative stride would otherwise be
 * sign-extended to 32 bits by integer promotion and the OR would clobber
 * the block size/count fields.
 */
static inline unsigned
build_prefetch(unsigned char block_size, unsigned char block_count, short stride)
{
	return (((unsigned)block_size << 24) | ((unsigned)block_count << 16) |
	        ((unsigned)stride & 0xFFFFu));
}
255 : | |||
256 : | const static vector unsigned short g_vec_fix_ins [3] = { | ||
257 : | (vector unsigned short)AVV( SCALEBITS_IN, FIX_IN(Y_R_IN), FIX_IN(Y_G_IN), FIX_IN(Y_B_IN), 0, 0, 0, 0), | ||
258 : | (vector unsigned short)AVV( SCALEBITS_IN + 2, -FIX_IN(U_R_IN), -FIX_IN(U_G_IN), FIX_IN(U_B_IN), 0, 0, 0, 0), | ||
259 : | (vector unsigned short)AVV( SCALEBITS_IN + 2, FIX_IN(V_R_IN), -FIX_IN(V_G_IN), -FIX_IN(V_B_IN), 0, 0, 0, 0) | ||
260 : | }; | ||
261 : | |||
262 : | /* RGB Input */ | ||
/* RGB Input */
/*
 * READ_RGB_Y_ALTIVEC
 * Loads 8 packed 32bit pixels from source row ROW, isolates the three
 * colour channels (C1..C3 index shift_consts[], i.e. the right-shift
 * that brings each channel into the low byte of its word), accumulates
 * the channels into r##UVID/g##UVID/b##UVID for the later chroma pass,
 * then computes 8 luma bytes and merges them into the Y plane without
 * disturbing the remaining destination bytes.
 */
#define READ_RGB_Y_ALTIVEC(SIZE,ROW,UVID,C1,C2,C3,C4) \
	p0 = vec_ld(0, (unsigned int*)(x_ptr + (ROW) * x_stride)); \
	p1 = vec_ld(16, (unsigned int*)(x_ptr + (ROW) * x_stride)); \
\
	/* build a 0x000000ff mask in every 32bit element */ \
	mask = vec_mergeh((vector unsigned char)shift_consts[3], vec_splat_u8(-1)); \
	mask = (vector unsigned char)vec_mergeh((vector unsigned short)shift_consts[3], (vector unsigned short)mask); \
\
	/* channel C1: shift to the low byte, mask, pack to 8 shorts */ \
	t0 = vec_sr(p0, shift_consts[C1]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C1]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	r = vec_pack(t0, t1); \
	r##UVID = vec_add(r##UVID, r); \
\
	/* channel C2 */ \
	t0 = vec_sr(p0, shift_consts[C2]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C2]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	g = vec_pack(t0, t1); \
	g##UVID = vec_add(g##UVID, g); \
\
	/* channel C3 */ \
	t0 = vec_sr(p0, shift_consts[C3]); \
	t0 = vec_sel(shift_consts[3], t0, (vector unsigned int)mask); \
	t1 = vec_sr(p1, shift_consts[C3]); \
	t1 = vec_sel(shift_consts[3], t1, (vector unsigned int)mask); \
	b = vec_pack(t0, t1); \
	b##UVID = vec_add(b##UVID, b); \
\
	/* luma = (cr*r + cg*g + cb*b) >> SCALEBITS_IN, then add Y_ADD_IN */ \
	lum = vec_mladd(r, vec_splat(vec_fix_ins[0], 1), (vector unsigned short)shift_consts[3]); \
	lum = vec_mladd(g, vec_splat(vec_fix_ins[0], 2), lum); \
	lum = vec_mladd(b, vec_splat(vec_fix_ins[0], 3), lum); \
	lum = vec_sr(lum, vec_splat(vec_fix_ins[0], 0)); \
	y_vec = vec_pack(lum, (vector unsigned short)shift_consts[3]); \
	y_vec = vec_add(y_vec, y_add); \
\
	/* rotate the 8 luma bytes to the destination alignment and merge \
	   them with the existing Y plane contents before storing */ \
	mask = vec_pack((vector unsigned short)shift_consts[3], vec_splat_u16(-1)); \
	mask = vec_perm(mask, mask, vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	y_vec = vec_perm(y_vec, y_vec, vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	y_vec = vec_sel(y_vec, vec_ld(0, y_ptr + (ROW)*y_stride), mask); \
	vec_st(y_vec, 0, y_ptr + (ROW)*y_stride)
303 : | |||
/*
 * READ_RGB_UV_ALTIVEC
 * Consumes the channel accumulators filled by two READ_RGB_Y_ALTIVEC
 * expansions (two rows of 8 pixels) and produces 4 U and 4 V bytes for
 * chroma row UV_ROW.  vec_sum4s folds each pair of adjacent shorts into
 * a 32bit sum (a 2x2 pixel block per element); the coefficient rows for
 * U/V already include the extra >>2 needed to average the 4 samples.
 */
#define READ_RGB_UV_ALTIVEC(UV_ROW,UVID) \
	/* fold adjacent pixel pairs: each int now holds a 2x2 block sum */ \
	r##UVID = (vector unsigned short)vec_sum4s((vector signed short)r##UVID, (vector signed int)shift_consts[3]); \
	g##UVID = (vector unsigned short)vec_sum4s((vector signed short)g##UVID, (vector signed int)shift_consts[3]); \
	b##UVID = (vector unsigned short)vec_sum4s((vector signed short)b##UVID, (vector signed int)shift_consts[3]); \
\
	/* U: vec_mulo on the short view multiplies the low 16 bits of each \
	   32bit sum by the coefficient, then shift by SCALEBITS_IN + 2 */ \
	t3 = vec_mulo((vector signed short)r##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 1)); \
	t3 = vec_add(t3, vec_mulo((vector signed short)g##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 2))); \
	t3 = vec_add(t3, vec_mulo((vector signed short)b##UVID, (vector signed short)vec_splat(vec_fix_ins[1], 3))); \
	t3 = vec_sr(t3, (vector unsigned int)vec_mergeh((vector unsigned short)shift_consts[3], vec_splat(vec_fix_ins[1], 0))); \
\
	u_vec = vec_pack(vec_pack((vector unsigned int)t3, shift_consts[3]), (vector unsigned short)shift_consts[3]); \
	u_vec = vec_add(u_vec, u_add); \
\
	/* merge the 4 new U bytes into the destination (unaligned-safe) */ \
	mask = vec_pack(vec_splat_u16(-1), (vector unsigned short)shift_consts[3]); \
	mask = (vector unsigned char)vec_pack((vector unsigned int)mask, shift_consts[3]); \
	mask = vec_perm(mask, mask, vec_lvsr(0, u_ptr + (UV_ROW)*uv_stride)); \
	u_vec = vec_perm(u_vec, u_vec, vec_lvsr(0, u_ptr + (UV_ROW)*uv_stride)); \
	u_vec = vec_sel(vec_ld(0, u_ptr + (UV_ROW)*uv_stride), u_vec, mask); \
	vec_st(u_vec, 0, u_ptr + (UV_ROW)*uv_stride); \
\
	/* V: same scheme with the third coefficient row */ \
	t3 = vec_mulo((vector signed short)r##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 1)); \
	t3 = vec_add(t3, vec_mulo((vector signed short)g##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 2))); \
	t3 = vec_add(t3, vec_mulo((vector signed short)b##UVID, (vector signed short)vec_splat(vec_fix_ins[2], 3))); \
	t3 = vec_sr(t3, (vector unsigned int)vec_mergeh((vector unsigned short)shift_consts[3], vec_splat(vec_fix_ins[2], 0))); \
\
	v_vec = vec_pack(vec_pack((vector unsigned int)t3, shift_consts[3]), (vector unsigned short)shift_consts[3]); \
	v_vec = vec_add(v_vec, v_add); \
\
	/* merge the 4 new V bytes into the destination (unaligned-safe) */ \
	mask = vec_pack(vec_splat_u16(-1), (vector unsigned short)shift_consts[3]); \
	mask = (vector unsigned char)vec_pack((vector unsigned int)mask, shift_consts[3]); \
	mask = vec_perm(mask, mask, vec_lvsr(0, v_ptr + (UV_ROW) * uv_stride)); \
	v_vec = vec_perm(v_vec, v_vec, vec_lvsr(0, v_ptr + (UV_ROW) * uv_stride)); \
	v_vec = vec_sel(vec_ld(0, v_ptr + (UV_ROW) * uv_stride), v_vec, mask); \
	vec_st(v_vec, 0, v_ptr + (UV_ROW) * uv_stride)
338 : | |||
339 : | |||
/* per-row hook required by MAKE_COLORSPACE_ALTIVEC_FROM_RGB; RGB input
 * needs no per-row setup */
#define RGB_TO_YV12_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/* nothing */

/*
 * RGB_TO_YV12_ALTIVEC
 * Converts one 8x2 pixel tile: two luma rows plus one chroma row.
 * Expands inside the generated function body, so it may declare the
 * vector locals used by the READ_RGB_* helpers.
 */
#define RGB_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vector unsigned int p0, p1; \
	vector unsigned int t0, t1; \
	vector unsigned short r, g, b, r0, g0, b0; \
	vector unsigned short lum; \
	vector unsigned char mask; \
	vector unsigned char y_vec; \
	vector unsigned char u_vec; \
	vector unsigned char v_vec; \
	vector signed int t3; \
\
	/* keep the prefetch streams pointed at the data ahead */ \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
\
	/* clear the channel accumulators for this tile */ \
	r0 = g0 = b0 = (vector unsigned short)shift_consts[3]; \
\
	READ_RGB_Y_ALTIVEC(SIZE, 0, 0, C1, C2, C3, C4); \
	READ_RGB_Y_ALTIVEC(SIZE, 1, 0, C1, C2, C3, C4); \
	READ_RGB_UV_ALTIVEC(0, 0)
362 : | |||
363 : | |||
364 : | /* YUV input */ | ||
365 : | |||
/*
 * READ_YUYV_Y_ALTIVEC
 * Loads 32 bytes (16 packed-YUV pixels) from source row ROW.  C1 and C3
 * are the byte offsets of the two luma samples within each 4 byte pixel
 * pair, C2/C4 those of the U and V samples (YUYV: 0,1,2,3 — UYVY:
 * 1,0,3,2).  Stores 16 luma bytes to the Y plane and leaves the gathered
 * chroma bytes in u##ROW / v##ROW for READ_YUYV_UV_ALTIVEC.
 */
#define READ_YUYV_Y_ALTIVEC(ROW,C1,C2,C3,C4) \
	p0 = vec_ld(0, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
	p1 = vec_ld(16, (unsigned int*)(x_ptr + (ROW)*x_stride)); \
\
	/* permute indices 0,4,8,... + C1: pick byte C1 of every pixel pair */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C1)); \
\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
\
	/* same gather for the second luma byte (offset C3) */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C3)); \
\
	/* interleave the two luma gathers back into pixel order */ \
	lum0 = vec_mergeh(lum0, (vector unsigned char)vec_perm(p0, p0, t)); \
	lum1 = vec_mergeh(lum1, (vector unsigned char)vec_perm(p1, p1, t)); \
\
	/* combine both halves and store 16 Y bytes */ \
	lum0 = vec_sel(lum0, lum1, vec_pack(vec_splat_u16(0), vec_splat_u16(-1))); \
	vec_st(lum0, 0, y_ptr + (ROW)*y_stride); \
\
	/* gather the U bytes (offset C2) of both 16 byte halves */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C2)); \
\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	lum1 = vec_perm(lum1, lum1, vec_lvsr(4, (unsigned char*)0)); \
	t = vec_pack(vec_pack(vec_splat_u32(0), vec_splat_u32(-1)), vec_splat_u16(-1)); \
	u##ROW = vec_sel(lum0, lum1, t); \
\
	/* gather the V bytes (offset C4) the same way */ \
	t = vec_lvsl(0, (unsigned char*)0); \
	t = vec_sl(t, vec_splat_u8(2)); \
	t = vec_add(t, vec_splat_u8(C4)); \
\
	lum0 = (vector unsigned char)vec_perm(p0, p0, t); \
	lum1 = (vector unsigned char)vec_perm(p1, p1, t); \
	lum1 = vec_perm(lum1, lum1, vec_lvsr(4, (unsigned char*)0)); \
	t = vec_pack(vec_pack(vec_splat_u32(0), vec_splat_u32(-1)), vec_splat_u16(-1)); \
	v##ROW = vec_sel(lum0, lum1, t);
406 : | |||
/*
 * READ_YUYV_UV_ALTIVEC
 * Averages the chroma gathered from two source rows (vertical 2:1
 * subsampling) and merges the resulting 8 U and 8 V bytes into chroma
 * row UV_ROW without disturbing the rest of the destination vectors.
 */
#define READ_YUYV_UV_ALTIVEC(UV_ROW,ROW1,ROW2,C1,C2,C3,C4) \
	u##ROW1 = vec_avg(u##ROW1, u##ROW2); \
	/* keep the first 8 bytes of new data, destination bytes elsewhere */ \
	t = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \
	t = vec_perm(t, t, vec_lvsl(0, u_ptr + (UV_ROW)*uv_stride)); \
	u##ROW1 = vec_perm(u##ROW1, u##ROW1, vec_lvsl(0, u_ptr + (UV_ROW)*uv_stride)); \
	u##ROW1 = vec_sel(u##ROW1, vec_ld(0, u_ptr + (UV_ROW)*uv_stride), t); \
	vec_st(u##ROW1, 0, u_ptr + (UV_ROW)*uv_stride); \
\
	v##ROW1 = vec_avg(v##ROW1, v##ROW2); \
	t = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \
	t = vec_perm(t, t, vec_lvsl(0, v_ptr + (UV_ROW)*uv_stride)); \
	v##ROW1 = vec_perm(v##ROW1, v##ROW1, vec_lvsl(0, v_ptr + (UV_ROW)*uv_stride)); \
	v##ROW1 = vec_sel(v##ROW1, vec_ld(0, v_ptr + (UV_ROW)*uv_stride), t); \
	vec_st(v##ROW1, 0, v_ptr + (UV_ROW)*uv_stride);
421 : | |||
422 : | |||
/* per-row hook required by MAKE_COLORSPACE_ALTIVEC_FROM_YUV; no per-row
 * setup is needed for packed YUV input */
#define YUYV_TO_YV12_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/*nothing*/

/*
 * YUYV_TO_YV12_ALTIVEC
 * Converts one 16x2 pixel tile of packed YUV: two luma rows plus one
 * vertically averaged chroma row.
 */
#define YUYV_TO_YV12_ALTIVEC(SIZE,C1,C2,C3,C4) \
	/* keep the prefetch streams pointed at the data ahead */ \
	vec_dstt(x_ptr, prefetch_constant, 0); \
	vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
\
	READ_YUYV_Y_ALTIVEC (0, C1,C2,C3,C4) \
	READ_YUYV_Y_ALTIVEC (1, C1,C2,C3,C4) \
	READ_YUYV_UV_ALTIVEC(0, 0, 1, C1,C2,C3,C4)
433 : | |||
/* Instantiate the 32bit packed RGB -> YV12 converters.  C1..C3 index
 * shift_consts[] (shift counts 24/16/8/0) and therefore select where
 * each colour channel sits inside the 32bit pixel for that format. */
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(bgra_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 2, 1, 0, 0)
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(abgr_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 3, 2, 1, 0)
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(rgba_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 0, 1, 2, 0)
MAKE_COLORSPACE_ALTIVEC_FROM_RGB(argb_to_yv12_altivec_c, 4, 8, 2, RGB_TO_YV12_ALTIVEC, 1, 2, 3, 0)

/* Packed YUV -> YV12: C1..C4 are the byte offsets of Y0/U/Y1/V within
 * each 4 byte pixel pair */
MAKE_COLORSPACE_ALTIVEC_FROM_YUV(yuyv_to_yv12_altivec_c, 2, 16, 2, YUYV_TO_YV12_ALTIVEC, 0, 1, 2, 3)
MAKE_COLORSPACE_ALTIVEC_FROM_YUV(uyvy_to_yv12_altivec_c, 2, 16, 2, YUYV_TO_YV12_ALTIVEC, 1, 0, 3, 2)
441 : | |||
442 : | |||
/*
 * WRITE_YUYV_ALTIVEC
 * Builds 32 bytes (16 pixels) of packed YUV output for row ROW from 16
 * luma bytes and 8 U / 8 V bytes.  mask_stencil (0xff every 4th byte)
 * rotated by vec_lvsr(Cn) selects the output slot of each channel; m4
 * spreads source bytes so each lands in its pixel pair, m4+vec4 does
 * the same for the second 16 byte half.  The stores are aligned, hence
 * the alignment check in the *_altivec_c dispatchers.
 * NOTE(review): UV_ROW is currently unused — u_ptr/v_ptr are read
 * directly, which matches the UV_ROW == 0 instantiations below.
 */
#define WRITE_YUYV_ALTIVEC(ROW, UV_ROW, C1,C2,C3,C4) \
	p0 = vec_splat_u8(0); \
	p1 = vec_splat_u8(0); \
\
	/* unaligned-safe load of 16 luma bytes */ \
	y_vec = vec_perm(vec_ld(0, y_ptr + (ROW)*y_stride), vec_ld(16, y_ptr + (ROW)*y_stride), vec_lvsl(0, y_ptr + (ROW)*y_stride)); \
	/* C1: first luma byte of each pixel pair (even luma samples) */ \
	t = vec_perm(y_vec, y_vec, vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1))); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C1, (unsigned char*)0)); \
\
	p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \
	ptmp = vec_perm(t,t, vec_add(m4, vec4));\
	p1 = vec_sel(p1, ptmp, mask); \
\
	/* C3: second luma byte of each pixel pair (odd luma samples) */ \
	ptmp = vec_add(vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1)), vec_splat_u8(1)); \
	t = vec_perm(y_vec, y_vec, ptmp); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C3, (unsigned char*)0)); \
\
	p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \
	ptmp = vec_perm(t, t, vec_add(m4, vec4)); \
	p1 = vec_sel(p1, ptmp, mask); \
\
	/* C2: U bytes (unaligned-safe load, one byte per pixel pair) */ \
	u_vec = vec_perm(vec_ld(0,u_ptr), vec_ld(16, u_ptr), vec_lvsl(0, u_ptr)); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C2, (unsigned char*)0)); \
\
	p0 = vec_sel(p0, vec_perm(u_vec, u_vec, m4), mask); \
	ptmp = vec_perm(u_vec, u_vec, vec_add(m4, vec4)); \
	p1 = vec_sel(p1, ptmp, mask); \
\
	/* C4: V bytes */ \
	v_vec = vec_perm(vec_ld(0, v_ptr), vec_ld(16, v_ptr), vec_lvsl(0, v_ptr)); \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C4, (unsigned char*)0)); \
\
	p0 = vec_sel(p0, vec_perm(v_vec, v_vec, m4), mask); \
	ptmp = vec_perm(v_vec, v_vec, vec_add(m4, vec4)); \
	p1 = vec_sel(p1, ptmp, mask); \
\
	/* aligned store of the assembled 32 output bytes */ \
	vec_st(p0, 0, x_ptr + (ROW)*x_stride); \
	vec_st(p1, 16, x_ptr + (ROW)*x_stride)
483 : | |||
484 : | |||
/* per-row hook required by MAKE_COLORSPACE_ALTIVEC_TO_YUV; no per-row
 * setup is needed for packed YUV output */
#define YV12_TO_YUYV_ALTIVEC_ROW(SIZE,C1,C2,C3,C4) \
	/*nothing*/

/*
 * YV12_TO_YUYV_ALTIVEC
 * Emits one 16x2 pixel output tile: two packed rows sharing one chroma
 * row, refreshing the plane prefetch streams first.
 */
#define YV12_TO_YUYV_ALTIVEC(SIZE,C1,C2,C3,C4) \
	vec_dstt(y_ptr, prefetch_constant_y, 0); \
	vec_dstt(u_ptr, prefetch_constant_uv, 1); \
	vec_dstt(v_ptr, prefetch_constant_uv, 2); \
\
	WRITE_YUYV_ALTIVEC(0, 0, C1,C2,C3,C4); \
	WRITE_YUYV_ALTIVEC(1, 0, C1,C2,C3,C4)
495 : | |||
496 : | |||
497 : | edgomez | 1606 | MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_unaligned_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 0, 1, 2, 3) |
498 : | MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_unaligned_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 1, 0, 3, 2) | ||
499 : | edgomez | 1412 | |
500 : | edgomez | 1606 | |
/* These intermediate functions are used because gcc v3.3 seems to produce an
   invalid register usage with the fallback directly integrated in the altivec
   routine (!!!) */

/*
 * CHECK_COLORSPACE_ALTIVEC_TO_YUV
 * Expands to a dispatcher: run the AltiVec routine FAST only when both
 * the destination pointer and its stride are 16 byte aligned (the fast
 * path stores with aligned vec_st); otherwise call the plain C FALLBACK.
 */
#define CHECK_COLORSPACE_ALTIVEC_TO_YUV(NAME,FAST,FALLBACK) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	int y_stride, int uv_stride, \
	int width, int height, int vflip) \
{\
	/* size_t instead of uint32_t: casting a pointer to uint32_t \
	 * truncates on 64 bit targets; only the low 4 bits matter here */\
	if( ((size_t)x_ptr & 15) | (x_stride & 15) )\
		FALLBACK(x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride, width, height, vflip);\
	else\
		FAST(x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride, width, height, vflip);\
}

CHECK_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_c, yv12_to_yuyv_altivec_unaligned_c, yv12_to_yuyv_c)
CHECK_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_c, yv12_to_uyvy_altivec_unaligned_c, yv12_to_uyvy_c)
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |