19 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
20 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
* |
* |
22 |
* $Id: colorspace_altivec.c,v 1.2 2004-10-17 10:20:15 edgomez Exp $ |
* $Id: colorspace_altivec.c,v 1.4 2005-03-18 18:01:34 edgomez Exp $ |
23 |
* |
* |
24 |
****************************************************************************/ |
****************************************************************************/ |
25 |
|
|
28 |
#endif |
#endif |
29 |
|
|
30 |
#include "../../portab.h" |
#include "../../portab.h" |
31 |
|
#include "../colorspace.h" |
32 |
|
|
33 |
#undef DEBUG |
#undef DEBUG |
34 |
#include <stdio.h> |
#include <stdio.h> |
172 |
vector unsigned char y_vec; \ |
vector unsigned char y_vec; \ |
173 |
vector unsigned char u_vec; \ |
vector unsigned char u_vec; \ |
174 |
vector unsigned char v_vec; \ |
vector unsigned char v_vec; \ |
175 |
vector unsigned char p0, p1; \ |
vector unsigned char p0, p1, ptmp; \ |
176 |
vector unsigned char mask; \ |
vector unsigned char mask; \ |
177 |
vector unsigned char mask_stencil; \ |
vector unsigned char mask_stencil; \ |
178 |
vector unsigned char t; \ |
vector unsigned char t; \ |
450 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C1, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C1, (unsigned char*)0)); \ |
451 |
\ |
\ |
452 |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
453 |
p1 = vec_sel(p1, vec_perm(t, t, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(t,t, vec_add(m4, vec4));\ |
454 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
455 |
\ |
\ |
456 |
/* C3 */ \ |
/* C3 */ \ |
457 |
t = vec_perm(y_vec, y_vec, vec_add(vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1)), vec_splat_u8(1))); \ |
ptmp = vec_add(vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1)), vec_splat_u8(1)); \ |
458 |
|
t = vec_perm(y_vec, y_vec, ptmp); \ |
459 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C3, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C3, (unsigned char*)0)); \ |
460 |
\ |
\ |
461 |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
462 |
p1 = vec_sel(p1, vec_perm(t, t, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(t, t, vec_add(m4, vec4)); \ |
463 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
464 |
\ |
\ |
465 |
/* C2 */ \ |
/* C2 */ \ |
466 |
u_vec = vec_perm(vec_ld(0,u_ptr), vec_ld(16, u_ptr), vec_lvsl(0, u_ptr)); \ |
u_vec = vec_perm(vec_ld(0,u_ptr), vec_ld(16, u_ptr), vec_lvsl(0, u_ptr)); \ |
467 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C2, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C2, (unsigned char*)0)); \ |
468 |
\ |
\ |
469 |
p0 = vec_sel(p0, vec_perm(u_vec, u_vec, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(u_vec, u_vec, m4), mask); \ |
470 |
p1 = vec_sel(p1, vec_perm(u_vec, u_vec, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(u_vec, u_vec, vec_add(m4, vec4)); \ |
471 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
472 |
\ |
\ |
473 |
/* C4 */ \ |
/* C4 */ \ |
474 |
v_vec = vec_perm(vec_ld(0, v_ptr), vec_ld(16, v_ptr), vec_lvsl(0, v_ptr)); \ |
v_vec = vec_perm(vec_ld(0, v_ptr), vec_ld(16, v_ptr), vec_lvsl(0, v_ptr)); \ |
475 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C4, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C4, (unsigned char*)0)); \ |
476 |
\ |
\ |
477 |
p0 = vec_sel(p0, vec_perm(v_vec, v_vec, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(v_vec, v_vec, m4), mask); \ |
478 |
p1 = vec_sel(p1, vec_perm(v_vec, v_vec, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(v_vec, v_vec, vec_add(m4, vec4)); \ |
479 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
480 |
\ |
\ |
481 |
vec_st(p0, 0, x_ptr + (ROW)*x_stride); \ |
vec_st(p0, 0, x_ptr + (ROW)*x_stride); \ |
482 |
vec_st(p1, 16, x_ptr + (ROW)*x_stride) |
vec_st(p1, 16, x_ptr + (ROW)*x_stride) |
494 |
WRITE_YUYV_ALTIVEC(1, 0, C1,C2,C3,C4) |
WRITE_YUYV_ALTIVEC(1, 0, C1,C2,C3,C4) |
495 |
|
|
496 |
|
|
497 |
MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 0, 1, 2, 3) |
MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_unaligned_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 0, 1, 2, 3) |
498 |
MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 1, 0, 3, 2) |
MAKE_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_unaligned_c, 2, 16, 2, YV12_TO_YUYV_ALTIVEC, 1, 0, 3, 2) |
499 |
|
|
500 |
|
|
501 |
|
/*
 * These intermediate functions are used because gcc v3.3 seems to produce
 * invalid register usage when the fallback is integrated directly in the
 * altivec routine (!!!)
 *
 * CHECK_COLORSPACE_ALTIVEC_TO_YUV(NAME, FAST, FALLBACK) defines a function
 *
 *   void NAME(x_ptr, x_stride, y_ptr, u_ptr, v_ptr,
 *             y_stride, uv_stride, width, height, vflip)
 *
 * that forwards all of its arguments, unchanged, to exactly one of:
 *   - FALLBACK, when x_ptr or x_stride is not a multiple of 16;
 *   - FAST, otherwise.
 *
 * NOTE(review): the 16-byte requirement presumably comes from the vec_st
 * stores made through x_ptr by the AltiVec writer macro -- confirm.
 */
#define CHECK_COLORSPACE_ALTIVEC_TO_YUV(NAME,FAST,FALLBACK) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
	 uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
	 int y_stride, int uv_stride, \
	 int width, int height, int vflip) \
{ \
	/* Cast through unsigned long rather than uint32_t so the pointer is */ \
	/* not converted to a narrower integer on 64-bit PowerPC (LP64);     */ \
	/* only the low four bits are tested either way.                     */ \
	if ((((unsigned long)x_ptr) & 15) | (x_stride & 15)) { \
		FALLBACK(x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride, width, height, vflip); \
	} else { \
		FAST(x_ptr, x_stride, y_ptr, u_ptr, v_ptr, y_stride, uv_stride, width, height, vflip); \
	} \
}

/* Public entry points: alignment check in front of the AltiVec routines. */
CHECK_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_yuyv_altivec_c, yv12_to_yuyv_altivec_unaligned_c, yv12_to_yuyv_c)
CHECK_COLORSPACE_ALTIVEC_TO_YUV(yv12_to_uyvy_altivec_c, yv12_to_uyvy_altivec_unaligned_c, yv12_to_uyvy_c)