19 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
20 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
* |
* |
22 |
* $Id: colorspace_altivec.c,v 1.1 2004-04-05 20:36:36 edgomez Exp $ |
* $Id: colorspace_altivec.c,v 1.3 2004-12-09 23:02:54 edgomez Exp $ |
23 |
* |
* |
24 |
****************************************************************************/ |
****************************************************************************/ |
25 |
|
|
171 |
vector unsigned char y_vec; \ |
vector unsigned char y_vec; \ |
172 |
vector unsigned char u_vec; \ |
vector unsigned char u_vec; \ |
173 |
vector unsigned char v_vec; \ |
vector unsigned char v_vec; \ |
174 |
vector unsigned char p0, p1; \ |
vector unsigned char p0, p1, ptmp; \ |
175 |
vector unsigned char mask; \ |
vector unsigned char mask; \ |
176 |
vector unsigned char mask_stencil; \ |
vector unsigned char mask_stencil; \ |
177 |
vector unsigned char t; \ |
vector unsigned char t; \ |
249 |
/*
 * Pack three prefetch parameters into a single 32-bit control word:
 *
 *   bits 24..28  block_size   (values above 31 are stored as 0)
 *   bits 16..23  block_count
 *   bits  0..15  stride       (two's-complement, 16 bits)
 *
 * NOTE(review): the layout matches the control operand of the AltiVec
 * data-stream touch intrinsics (vec_dst and friends); confirm against the
 * call sites, which are not visible here.
 *
 * The stride is masked to its 16-bit field before being merged.  Without
 * the mask a negative stride is sign-extended by integer promotion and its
 * high bits overwrite the block_size and block_count fields.
 */
static inline unsigned
build_prefetch(unsigned char block_size, unsigned char block_count, short stride)
{
	/* The size field is only 5 bits wide; out-of-range values become 0. */
	if (block_size > 31)
		block_size = 0;

	return (((unsigned)block_size << 24) |
	        ((unsigned)block_count << 16) |
	        ((unsigned)stride & 0xFFFFu));
}
254 |
|
|
449 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C1, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C1, (unsigned char*)0)); \ |
450 |
\ |
\ |
451 |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
452 |
p1 = vec_sel(p1, vec_perm(t, t, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(t,t, vec_add(m4, vec4));\ |
453 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
454 |
\ |
\ |
455 |
/* C3 */ \ |
/* C3 */ \ |
456 |
t = vec_perm(y_vec, y_vec, vec_add(vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1)), vec_splat_u8(1))); \ |
ptmp = vec_add(vec_sl(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(1)), vec_splat_u8(1)); \ |
457 |
|
t = vec_perm(y_vec, y_vec, ptmp); \ |
458 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C3, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C3, (unsigned char*)0)); \ |
459 |
\ |
\ |
460 |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(t, t, m4), mask); \ |
461 |
p1 = vec_sel(p1, vec_perm(t, t, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(t, t, vec_add(m4, vec4)); \ |
462 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
463 |
\ |
\ |
464 |
/* C2 */ \ |
/* C2 */ \ |
465 |
u_vec = vec_perm(vec_ld(0,u_ptr), vec_ld(16, u_ptr), vec_lvsl(0, u_ptr)); \ |
u_vec = vec_perm(vec_ld(0,u_ptr), vec_ld(16, u_ptr), vec_lvsl(0, u_ptr)); \ |
466 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C2, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C2, (unsigned char*)0)); \ |
467 |
\ |
\ |
468 |
p0 = vec_sel(p0, vec_perm(u_vec, u_vec, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(u_vec, u_vec, m4), mask); \ |
469 |
p1 = vec_sel(p1, vec_perm(u_vec, u_vec, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(u_vec, u_vec, vec_add(m4, vec4)); \ |
470 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
471 |
\ |
\ |
472 |
/* C4 */ \ |
/* C4 */ \ |
473 |
v_vec = vec_perm(vec_ld(0, v_ptr), vec_ld(16, v_ptr), vec_lvsl(0, v_ptr)); \ |
v_vec = vec_perm(vec_ld(0, v_ptr), vec_ld(16, v_ptr), vec_lvsl(0, v_ptr)); \ |
474 |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C4, (unsigned char*)0)); \ |
mask = vec_perm(mask_stencil, mask_stencil, vec_lvsr(C4, (unsigned char*)0)); \ |
475 |
\ |
\ |
476 |
p0 = vec_sel(p0, vec_perm(v_vec, v_vec, m4), mask); \ |
p0 = vec_sel(p0, vec_perm(v_vec, v_vec, m4), mask); \ |
477 |
p1 = vec_sel(p1, vec_perm(v_vec, v_vec, vec_add(m4, vec4)), mask); \ |
ptmp = vec_perm(v_vec, v_vec, vec_add(m4, vec4)); \ |
478 |
|
p1 = vec_sel(p1, ptmp, mask); \ |
479 |
\ |
\ |
480 |
vec_st(p0, 0, x_ptr + (ROW)*x_stride); \ |
vec_st(p0, 0, x_ptr + (ROW)*x_stride); \ |
481 |
vec_st(p1, 16, x_ptr + (ROW)*x_stride) |
vec_st(p1, 16, x_ptr + (ROW)*x_stride) |