/*****************************************************************************
 *
 *  XVID MPEG-4 VIDEO CODEC
 *  - Colorspace conversion functions with altivec optimization -
 *
 *  Copyright(C) 2004 Christoph Nägeli
 *
 *  This program is free software ; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation ; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program ; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 * $Id$
 *
 ****************************************************************************/

#ifdef HAVE_ALTIVEC_H
/* NOTE(review): the header name is missing from this #include. Judging by
 * the guard macro it is presumably altivec.h -- restore it before building. */
#include
#endif

#include "../../portab.h"
#include "../colorspace.h"

#undef DEBUG
/* NOTE(review): the header name is missing from this #include as well; which
 * header was meant cannot be determined from this file -- restore it from the
 * upstream source. */
#include

/********** generic altivec RGB to YV12 colorspace macro **********/

/*
 * MAKE_COLORSPACE_ALTIVEC_FROM_RGB generates one conversion function called
 * NAME that walks a packed source image (x_ptr) and three destination planes
 * (y_ptr, u_ptr, v_ptr) in lock step. The per-block work is not done here: it
 * is delegated to two macros selected by FUNC, which are expanded inside the
 * generated body and therefore see every local declared below.
 *
 * Macro parameters:
 *   NAME     name of the generated function
 *   SIZE     bytes per pixel of the packed image (x_ptr, a uint8_t pointer,
 *            advances by PIXELS*SIZE per inner iteration)
 *   PIXELS   pixels consumed horizontally by one FUNC expansion
 *   VPIXELS  rows consumed by one iteration of the outer loop
 *   FUNC     base name of the worker macros: FUNC##_ROW is expanded once per
 *            row group, FUNC once per PIXELS-wide block
 *   C1..C4   forwarded untouched to FUNC and FUNC##_ROW; their meaning is
 *            defined by those macros, which are not part of this section
 *
 * Parameters of the generated function:
 *   x_ptr, x_stride       packed image and its row stride in bytes
 *   y_ptr, u_ptr, v_ptr   the three planes
 *   y_stride, uv_stride   row strides of the Y plane and of the U/V planes
 *   width, height         image size in pixels
 *   vflip                 non-zero: walk the packed image bottom-up
 *
 * The loops run over fixed_width (width rounded up to a multiple of 16), not
 * over width. NOTE(review): this presumably requires all buffers to be valid
 * out to the rounded-up width -- confirm against the callers.
 *
 * Names used but defined elsewhere: g_vec_fix_ins, build_prefetch and the
 * *_ADD_IN constants (defined further down in this file).
 */
#define MAKE_COLORSPACE_ALTIVEC_FROM_RGB(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
     uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
     int y_stride, int uv_stride, \
     int width, int height, int vflip) \
{ \
    int fixed_width = (width + 15) & ~15; /* width rounded up to a multiple of 16 */ \
    int x_dif = x_stride - (SIZE) * fixed_width; /* bytes from the end of a processed row to the next row */ \
    int y_dif = y_stride - fixed_width; \
    int uv_dif = uv_stride - (fixed_width / 2); /* chroma rows are half as wide */ \
    int x, y; \
    unsigned prefetch_constant; \
    /* NOTE(review): subscripting a register-qualified array is undefined in ISO C; the target compilers apparently accept it */ \
    register vector unsigned int shift_consts[4]; \
    \
    vector unsigned char y_add; \
    vector unsigned char u_add; \
    vector unsigned char v_add; \
    \
    vector unsigned short vec_fix_ins[3]; \
    /* take local copies of the three global g_vec_fix_ins vectors */ \
    vec_st(vec_ldl(0, &g_vec_fix_ins[0]), 0, &vec_fix_ins[0]); \
    vec_st(vec_ldl(0, &g_vec_fix_ins[1]), 0, &vec_fix_ins[1]); \
    vec_st(vec_ldl(0, &g_vec_fix_ins[2]), 0, &vec_fix_ins[2]); \
    /* shift_consts = 24, 16, 8, 0 in every 32-bit element; 24 and 16 are built as sums of two splats */ \
    shift_consts[0] = vec_add(vec_splat_u32(12), vec_splat_u32(12)); \
    shift_consts[1] = vec_add(vec_splat_u32(8), vec_splat_u32(8)); \
    shift_consts[2] = vec_splat_u32(8); \
    shift_consts[3] = vec_splat_u32(0); \
    /* start two data-stream prefetches on the packed image (issued before any vflip adjustment) */ \
    prefetch_constant = build_prefetch(16, 2, (short)x_stride); \
    vec_dstt(x_ptr, prefetch_constant, 0); \
    vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); /* second stream starts two rows further on */ \
    /* write each additive constant into byte 0 of its vector ... */ \
    *((unsigned char*)&y_add) = Y_ADD_IN; \
    *((unsigned char*)&u_add) = U_ADD_IN; \
    *((unsigned char*)&v_add) = V_ADD_IN; \
    /* ... then replicate element 0 across the whole vector */ \
    y_add = vec_splat(y_add, 0); \
    u_add = vec_splat(u_add, 0); \
    v_add = vec_splat(v_add, 0); \
    \
    if(vflip) { \
        x_ptr += (height - 1) * x_stride; /* start at the last row */ \
        x_dif = -(SIZE) * fixed_width - x_stride; /* rewind the row, then step one row up */ \
        x_stride = -x_stride; \
    } \
    \
    for(y = 0; y < height; y += (VPIXELS)) { \
        FUNC##_ROW(SIZE,C1,C2,C3,C4); /* per-row-group hook, defined by the FUNC family */ \
        for(x = 0; x < fixed_width; x += (PIXELS)) { \
            FUNC(SIZE,C1,C2,C3,C4); /* converts one block using the locals above */ \
            x_ptr += (PIXELS)*(SIZE); \
            y_ptr += (PIXELS); \
            u_ptr += (PIXELS)/2; \
            v_ptr += (PIXELS)/2; \
        } \
        x_ptr += x_dif + (VPIXELS-1) * x_stride; /* move every pointer to the start of the next row group */ \
        y_ptr += y_dif + (VPIXELS-1) * y_stride; \
        u_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride; \
        v_ptr += uv_dif + ((VPIXELS/2)-1)*uv_stride; \
    } \
    vec_dssall(); /* stop all data-stream prefetches */ \
}

/********** generic altivec YUV to YV12 colorspace macro **********/

/*
 * MAKE_COLORSPACE_ALTIVEC_FROM_YUV generates a function NAME with the same
 * signature, macro parameters and loop skeleton as the function produced by
 * MAKE_COLORSPACE_ALTIVEC_FROM_RGB: round the width up to a multiple of 16,
 * start two prefetch streams on the packed image, optionally flip it
 * vertically, then expand FUNC##_ROW once per row group and FUNC once per
 * PIXELS-wide block while advancing x_ptr / y_ptr / u_ptr / v_ptr.
 *
 * The vector locals declared here (p0, p1, lum0, lum1, u0, u1, v0, v1, t) are
 * not touched by this skeleton; they are presumably scratch registers for the
 * FUNC macros, which are defined outside this section.
 */
#define MAKE_COLORSPACE_ALTIVEC_FROM_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
     uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
     int y_stride, int uv_stride, \
     int width, int height, int vflip) \
{ \
    int fixed_width = (width + 15) & ~15; /* width rounded up to a multiple of 16 */ \
    int x_dif = x_stride - (SIZE)*fixed_width; /* bytes from the end of a processed row to the next row */ \
    int y_dif = y_stride - fixed_width; \
    int uv_dif = uv_stride - (fixed_width / 2); \
    int x, y; \
    \
    unsigned prefetch_constant; \
    /* scratch vectors, not used by the skeleton itself */ \
    vector unsigned int p0, p1; \
    vector unsigned char lum0, lum1; \
    vector unsigned char u0, u1; \
    vector unsigned char v0, v1; \
    vector unsigned char t; \
    /* start two data-stream prefetches on the packed image */ \
    prefetch_constant = build_prefetch(16, 2, (short)x_stride); \
    vec_dstt(x_ptr, prefetch_constant, 0); \
    vec_dstt(x_ptr + (x_stride << 1), prefetch_constant, 1); \
    \
    if(vflip) { \
        x_ptr += (height - 1) * x_stride; /* start at the last row */ \
        x_dif = -(SIZE)*fixed_width - x_stride; /* rewind the row, then step one row up */ \
        x_stride = -x_stride; \
    } \
    \
    for(y = 0; y < height; y += (VPIXELS)) { \
        FUNC##_ROW(SIZE,C1,C2,C3,C4); \
        for(x = 0; x < fixed_width; x += (PIXELS)) { \
            FUNC(SIZE,C1,C2,C3,C4); \
            x_ptr += (PIXELS)*(SIZE); \
            y_ptr += (PIXELS); \
            u_ptr += (PIXELS)/2; \
            v_ptr += (PIXELS)/2; \
        } \
        x_ptr += x_dif + (VPIXELS-1) * x_stride; /* move every pointer to the start of the next row group */ \
        y_ptr += y_dif + (VPIXELS-1) * y_stride; \
        u_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
        v_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
    } \
    vec_dssall(); /* stop all data-stream prefetches */ \
}

/********** generic altivec YV12 to YUV colorspace macro **********/

/*
 * MAKE_COLORSPACE_ALTIVEC_TO_YUV generates a function NAME with the same
 * signature, macro parameters and loop skeleton as the two generators above.
 * It differs in its setup: the prefetch streams are started on the three
 * planes (y_ptr, u_ptr, v_ptr) instead of on the packed image, and three
 * constant vectors (mask_stencil, m4, vec4) are prepared for the FUNC macros.
 *
 * The remaining vector locals (y_vec, u_vec, v_vec, p0, p1, ptmp, mask, t)
 * are not touched by this skeleton; they are presumably scratch registers for
 * the FUNC macros, which are defined outside this section.
 */
#define MAKE_COLORSPACE_ALTIVEC_TO_YUV(NAME,SIZE,PIXELS,VPIXELS,FUNC,C1,C2,C3,C4) \
void \
NAME(uint8_t *x_ptr, int x_stride, \
     uint8_t *y_ptr, uint8_t *u_ptr, uint8_t *v_ptr, \
     int y_stride, int uv_stride, \
     int width, int height, int vflip) \
{ \
    int fixed_width = (width + 15) & ~15; /* width rounded up to a multiple of 16 */ \
    int x_dif = x_stride - (SIZE)*fixed_width; /* bytes from the end of a processed row to the next row */ \
    int y_dif = y_stride - fixed_width; \
    int uv_dif = uv_stride - (fixed_width / 2); \
    int x, y; \
    \
    vector unsigned char y_vec; \
    vector unsigned char u_vec; \
    vector unsigned char v_vec; \
    vector unsigned char p0, p1, ptmp; \
    vector unsigned char mask; \
    vector unsigned char mask_stencil; \
    vector unsigned char t; \
    vector unsigned char m4; \
    vector unsigned char vec4; \
    \
    unsigned prefetch_constant_y; \
    unsigned prefetch_constant_uv; \
    /* the Y plane and the U/V planes get separately built prefetch descriptors */ \
    prefetch_constant_y = build_prefetch(16, 4, (short)y_stride); \
    prefetch_constant_uv = build_prefetch(16, 2, (short)uv_stride); \
    /* one prefetch stream per plane, on channels 0, 1 and 2 */ \
    vec_dstt(y_ptr, prefetch_constant_y, 0); \
    vec_dstt(u_ptr, prefetch_constant_uv, 1); \
    vec_dstt(v_ptr, prefetch_constant_uv, 2); \
    /* constants for FUNC; as the merges read, mask_stencil is FF 00 00 00 repeated and m4 is 0,0,0,0,1,1,1,1,2,... -- TODO confirm */ \
    mask_stencil = (vector unsigned char)vec_mergeh( (vector unsigned short)vec_mergeh(vec_splat_u8(-1), vec_splat_u8(0)), vec_splat_u16(0) ); \
    m4 = vec_sr(vec_lvsl(0, (unsigned char*)0), vec_splat_u8(2)); \
    vec4 = vec_splat_u8(4); /* every byte set to 4 */ \
    \
    if(vflip) { \
        x_ptr += (height - 1) * x_stride; /* start at the last row */ \
        x_dif = -(SIZE)*fixed_width - x_stride; /* rewind the row, then step one row up */ \
        x_stride = -x_stride; \
    } \
    \
    for(y = 0; y < height; y += (VPIXELS)) { \
        FUNC##_ROW(SIZE,C1,C2,C3,C4); \
        for(x = 0; x < fixed_width; x += (PIXELS)) { \
            FUNC(SIZE,C1,C2,C3,C4); \
            x_ptr += (PIXELS)*(SIZE); \
            y_ptr += (PIXELS); \
            u_ptr += (PIXELS)/2; \
            v_ptr += (PIXELS)/2; \
        } \
        x_ptr += x_dif + (VPIXELS-1) * x_stride; /* move every pointer to the start of the next row group */ \
        y_ptr += y_dif + (VPIXELS-1) * y_stride; \
        u_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
        v_ptr += uv_dif + ((VPIXELS/2)-1) * uv_stride; \
    } \
    vec_dssall(); /* stop all data-stream prefetches */ \
}

/********** colorspace input (xxx_to_yv12) functions **********/

/*	rgb -> yuv definitions

	The following constants are the "official spec" values from
	"Video Demystified" (ISBN 1-878707-09-4).

	rgb<->yuv _is_ lossy, since most programs do the conversion differently.

	SCALEBITS/FIX taken from ffmpeg.

	NOTE(review): all coefficients below are positive magnitudes; any sign
	must be applied where they are used, which is outside this section.
*/

/* Y (luma): weights of R, G, B and the additive offset */
#define Y_R_IN 0.257
#define Y_G_IN 0.504
#define Y_B_IN 0.098
#define Y_ADD_IN 16

/* U (chroma): weights of R, G, B and the additive offset */
#define U_R_IN 0.148
#define U_G_IN 0.291
#define U_B_IN 0.439
#define U_ADD_IN 128

/* V (chroma): weights of R, G, B and the additive offset */
#define V_R_IN 0.439
#define V_G_IN 0.368
#define V_B_IN 0.071
#define V_ADD_IN 128

#define SCALEBITS_IN 8
#define FIX_IN(x) ((uint16_t) ((x) * (1L<