--- trunk/xvidcore/src/image/image.c 2004/04/01 11:11:28 1397 +++ trunk/xvidcore/src/image/image.c 2006/10/13 11:26:18 1736 @@ -19,22 +19,22 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: image.c,v 1.28 2004-04-01 11:11:28 suxen_drol Exp $ + * $Id: image.c,v 1.38 2006-10-13 11:26:18 Skal Exp $ * ****************************************************************************/ #include #include /* memcpy, memset */ #include - #include "../portab.h" #include "../global.h" /* XVID_CSP_XXX's */ #include "../xvid.h" /* XVID_CSP_XXX's */ #include "image.h" #include "colorspace.h" #include "interpolate8x8.h" -#include "reduced.h" #include "../utils/mem_align.h" +#include "../motion/sad.h" +#include "../utils/emms.h" #include "font.h" /* XXX: remove later */ @@ -237,12 +237,11 @@ } } -/* bframe encoding requires image-based u,v interpolation */ void -image_interpolate(const IMAGE * refn, - IMAGE * refh, - IMAGE * refv, - IMAGE * refhv, +image_interpolate(const uint8_t * refn, + uint8_t * refh, + uint8_t * refv, + uint8_t * refhv, uint32_t edged_width, uint32_t edged_height, uint32_t quarterpel, @@ -250,19 +249,14 @@ { const uint32_t offset = EDGE_SIZE2 * (edged_width + 1); /* we only interpolate half of the edge area */ const uint32_t stride_add = 7 * edged_width; -#if 0 - const uint32_t edged_width2 = edged_width / 2; - const uint32_t edged_height2 = edged_height / 2; - const uint32_t offset2 = EDGE_SIZE2 * (edged_width2 + 1); - const uint32_t stride_add2 = 7 * edged_width2; -#endif - uint8_t *n_ptr, *h_ptr, *v_ptr, *hv_ptr; - uint32_t x, y; + uint8_t *n_ptr; + uint8_t *h_ptr, *v_ptr, *hv_ptr; + uint32_t x, y; - n_ptr = refn->y; - h_ptr = refh->y; - v_ptr = refv->y; + n_ptr = (uint8_t*)refn; + h_ptr = refh; + v_ptr = refv; n_ptr -= offset; h_ptr -= offset; @@ -291,8 +285,8 @@ n_ptr += stride_add; } - h_ptr = refh->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; - hv_ptr = refhv->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; + h_ptr = refh + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; + hv_ptr = refhv + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; for (y = 0; y < (edged_height - EDGE_SIZE); y = y + 8) { hv_ptr -= stride_add; @@ -308,7 +302,7 @@ } } else { - hv_ptr = refhv->y; + hv_ptr = refhv; hv_ptr -= offset; for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) { @@ -334,123 +328,6 @@ n_ptr += stride_add; } } -/* -#ifdef BFRAMES - n_ptr = refn->u; - h_ptr = refh->u; - v_ptr = refv->u; - hv_ptr = refhv->u; - - n_ptr -= offset2; - h_ptr -= offset2; - v_ptr -= offset2; - hv_ptr -= offset2; - - for (y = 0; y < edged_height2; y += 8) { - for (x = 0; x < edged_width2; x += 8) { - interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding); - interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding); - interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding); - - n_ptr += 8; - h_ptr += 8; - v_ptr += 8; - hv_ptr += 8; - } - h_ptr += stride_add2; - v_ptr += stride_add2; - hv_ptr += stride_add2; - n_ptr += stride_add2; - } - - n_ptr = refn->v; - h_ptr = refh->v; - v_ptr = refv->v; - hv_ptr = refhv->v; - - n_ptr -= offset2; - h_ptr -= offset2; - v_ptr -= offset2; - hv_ptr -= offset2; - - for (y = 0; y < edged_height2; y = y + 8) { - for (x = 0; x < edged_width2; x = x + 8) { - interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding); - interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding); - interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding); - - n_ptr += 8; - h_ptr += 8; - v_ptr += 8; - hv_ptr += 8; - } - h_ptr += stride_add2; - v_ptr += stride_add2; - hv_ptr += stride_add2; - n_ptr += stride_add2; - } -#endif -*/ - /* - interpolate_halfpel_h( - refh->y - offset, - refn->y - offset, - edged_width, edged_height, - rounding); - - interpolate_halfpel_v( - refv->y - offset, - refn->y - offset, - edged_width, edged_height, - rounding); - - interpolate_halfpel_hv( - refhv->y - offset, - refn->y - offset, - edged_width, edged_height, - rounding); - */ - - /* uv-image-based compensation - offset = EDGE_SIZE2 * (edged_width / 2 + 1); - - interpolate_halfpel_h( - refh->u - offset, - refn->u - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_v( - refv->u - offset, - refn->u - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_hv( - refhv->u - offset, - refn->u - offset, - edged_width / 2, edged_height / 2, - rounding); - - - interpolate_halfpel_h( - refh->v - offset, - refn->v - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_v( - refv->v - offset, - refn->v - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_hv( - refhv->v - offset, - refn->v - offset, - edged_width / 2, edged_height / 2, - rounding); - */ } @@ -718,7 +595,7 @@ int height, uint32_t edged_width, uint8_t * dst[4], - uint32_t dst_stride[4], + int dst_stride[4], int csp, int interlacing) { @@ -893,24 +770,64 @@ } -long plane_sse(uint8_t * orig, - uint8_t * recon, - uint16_t stride, - uint16_t width, - uint16_t height) +long plane_sse(uint8_t *orig, + uint8_t *recon, + uint16_t stride, + uint16_t width, + uint16_t height) { - int diff, x, y; - long sse=0; + int y, bwidth, bheight; + long sse = 0; - for (y = 0; y < height; y++) { + bwidth = width & (~0x07); + bheight = height & (~0x07); + + /* Compute the 8x8 integer part */ + for (y = 0; y> 1; + int stride2 = stride >> 1; int w = mbl << 4, w2,i; if(w > width) @@ -1051,24 +968,24 @@ dY = (uint8_t*)out_frm->plane[0] + (mby << 4) * out_frm->stride[0] + (mbx << 4); dU = (uint8_t*)out_frm->plane[1] + (mby << 3) * out_frm->stride[1] + (mbx << 3); dV = (uint8_t*)out_frm->plane[2] + (mby << 3) * out_frm->stride[2] + (mbx << 3); - sY = cur->y + (mby << 4) * std + (mbx << 4); - sU = cur->u + (mby << 3) * std2 + (mbx << 3); - sV = cur->v + (mby << 3) * std2 + (mbx << 3); + sY = cur->y + (mby << 4) * stride + (mbx << 4); + sU = cur->u + (mby << 3) * stride2 + (mbx << 3); + sV = cur->v + (mby << 3) * stride2 + (mbx << 3); for(i = 0 ; i < 16 ; i++) { memcpy(dY,sY,w); dY += out_frm->stride[0]; - sY += std; + sY += stride; } for(i = 0 ; i < 8 ; i++) { memcpy(dU,sU,w2); dU += out_frm->stride[1]; - sU += std2; + sU += stride2; } for(i = 0 ; i < 8 ; i++) { memcpy(dV,sV,w2); dV += out_frm->stride[2]; - sV += std2; + sV += stride2; } } @@ -1099,77 +1016,69 @@ } } +/****************************************************************************/ -/* reduced resolution deblocking filter - block = block size (16=rrv, 8=full resolution) - flags = XVID_DEC_YDEBLOCK|XVID_DEC_UVDEBLOCK -*/ -void -image_deblock_rrv(IMAGE * img, int edged_width, - const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride, - int block, int flags) -{ - const int edged_width2 = edged_width /2; - const int nblocks = block / 8; /* skals code uses 8pixel block uints */ - int i,j; - - /* luma: j,i in block units */ - - for (j = 1; j < mb_height*2; j++) /* horizontal deblocking */ - for (i = 0; i < mb_width*2; i++) - { - if (mbs[(j-1)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED || - mbs[(j+0)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED) - { - hfilter_31(img->y + (j*block - 1)*edged_width + i*block, - img->y + (j*block + 0)*edged_width + i*block, nblocks); - } - } - - for (j = 0; j < mb_height*2; j++) /* vertical deblocking */ - for (i = 1; i < mb_width*2; i++) - { - if (mbs[(j/2)*mb_stride + (i-1)/2].mode != MODE_NOT_CODED || - mbs[(j/2)*mb_stride + (i+0)/2].mode != MODE_NOT_CODED) - { - vfilter_31(img->y + (j*block)*edged_width + i*block - 1, - img->y + (j*block)*edged_width + i*block + 0, - edged_width, nblocks); - } - } - +static void (*deintl_core)(uint8_t *, int width, int height, const int stride) = 0; +extern void xvid_deinterlace_sse(uint8_t *, int width, int height, const int stride); +#define CLIP_255(x) ( ((x)&~255) ? ((-(x)) >> (8*sizeof((x))-1))&0xff : (x) ) - /* chroma */ - - for (j = 1; j < mb_height; j++) /* horizontal deblocking */ - for (i = 0; i < mb_width; i++) - { - if (mbs[(j-1)*mb_stride + i].mode != MODE_NOT_CODED || - mbs[(j+0)*mb_stride + i].mode != MODE_NOT_CODED) - { - hfilter_31(img->u + (j*block - 1)*edged_width2 + i*block, - img->u + (j*block + 0)*edged_width2 + i*block, nblocks); - hfilter_31(img->v + (j*block - 1)*edged_width2 + i*block, - img->v + (j*block + 0)*edged_width2 + i*block, nblocks); - } - } +static void deinterlace_c(uint8_t *pix, int width, int height, const int bps) +{ + pix += bps; + while(width-->0) + { + int p1 = pix[-bps]; + int p2 = pix[0]; + int p0 = p2; + int j = (height>>1) - 1; + int V; + unsigned char *P = pix++; + while(j-->0) + { + const int p3 = P[ bps]; + const int p4 = P[2*bps]; + V = ((p1+p3+1)>>1) + ((p2 - ((p0+p4+1)>>1)) >> 2); + P[0] = CLIP_255( V ); + p0 = p2; + p1 = p3; + p2 = p4; + P += 2*bps; + } + V = ((p1+p1+1)>>1) + ((p2 - ((p0+p2+1)>>1)) >> 2); + P[0] = CLIP_255( V ); + } +} +#undef CLIP_255 - for (j = 0; j < mb_height; j++) /* vertical deblocking */ - for (i = 1; i < mb_width; i++) +int xvid_image_deinterlace(xvid_image_t* img, int width, int height, int bottom_first) +{ + if (height&1) + return 0; + if (img->csp!=XVID_CSP_PLANAR && img->csp!=XVID_CSP_I420 && img->csp!=XVID_CSP_YV12) + return 0; /* not yet supported */ + if (deintl_core==0) { + deintl_core = deinterlace_c; +#ifdef ARCH_IS_IA32 { - if (mbs[j*mb_stride + i - 1].mode != MODE_NOT_CODED || - mbs[j*mb_stride + i + 0].mode != MODE_NOT_CODED) - { - vfilter_31(img->u + (j*block)*edged_width2 + i*block - 1, - img->u + (j*block)*edged_width2 + i*block + 0, - edged_width2, nblocks); - vfilter_31(img->v + (j*block)*edged_width2 + i*block - 1, - img->v + (j*block)*edged_width2 + i*block + 0, - edged_width2, nblocks); - } + int cpu_flags = check_cpu_features(); + if (cpu_flags & XVID_CPU_MMX) + deintl_core = xvid_deinterlace_sse; } +#endif + } + if (!bottom_first) { + deintl_core(img->plane[0], width, height, img->stride[0]); + deintl_core(img->plane[1], width>>1, height>>1, img->stride[1]); + deintl_core(img->plane[2], width>>1, height>>1, img->stride[2]); + } + else { + deintl_core((uint8_t *)img->plane[0] + ( height -1)*img->stride[0], width, height, -img->stride[0]); + deintl_core((uint8_t *)img->plane[1] + ((height>>1)-1)*img->stride[1], width>>1, height>>1, -img->stride[1]); + deintl_core((uint8_t *)img->plane[2] + ((height>>1)-1)*img->stride[2], width>>1, height>>1, -img->stride[2]); + } + emms(); - + return 1; }