--- trunk/xvidcore/src/image/image.c	2004/04/12 15:49:56	1424
+++ trunk/xvidcore/src/image/image.c	2010/12/30 11:47:06	1932
@@ -3,7 +3,7 @@
  *  XVID MPEG-4 VIDEO CODEC
  *  - Image management functions -
  *
- *  Copyright(C) 2001-2004 Peter Ross
+ *  Copyright(C) 2001-2010 Peter Ross
  *
  *  This program is free software ; you can redistribute it and/or modify
  *  it under the terms of the GNU General Public License as published by
@@ -19,23 +19,22 @@
  *  along with this program ; if not, write to the Free Software
  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  *
- * $Id: image.c,v 1.29 2004-04-12 15:49:56 edgomez Exp $
+ * $Id: image.c,v 1.47 2010-12-30 11:46:08 Isibaar Exp $
  *
  ****************************************************************************/

 #include <stdlib.h>
 #include <string.h>	/* memcpy, memset */
 #include <math.h>
-
 #include "../portab.h"
 #include "../global.h"	/* XVID_CSP_XXX's */
 #include "../xvid.h"	/* XVID_CSP_XXX's */
 #include "image.h"
 #include "colorspace.h"
 #include "interpolate8x8.h"
-#include "reduced.h"
 #include "../utils/mem_align.h"
 #include "../motion/sad.h"
+#include "../utils/emms.h"

 #include "font.h"	/* XXX: remove later */
@@ -150,7 +149,7 @@

     /* According to the Standard Clause 7.6.4, padding is done starting at 16
      * pixel width and height multiples. This was not respected in old xvids */
-    if (bs_version == 0 || bs_version >= SETEDGES_BUG_BEFORE) {
+    if (bs_version >= SETEDGES_BUG_BEFORE) {
         width = (width+15)&~15;
         height = (height+15)&~15;
     }
@@ -238,12 +237,11 @@
     }
 }

-/* bframe encoding requires image-based u,v interpolation */
 void
-image_interpolate(const IMAGE * refn,
-                  IMAGE * refh,
-                  IMAGE * refv,
-                  IMAGE * refhv,
+image_interpolate(const uint8_t * refn,
+                  uint8_t * refh,
+                  uint8_t * refv,
+                  uint8_t * refhv,
                   uint32_t edged_width,
                   uint32_t edged_height,
                   uint32_t quarterpel,
@@ -251,19 +249,14 @@
 {
     const uint32_t offset = EDGE_SIZE2 * (edged_width + 1); /* we only interpolate half of the edge area */
     const uint32_t stride_add = 7 * edged_width;
-#if 0
-    const uint32_t edged_width2 = edged_width / 2;
-    const uint32_t edged_height2 = edged_height / 2;
-    const uint32_t offset2 = EDGE_SIZE2 * (edged_width2 + 1);
-    const uint32_t stride_add2 = 7 * edged_width2;
-#endif
-    uint8_t *n_ptr, *h_ptr, *v_ptr, *hv_ptr;
-    uint32_t x, y;
+    uint8_t *n_ptr;
+    uint8_t *h_ptr, *v_ptr, *hv_ptr;
+    uint32_t x, y;

-    n_ptr = refn->y;
-    h_ptr = refh->y;
-    v_ptr = refv->y;
+    n_ptr = (uint8_t*)refn;
+    h_ptr = refh;
+    v_ptr = refv;

     n_ptr -= offset;
     h_ptr -= offset;
@@ -292,8 +285,8 @@
             n_ptr += stride_add;
         }

-        h_ptr = refh->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;
-        hv_ptr = refhv->y + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;
+        h_ptr = refh + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;
+        hv_ptr = refhv + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2;

         for (y = 0; y < (edged_height - EDGE_SIZE); y = y + 8) {
             hv_ptr -= stride_add;
@@ -309,7 +302,7 @@
         }
     }
     else {
-        hv_ptr = refhv->y;
+        hv_ptr = refhv;
         hv_ptr -= offset;

         for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) {
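NOTE: image_interpolate() now takes plain uint8_t plane pointers instead of IMAGE
structs, so each plane is interpolated by its own call. A minimal sketch of an
adapted call site, assuming IMAGE variables refn/refh/refv/refhv as used elsewhere
in image.c (the encoder-side caller is not part of this patch):

    /* luma-only half-pel interpolation with the new plane-based signature */
    image_interpolate(refn.y, refh.y, refv.y, refhv.y,
                      edged_width, edged_height, quarterpel, rounding);
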
@@ -335,123 +328,6 @@
             n_ptr += stride_add;
         }
     }
-/*
-#ifdef BFRAMES
-    n_ptr = refn->u;
-    h_ptr = refh->u;
-    v_ptr = refv->u;
-    hv_ptr = refhv->u;
-
-    n_ptr -= offset2;
-    h_ptr -= offset2;
-    v_ptr -= offset2;
-    hv_ptr -= offset2;
-
-    for (y = 0; y < edged_height2; y += 8) {
-        for (x = 0; x < edged_width2; x += 8) {
-            interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding);
-            interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding);
-            interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding);
-
-            n_ptr += 8;
-            h_ptr += 8;
-            v_ptr += 8;
-            hv_ptr += 8;
-        }
-        h_ptr += stride_add2;
-        v_ptr += stride_add2;
-        hv_ptr += stride_add2;
-        n_ptr += stride_add2;
-    }
-
-    n_ptr = refn->v;
-    h_ptr = refh->v;
-    v_ptr = refv->v;
-    hv_ptr = refhv->v;
-
-    n_ptr -= offset2;
-    h_ptr -= offset2;
-    v_ptr -= offset2;
-    hv_ptr -= offset2;
-
-    for (y = 0; y < edged_height2; y = y + 8) {
-        for (x = 0; x < edged_width2; x = x + 8) {
-            interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width2, rounding);
-            interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width2, rounding);
-            interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width2, rounding);
-
-            n_ptr += 8;
-            h_ptr += 8;
-            v_ptr += 8;
-            hv_ptr += 8;
-        }
-        h_ptr += stride_add2;
-        v_ptr += stride_add2;
-        hv_ptr += stride_add2;
-        n_ptr += stride_add2;
-    }
-#endif
-*/
-    /*
-    interpolate_halfpel_h(
-        refh->y - offset,
-        refn->y - offset,
-        edged_width, edged_height,
-        rounding);
-
-    interpolate_halfpel_v(
-        refv->y - offset,
-        refn->y - offset,
-        edged_width, edged_height,
-        rounding);
-
-    interpolate_halfpel_hv(
-        refhv->y - offset,
-        refn->y - offset,
-        edged_width, edged_height,
-        rounding);
-    */
-
-    /* uv-image-based compensation
-    offset = EDGE_SIZE2 * (edged_width / 2 + 1);
-
-    interpolate_halfpel_h(
-        refh->u - offset,
-        refn->u - offset,
-        edged_width / 2, edged_height / 2,
-        rounding);
-
-    interpolate_halfpel_v(
-        refv->u - offset,
-        refn->u - offset,
-        edged_width / 2, edged_height / 2,
-        rounding);
-
-    interpolate_halfpel_hv(
-        refhv->u - offset,
-        refn->u - offset,
-        edged_width / 2, edged_height / 2,
-        rounding);
-
-
-    interpolate_halfpel_h(
-        refh->v - offset,
-        refn->v - offset,
-        edged_width / 2, edged_height / 2,
-        rounding);
-
-    interpolate_halfpel_v(
-        refv->v - offset,
-        refn->v - offset,
-        edged_width / 2, edged_height / 2,
-        rounding);
-
-    interpolate_halfpel_hv(
-        refhv->v - offset,
-        refn->v - offset,
-        edged_width / 2, edged_height / 2,
-        rounding);
-    */
 }
@@ -509,30 +385,46 @@
     uint8_t * y_ptr, uint8_t * u_ptr, uint8_t * v_ptr,
     int y_stride, int uv_stride,
     int width, int height, int vflip,
-    packedFunc * func_opt, packedFunc func_c, int size)
+    packedFunc * func_opt, packedFunc func_c,
+    int size, int interlacing)
 {
-    int width_opt, width_c;
+    int width_opt, width_c, height_opt;
+
+    if (width<0 || width==1 || height==1) return; /* forget about it */

     if (func_opt != func_c && x_stride < size*((width+15)/16)*16)
     {
         width_opt = width & (~15);
-        width_c = width - width_opt;
+        width_c = (width - width_opt) & (~1);
     }
-    else
+    else if (func_opt != func_c && !(width&1) && (size==3))
     {
-        width_opt = width;
+        /* MMX reads 4 bytes per pixel for RGB/BGR */
+        width_opt = width - 2;
+        width_c = 2;
+    }
+    else {
+        /* Enforce the width to be divisible by two. */
+        width_opt = width & (~1);
         width_c = 0;
     }

+    /* packed conversions require height to be divisible by 2
+       (or even by 4 for interlaced conversion) */
+    if (interlacing)
+        height_opt = height & (~3);
+    else
+        height_opt = height & (~1);
+
     func_opt(x_ptr, x_stride,
              y_ptr, u_ptr, v_ptr, y_stride, uv_stride,
-             width_opt, height, vflip);
+             width_opt, height_opt, vflip);

     if (width_c)
     {
         func_c(x_ptr + size*width_opt, x_stride,
                y_ptr + width_opt, u_ptr + width_opt/2, v_ptr + width_opt/2,
-               y_stride, uv_stride, width_c, height, vflip);
+               y_stride, uv_stride, width_c, height_opt, vflip);
     }
 }
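NOTE: the mask arithmetic above rounds down to a power-of-two multiple:
width & (~1) clears the lowest bit (down to even), height & (~3) clears the two
lowest bits (down to a multiple of 4, needed when the interlaced converters
process four lines at a time). A small self-contained check; the helper name is
illustrative, not part of the patch:

    #include <assert.h>

    /* round x down to a multiple of a power-of-two alignment */
    static int round_down(int x, int align) {
        return x & ~(align - 1);
    }

    int main(void) {
        assert(round_down(719, 2) == 718);  /* progressive: height & (~1) */
        assert(round_down(719, 4) == 716);  /* interlaced:  height & (~3) */
        assert(round_down(720, 4) == 720);  /* already aligned: unchanged */
        return 0;
    }
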
@@ -561,7 +453,7 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?rgb555i_to_yv12 :rgb555_to_yv12,
-            interlacing?rgb555i_to_yv12_c:rgb555_to_yv12_c, 2);
+            interlacing?rgb555i_to_yv12_c:rgb555_to_yv12_c, 2, interlacing);
         break;

     case XVID_CSP_RGB565:
@@ -569,7 +461,7 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?rgb565i_to_yv12 :rgb565_to_yv12,
-            interlacing?rgb565i_to_yv12_c:rgb565_to_yv12_c, 2);
+            interlacing?rgb565i_to_yv12_c:rgb565_to_yv12_c, 2, interlacing);
         break;

@@ -578,7 +470,7 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?bgri_to_yv12 :bgr_to_yv12,
-            interlacing?bgri_to_yv12_c:bgr_to_yv12_c, 3);
+            interlacing?bgri_to_yv12_c:bgr_to_yv12_c, 3, interlacing);
         break;

     case XVID_CSP_BGRA:
@@ -586,7 +478,7 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?bgrai_to_yv12 :bgra_to_yv12,
-            interlacing?bgrai_to_yv12_c:bgra_to_yv12_c, 4);
+            interlacing?bgrai_to_yv12_c:bgra_to_yv12_c, 4, interlacing);
         break;

     case XVID_CSP_ABGR :
@@ -594,15 +486,23 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?abgri_to_yv12 :abgr_to_yv12,
-            interlacing?abgri_to_yv12_c:abgr_to_yv12_c, 4);
+            interlacing?abgri_to_yv12_c:abgr_to_yv12_c, 4, interlacing);
         break;

+    case XVID_CSP_RGB:
+        safe_packed_conv(
+            src[0], src_stride[0], image->y, image->u, image->v,
+            edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
+            interlacing?rgbi_to_yv12 :rgb_to_yv12,
+            interlacing?rgbi_to_yv12_c:rgb_to_yv12_c, 3, interlacing);
+        break;
+
     case XVID_CSP_RGBA :
         safe_packed_conv(
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?rgbai_to_yv12 :rgba_to_yv12,
-            interlacing?rgbai_to_yv12_c:rgba_to_yv12_c, 4);
+            interlacing?rgbai_to_yv12_c:rgba_to_yv12_c, 4, interlacing);
         break;

     case XVID_CSP_ARGB:
@@ -610,7 +510,7 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?argbi_to_yv12 : argb_to_yv12,
-            interlacing?argbi_to_yv12_c: argb_to_yv12_c, 4);
+            interlacing?argbi_to_yv12_c: argb_to_yv12_c, 4, interlacing);
         break;

     case XVID_CSP_YUY2:
@@ -618,7 +518,7 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yuyvi_to_yv12 :yuyv_to_yv12,
-            interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2);
+            interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2, interlacing);
         break;

     case XVID_CSP_YVYU:	/* u/v swapped */
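NOTE: as the next hunk shows, XVID_CSP_YVYU needs no converter of its own. YVYU
is YUYV with the chroma samples exchanged, so the same YUYV converter pair is
reused with the u and v plane pointers swapped at the call site:

    safe_packed_conv(
        src[0], src_stride[0], image->y, image->v, image->u, /* v/u swapped */
        edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
        interlacing?yuyvi_to_yv12 :yuyv_to_yv12,
        interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2, interlacing);
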
@@ -626,7 +526,7 @@
             src[0], src_stride[0], image->y, image->v, image->u,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yuyvi_to_yv12 :yuyv_to_yv12,
-            interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2);
+            interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2, interlacing);
         break;

     case XVID_CSP_UYVY:
@@ -634,7 +534,7 @@
             src[0], src_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?uyvyi_to_yv12 :uyvy_to_yv12,
-            interlacing?uyvyi_to_yv12_c:uyvy_to_yv12_c, 2);
+            interlacing?uyvyi_to_yv12_c:uyvy_to_yv12_c, 2, interlacing);
         break;

     case XVID_CSP_I420:	/* YCbCr == YUV == internal colorspace for MPEG */
@@ -719,7 +619,7 @@
     int height,
     uint32_t edged_width,
     uint8_t * dst[4],
-    uint32_t dst_stride[4],
+    int dst_stride[4],
     int csp,
     int interlacing)
 {
@@ -738,7 +638,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_rgb555i :yv12_to_rgb555,
-            interlacing?yv12_to_rgb555i_c:yv12_to_rgb555_c, 2);
+            interlacing?yv12_to_rgb555i_c:yv12_to_rgb555_c, 2, interlacing);
         return 0;

     case XVID_CSP_RGB565:
@@ -746,7 +646,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_rgb565i :yv12_to_rgb565,
-            interlacing?yv12_to_rgb565i_c:yv12_to_rgb565_c, 2);
+            interlacing?yv12_to_rgb565i_c:yv12_to_rgb565_c, 2, interlacing);
         return 0;

     case XVID_CSP_BGR:
@@ -754,7 +654,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_bgri :yv12_to_bgr,
-            interlacing?yv12_to_bgri_c:yv12_to_bgr_c, 3);
+            interlacing?yv12_to_bgri_c:yv12_to_bgr_c, 3, interlacing);
         return 0;

     case XVID_CSP_BGRA:
@@ -762,7 +662,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_bgrai :yv12_to_bgra,
-            interlacing?yv12_to_bgrai_c:yv12_to_bgra_c, 4);
+            interlacing?yv12_to_bgrai_c:yv12_to_bgra_c, 4, interlacing);
         return 0;

     case XVID_CSP_ABGR:
@@ -770,7 +670,15 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_abgri :yv12_to_abgr,
-            interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4);
+            interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4, interlacing);
+        return 0;
+
+    case XVID_CSP_RGB:
+        safe_packed_conv(
+            dst[0], dst_stride[0], image->y, image->u, image->v,
+            edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
+            interlacing?yv12_to_rgbi :yv12_to_rgb,
+            interlacing?yv12_to_rgbi_c:yv12_to_rgb_c, 3, interlacing);
         return 0;

     case XVID_CSP_RGBA:
@@ -778,7 +686,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_rgbai :yv12_to_rgba,
-            interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4);
+            interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4, interlacing);
         return 0;

     case XVID_CSP_ARGB:
@@ -786,7 +694,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_argbi :yv12_to_argb,
-            interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4);
+            interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4, interlacing);
         return 0;

     case XVID_CSP_YUY2:
@@ -794,7 +702,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_yuyvi :yv12_to_yuyv,
-            interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2);
+            interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
         return 0;

     case XVID_CSP_YVYU:	/* u,v swapped */
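NOTE: dst_stride[] changing from uint32_t to int in image_output() is not
cosmetic: a signed stride presumably lets callers hand in bottom-up packed
buffers (e.g. Windows DIBs) by pointing at the last row and stepping backwards.
A hypothetical caller-side sketch; bgr_buffer, row_bytes and height are
illustrative names, not part of the patch:

    uint8_t *dst[4]   = { bgr_buffer + (size_t)(height - 1) * row_bytes, 0, 0, 0 };
    int dst_stride[4] = { -(int)row_bytes, 0, 0, 0 };  /* must be signed */
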
@@ -802,7 +710,7 @@
             dst[0], dst_stride[0], image->y, image->v, image->u,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_yuyvi :yv12_to_yuyv,
-            interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2);
+            interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing);
         return 0;

     case XVID_CSP_UYVY:
@@ -810,7 +718,7 @@
             dst[0], dst_stride[0], image->y, image->u, image->v,
             edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP),
             interlacing?yv12_to_uyvyi :yv12_to_uyvy,
-            interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2);
+            interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2, interlacing);
         return 0;

     case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */
@@ -954,6 +862,98 @@
     return (sse);
 }

+void image_block_variance(IMAGE * orig_image,
+                          uint16_t stride,
+                          MACROBLOCK *mbs,
+                          uint16_t mb_width,
+                          uint16_t mb_height)
+{
+    DECLARE_ALIGNED_MATRIX(sums, 1, 4, uint16_t, CACHE_LINE);
+    DECLARE_ALIGNED_MATRIX(squares, 1, 4, uint32_t, CACHE_LINE);
+
+    int x, y, i, j;
+    uint8_t *orig_y = orig_image->y;
+    uint8_t *orig_u = orig_image->u;
+    uint8_t *orig_v = orig_image->v;
+
+    for (y = 0; y < mb_height; y++) {
+        for (x = 0; x < mb_width; x++) {
+            MACROBLOCK *pMB = &mbs[x + y * mb_width];
+            uint32_t var4[4];
+            uint32_t sum = 0, square = 0;
+
+            /* y-blocks */
+            for (j = 0; j < 2; j++) {
+                for (i = 0; i < 2; i++) {
+                    int lsum = blocksum8(orig_y + ((y<<4) + (j<<3))*stride + (x<<4) + (i<<3),
+                                         stride, sums, squares);
+                    int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6;
+
+                    sum += lsum;
+                    square += lsquare;
+
+                    var4[0] = (squares[0]<<4) - sums[0]*sums[0];
+                    var4[1] = (squares[1]<<4) - sums[1]*sums[1];
+                    var4[2] = (squares[2]<<4) - sums[2]*sums[2];
+                    var4[3] = (squares[3]<<4) - sums[3]*sums[3];
+
+                    pMB->rel_var8[j*2 + i] = lsquare - lsum*lsum;
+                    if (pMB->rel_var8[j*2 + i])
+                        pMB->rel_var8[j*2 + i] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) /
+                                                 pMB->rel_var8[j*2 + i]; /* 4*(Var(Di)/Var(D)) */
+                    else
+                        pMB->rel_var8[j*2 + i] = 64;
+                }
+            }
+
+            /* u */
+            {
+                int lsum = blocksum8(orig_u + (y<<3)*(stride>>1) + (x<<3),
+                                     stride, sums, squares);
+                int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6;
+
+                sum += lsum;
+                square += lsquare;
+
+                var4[0] = (squares[0]<<4) - sums[0]*sums[0];
+                var4[1] = (squares[1]<<4) - sums[1]*sums[1];
+                var4[2] = (squares[2]<<4) - sums[2]*sums[2];
+                var4[3] = (squares[3]<<4) - sums[3]*sums[3];
+
+                pMB->rel_var8[4] = lsquare - lsum*lsum;
+                if (pMB->rel_var8[4])
+                    pMB->rel_var8[4] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) /
+                                       pMB->rel_var8[4]; /* 4*(Var(Di)/Var(D)) */
+                else
+                    pMB->rel_var8[4] = 64;
+            }
+
+            /* v */
+            {
+                int lsum = blocksum8(orig_v + (y<<3)*(stride>>1) + (x<<3),
+                                     stride, sums, squares);
+                int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6;
+
+                sum += lsum;
+                square += lsquare;
+
+                var4[0] = (squares[0]<<4) - sums[0]*sums[0];
+                var4[1] = (squares[1]<<4) - sums[1]*sums[1];
+                var4[2] = (squares[2]<<4) - sums[2]*sums[2];
+                var4[3] = (squares[3]<<4) - sums[3]*sums[3];
+
+                pMB->rel_var8[5] = lsquare - lsum*lsum;
+                if (pMB->rel_var8[5])
+                    pMB->rel_var8[5] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) /
+                                       pMB->rel_var8[5]; /* 4*(Var(Di)/Var(D)) */
+                else
+                    pMB->rel_var8[5] = 64;
+            }
+
+        }
+    }
+}
+
 #if 0
 #include <stdio.h>
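NOTE on the variance arithmetic in image_block_variance(): for an n-pixel block,
n*sum(x^2) - (sum x)^2 equals n^2 * Var. Assuming blocksum8() returns the 8x8 sum
while filling per-4x4-quadrant sums[] and squares[] (it is not defined in this
patch), var4[i] = 16*squares[i] - sums[i]^2 = 256*Var(Di) for each quadrant, and
lsquare - lsum*lsum = 64*sum(x^2) - (sum x)^2 = 4096*Var(D) for the whole block.
Hence rel_var8 = (sum(var4) << 8) / (lsquare - lsum*lsum) = 16*sum(Var(Di))/Var(D),
which comes out as 64 when the quadrants are as varied as the block itself; that
is why 64 is also the fallback for flat blocks. A tiny check of the identity:

    #include <stdio.h>

    int main(void) {
        int x[4] = { 10, 12, 9, 13 };  /* toy 4-pixel block, n = 4 */
        int n = 4, sum = 0, sq = 0, i;
        for (i = 0; i < n; i++) { sum += x[i]; sq += x[i]*x[i]; }
        /* n*sq - sum^2 = 4*494 - 44^2 = 40 = n^2 * Var (mean 11, Var 2.5) */
        printf("%d\n", n*sq - sum*sum);
        return 0;
    }
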
@@ -1080,9 +1080,9 @@
 }

 void
-output_slice(IMAGE * cur, int std, int width, xvid_image_t* out_frm, int mbx, int mby,int mbl) {
+output_slice(IMAGE * cur, int stride, int width, xvid_image_t* out_frm, int mbx, int mby,int mbl) {
     uint8_t *dY,*dU,*dV,*sY,*sU,*sV;
-    int std2 = std >> 1;
+    int stride2 = stride >> 1;
     int w = mbl << 4, w2,i;

     if(w > width)
@@ -1092,24 +1092,24 @@
     dY = (uint8_t*)out_frm->plane[0] + (mby << 4) * out_frm->stride[0] + (mbx << 4);
     dU = (uint8_t*)out_frm->plane[1] + (mby << 3) * out_frm->stride[1] + (mbx << 3);
     dV = (uint8_t*)out_frm->plane[2] + (mby << 3) * out_frm->stride[2] + (mbx << 3);
-    sY = cur->y + (mby << 4) * std + (mbx << 4);
-    sU = cur->u + (mby << 3) * std2 + (mbx << 3);
-    sV = cur->v + (mby << 3) * std2 + (mbx << 3);
+    sY = cur->y + (mby << 4) * stride + (mbx << 4);
+    sU = cur->u + (mby << 3) * stride2 + (mbx << 3);
+    sV = cur->v + (mby << 3) * stride2 + (mbx << 3);

     for(i = 0 ; i < 16 ; i++) {
         memcpy(dY,sY,w);
         dY += out_frm->stride[0];
-        sY += std;
+        sY += stride;
     }
     for(i = 0 ; i < 8 ; i++) {
         memcpy(dU,sU,w2);
         dU += out_frm->stride[1];
-        sU += std2;
+        sU += stride2;
     }
     for(i = 0 ; i < 8 ; i++) {
         memcpy(dV,sV,w2);
         dV += out_frm->stride[2];
-        sV += std2;
+        sV += stride2;
     }
 }
@@ -1140,77 +1140,69 @@
     }
 }

+/****************************************************************************/

-
-/* reduced resolution deblocking filter
-	block = block size (16=rrv, 8=full resolution)
-	flags = XVID_DEC_YDEBLOCK|XVID_DEC_UVDEBLOCK
-*/
-void
-image_deblock_rrv(IMAGE * img, int edged_width,
-                const MACROBLOCK * mbs, int mb_width, int mb_height, int mb_stride,
-                int block, int flags)
-{
-    const int edged_width2 = edged_width /2;
-    const int nblocks = block / 8;	/* skals code uses 8pixel block uints */
-    int i,j;
-
-    /* luma: j,i in block units */
-
-    for (j = 1; j < mb_height*2; j++)	/* horizontal deblocking */
-    for (i = 0; i < mb_width*2; i++)
-    {
-        if (mbs[(j-1)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED ||
-            mbs[(j+0)/2*mb_stride + (i/2)].mode != MODE_NOT_CODED)
-        {
-            hfilter_31(img->y + (j*block - 1)*edged_width + i*block,
-                       img->y + (j*block + 0)*edged_width + i*block, nblocks);
-        }
-    }
-
-    for (j = 0; j < mb_height*2; j++)	/* vertical deblocking */
-    for (i = 1; i < mb_width*2; i++)
-    {
-        if (mbs[(j/2)*mb_stride + (i-1)/2].mode != MODE_NOT_CODED ||
-            mbs[(j/2)*mb_stride + (i+0)/2].mode != MODE_NOT_CODED)
-        {
-            vfilter_31(img->y + (j*block)*edged_width + i*block - 1,
-                       img->y + (j*block)*edged_width + i*block + 0,
-                       edged_width, nblocks);
-        }
-    }
-
-
+static void (*deintl_core)(uint8_t *, int width, int height, const int stride) = 0;
+extern void xvid_deinterlace_sse(uint8_t *, int width, int height, const int stride);

-    /* chroma */
+#define CLIP_255(x)	( ((x)&~255) ? ((-(x)) >> (8*sizeof((x))-1))&0xff : (x) )

-    for (j = 1; j < mb_height; j++)	/* horizontal deblocking */
-    for (i = 0; i < mb_width; i++)
-    {
-        if (mbs[(j-1)*mb_stride + i].mode != MODE_NOT_CODED ||
-            mbs[(j+0)*mb_stride + i].mode != MODE_NOT_CODED)
-        {
-            hfilter_31(img->u + (j*block - 1)*edged_width2 + i*block,
-                       img->u + (j*block + 0)*edged_width2 + i*block, nblocks);
-            hfilter_31(img->v + (j*block - 1)*edged_width2 + i*block,
-                       img->v + (j*block + 0)*edged_width2 + i*block, nblocks);
-        }
-    }
+static void deinterlace_c(uint8_t *pix, int width, int height, const int bps)
+{
+    pix += bps;
+    while(width-->0)
+    {
+        int p1 = pix[-bps];
+        int p2 = pix[0];
+        int p0 = p2;
+        int j = (height>>1) - 1;
+        int V;
+        unsigned char *P = pix++;
+        while(j-->0)
+        {
+            const int p3 = P[ bps];
+            const int p4 = P[2*bps];
+            V = ((p1+p3+1)>>1) + ((p2 - ((p0+p4+1)>>1)) >> 2);
+            P[0] = CLIP_255( V );
+            p0 = p2;
+            p1 = p3;
+            p2 = p4;
+            P += 2*bps;
+        }
+        V = ((p1+p1+1)>>1) + ((p2 - ((p0+p2+1)>>1)) >> 2);
+        P[0] = CLIP_255( V );
+    }
+}
+#undef CLIP_255

-    for (j = 0; j < mb_height; j++)	/* vertical deblocking */
-    for (i = 1; i < mb_width; i++)
+int xvid_image_deinterlace(xvid_image_t* img, int width, int height, int bottom_first)
+{
+    if (height&1)
+        return 0;
+    if (img->csp!=XVID_CSP_PLANAR && img->csp!=XVID_CSP_I420 && img->csp!=XVID_CSP_YV12)
+        return 0; /* not yet supported */
+    if (deintl_core==0) {
+        deintl_core = deinterlace_c;
+#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64)
     {
-        if (mbs[j*mb_stride + i - 1].mode != MODE_NOT_CODED ||
-            mbs[j*mb_stride + i + 0].mode != MODE_NOT_CODED)
-        {
-            vfilter_31(img->u + (j*block)*edged_width2 + i*block - 1,
-                       img->u + (j*block)*edged_width2 + i*block + 0,
-                       edged_width2, nblocks);
-            vfilter_31(img->v + (j*block)*edged_width2 + i*block - 1,
-                       img->v + (j*block)*edged_width2 + i*block + 0,
-                       edged_width2, nblocks);
-        }
+        int cpu_flags = check_cpu_features();
+        if (cpu_flags & XVID_CPU_MMX)
+            deintl_core = xvid_deinterlace_sse;
     }
+#endif
+    }
+    if (!bottom_first) {
+        deintl_core(img->plane[0], width, height, img->stride[0]);
+        deintl_core(img->plane[1], width>>1, height>>1, img->stride[1]);
+        deintl_core(img->plane[2], width>>1, height>>1, img->stride[2]);
+    }
+    else {
+        deintl_core((uint8_t *)img->plane[0] + ( height    -1)*img->stride[0], width,    height,    -img->stride[0]);
+        deintl_core((uint8_t *)img->plane[1] + ((height>>1)-1)*img->stride[1], width>>1, height>>1, -img->stride[1]);
+        deintl_core((uint8_t *)img->plane[2] + ((height>>1)-1)*img->stride[2], width>>1, height>>1, -img->stride[2]);
    }
+    emms();
-
+    return 1;
 }
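
NOTE: CLIP_255 above clamps to 0..255 without a double branch. (x)&~255 is
non-zero only when x lies outside the byte range; then (-(x)) >> 31 (arithmetic
shift, 32-bit int assumed) is all-ones for x > 255 and zero for x < 0, so the
final & 0xff yields 255 or 0 respectively. A quick check; it relies on
arithmetic right shift of negative values, which holds on XviD's targets:

    #include <assert.h>

    #define CLIP_255(x) ( ((x)&~255) ? ((-(x)) >> (8*sizeof((x))-1))&0xff : (x) )

    int main(void) {
        int hi = 300, lo = -5, in = 128;
        assert(CLIP_255(hi) == 255);  /* -300 >> 31 == -1, & 0xff == 255 */
        assert(CLIP_255(lo) == 0);    /*    5 >> 31 ==  0, & 0xff ==   0 */
        assert(CLIP_255(in) == 128);  /* in range: passed through */
        return 0;
    }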
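
NOTE: a hypothetical use of the new deinterlacer on a decoded I420 frame. The
xvid_image_t layout follows the csp checks above; buf, width and height are
illustrative names and assumed even:

    #include <stdio.h>
    #include "xvid.h"   /* xvid_image_t, XVID_CSP_I420, xvid_image_deinterlace */

    void deinterlace_frame(uint8_t *buf, int width, int height) {
        xvid_image_t frame;
        frame.csp = XVID_CSP_I420;
        frame.plane[0] = buf;                       /* Y */
        frame.plane[1] = buf + width*height;        /* U */
        frame.plane[2] = buf + width*height*5/4;    /* V */
        frame.stride[0] = width;
        frame.stride[1] = frame.stride[2] = width/2;

        /* bottom_first = 0: top field first; returns 0 on unsupported input */
        if (!xvid_image_deinterlace(&frame, width, height, 0))
            fprintf(stderr, "deinterlace: unsupported colorspace/height\n");
    }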