--- trunk/xvidcore/src/image/image.c 2002/09/07 09:04:41 435 +++ trunk/xvidcore/src/image/image.c 2019/01/17 14:25:05 2173 @@ -1,46 +1,42 @@ -/***************************************************************************** +/************************************************************************** * * XVID MPEG-4 VIDEO CODEC - * - image module - + * - Image management functions - * - * Copyright(C) 2002 Peter Ross + * Copyright(C) 2001-2010 Peter Ross * - * This program is an implementation of a part of one or more MPEG-4 - * Video tools as specified in ISO/IEC 14496-2 standard. Those intending - * to use this software module in hardware or software products are - * advised that its use may infringe existing patents or copyrights, and - * any such use would be at such party's own risk. The original - * developer of this software module and his/her company, and subsequent - * editors and their companies, will have no liability for use of this - * software or modifications or derivatives thereof. - * - * This program is free software; you can redistribute it and/or modify + * This program is free software ; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or + * the Free Software Foundation ; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of + * but WITHOUT ANY WARRANTY ; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software + * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * + * $Id$ + * ****************************************************************************/ #include <stdlib.h> -#include <string.h> // memcpy, memset +#include <string.h> /* memcpy, memset */ #include <math.h> - #include "../portab.h" -#include "../xvid.h" // XVID_CSP_XXX's +#include "../global.h" /* XVID_CSP_XXX's */ +#include "../xvid.h" /* XVID_CSP_XXX's */ #include "image.h" #include "colorspace.h" #include "interpolate8x8.h" -#include "../divx4.h" #include "../utils/mem_align.h" +#include "../motion/sad.h" +#include "../utils/emms.h" + +#include "font.h" /* XXX: remove later */ #define SAFETY 64 #define EDGE_SIZE2 (EDGE_SIZE/2) @@ -53,29 +49,31 @@ { const uint32_t edged_width2 = edged_width / 2; const uint32_t edged_height2 = edged_height / 2; - uint32_t i; image->y = xvid_malloc(edged_width * (edged_height + 1) + SAFETY, CACHE_LINE); if (image->y == NULL) { return -1; } - - for (i = 0; i < edged_width * edged_height + SAFETY; i++) { - image->y[i] = 0; - } + memset(image->y, 0, edged_width * (edged_height + 1) + SAFETY); image->u = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE); if (image->u == NULL) { xvid_free(image->y); + image->y = NULL; return -1; } + memset(image->u, 0, edged_width2 * edged_height2 + SAFETY); + image->v = xvid_malloc(edged_width2 * edged_height2 + SAFETY, CACHE_LINE); if (image->v == NULL) { xvid_free(image->u); + image->u = NULL; xvid_free(image->y); + image->y = NULL; return -1; } + memset(image->v, 0, edged_width2 * edged_height2 + SAFETY); image->y += EDGE_SIZE * edged_width + EDGE_SIZE; image->u += EDGE_SIZE2 * edged_width2
+ EDGE_SIZE2; @@ -95,12 +93,15 @@ if (image->y) { xvid_free(image->y - (EDGE_SIZE * edged_width + EDGE_SIZE)); + image->y = NULL; } if (image->u) { xvid_free(image->u - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2)); + image->u = NULL; } if (image->v) { xvid_free(image->v - (EDGE_SIZE2 * edged_width2 + EDGE_SIZE2)); + image->v = NULL; } } @@ -109,19 +110,9 @@ image_swap(IMAGE * image1, IMAGE * image2) { - uint8_t *tmp; - - tmp = image1->y; - image1->y = image2->y; - image2->y = tmp; - - tmp = image1->u; - image1->u = image2->u; - image2->u = tmp; - - tmp = image1->v; - image1->v = image2->v; - image2->v = tmp; + SWAP(uint8_t*, image1->y, image2->y); + SWAP(uint8_t*, image1->u, image2->u); + SWAP(uint8_t*, image1->v, image2->v); } @@ -136,6 +127,10 @@ memcpy(image1->v, image2->v, edged_width * height / 4); } +/* the setedges bug was present in these bitstream (BS) versions */ +#define SETEDGES_BUG_BEFORE 18 +#define SETEDGES_BUG_AFTER 57 +#define SETEDGES_BUG_REFIXED 63 void image_setedges(IMAGE * image, @@ -143,31 +138,33 @@ uint32_t edged_height, uint32_t width, uint32_t height, - uint32_t interlacing) + int bs_version) { const uint32_t edged_width2 = edged_width / 2; - const uint32_t width2 = width / 2; + uint32_t width2; uint32_t i; uint8_t *dst; uint8_t *src; - dst = image->y - (EDGE_SIZE + EDGE_SIZE * edged_width); src = image->y; + /* According to the Standard Clause 7.6.4, padding is done starting at 16 + * pixel width and height multiples. This was not respected in old XviD versions */ + if ((bs_version >= SETEDGES_BUG_BEFORE && + bs_version < SETEDGES_BUG_AFTER) || + bs_version >= SETEDGES_BUG_REFIXED) { + width = (width+15)&~15; + height = (height+15)&~15; + } + + width2 = width/2; + for (i = 0; i < EDGE_SIZE; i++) { -/* // if interlacing, edges contain top-most data from each field - if (interlacing && (i & 1)) { - memset(dst, *(src + edged_width), EDGE_SIZE); - memcpy(dst + EDGE_SIZE, src + edged_width, width); - memset(dst + edged_width - EDGE_SIZE, - *(src + edged_width + width - 1), EDGE_SIZE); - } else {*/ - memset(dst, *src, EDGE_SIZE); - memcpy(dst + EDGE_SIZE, src, width); - memset(dst + edged_width - EDGE_SIZE, *(src + width - 1), - EDGE_SIZE); - /*}*/ + memset(dst, *src, EDGE_SIZE); + memcpy(dst + EDGE_SIZE, src, width); + memset(dst + edged_width - EDGE_SIZE, *(src + width - 1), + EDGE_SIZE); dst += edged_width; } @@ -180,23 +177,15 @@ src -= edged_width; for (i = 0; i < EDGE_SIZE; i++) { -/* // if interlacing, edges contain bottom-most data from each field - if (interlacing && !(i & 1)) { - memset(dst, *(src - edged_width), EDGE_SIZE); - memcpy(dst + EDGE_SIZE, src - edged_width, width); - memset(dst + edged_width - EDGE_SIZE, - *(src - edged_width + width - 1), EDGE_SIZE); - } else {*/ - memset(dst, *src, EDGE_SIZE); - memcpy(dst + EDGE_SIZE, src, width); - memset(dst + edged_width - EDGE_SIZE, *(src + width - 1), EDGE_SIZE); - /*}*/ + memset(dst, *src, EDGE_SIZE); + memcpy(dst + EDGE_SIZE, src, width); + memset(dst + edged_width - EDGE_SIZE, *(src + width - 1), + EDGE_SIZE); dst += edged_width; } -//U + /* U */ dst = image->u - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2); src = image->u; @@ -224,7 +213,7 @@ } -// V + /* V */ dst = image->v - (EDGE_SIZE2 + EDGE_SIZE2 * edged_width2); src = image->v;
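Edge padding above follows ISO/IEC 14496-2 Clause 7.6.4: for the affected bitstream versions, the padded area is first rounded up to the next 16-pixel multiple before the edge pixels are replicated outward. A minimal illustration of that rounding (the ROUND16 macro is ours, purely for illustration; it is the same expression image_setedges uses):

#define ROUND16(x) (((x)+15)&~15)
/* e.g. ROUND16(640) == 640, ROUND16(636) == 640, ROUND16(351) == 352 */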
@@ -252,186 +241,378 @@ } } -// bframe encoding requires image-based u,v interpolation void -image_interpolate(const IMAGE * refn, - IMAGE * refh, - IMAGE * refv, - IMAGE * refhv, +image_interpolate(const uint8_t * refn, + uint8_t * refh, + uint8_t * refv, + uint8_t * refhv, uint32_t edged_width, uint32_t edged_height, + uint32_t quarterpel, uint32_t rounding) { - const uint32_t offset = EDGE_SIZE * (edged_width + 1); + const uint32_t offset = EDGE_SIZE2 * (edged_width + 1); /* we only interpolate half of the edge area */ const uint32_t stride_add = 7 * edged_width; - uint8_t *n_ptr, *h_ptr, *v_ptr, *hv_ptr; + uint8_t *n_ptr; + uint8_t *h_ptr, *v_ptr, *hv_ptr; uint32_t x, y; - - n_ptr = refn->y; - h_ptr = refh->y; - v_ptr = refv->y; - hv_ptr = refhv->y; + n_ptr = (uint8_t*)refn; + h_ptr = refh; + v_ptr = refv; n_ptr -= offset; h_ptr -= offset; v_ptr -= offset; - hv_ptr -= offset; - for (y = 0; y < edged_height; y = y + 8) { - for (x = 0; x < edged_width; x = x + 8) { - interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width, rounding); - interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width, rounding); - interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width, rounding); - - n_ptr += 8; - h_ptr += 8; - v_ptr += 8; - hv_ptr += 8; - } - h_ptr += stride_add; - v_ptr += stride_add; - hv_ptr += stride_add; - n_ptr += stride_add; - } - - /* - interpolate_halfpel_h( - refh->y - offset, - refn->y - offset, - edged_width, edged_height, - rounding); - - interpolate_halfpel_v( - refv->y - offset, - refn->y - offset, - edged_width, edged_height, - rounding); - - interpolate_halfpel_hv( - refhv->y - offset, - refn->y - offset, - edged_width, edged_height, - rounding); - */ - - /* uv-image-based compensation - offset = EDGE_SIZE2 * (edged_width / 2 + 1); - - interpolate_halfpel_h( - refh->u - offset, - refn->u - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_v( - refv->u - offset, - refn->u - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_hv( - refhv->u - offset, - refn->u - offset, - edged_width / 2, edged_height / 2, - rounding); - - - interpolate_halfpel_h( - refh->v - offset, - refn->v - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_v( - refv->v - offset, - refn->v - offset, - edged_width / 2, edged_height / 2, - rounding); - - interpolate_halfpel_hv( - refhv->v - offset, - refn->v - offset, - edged_width / 2, edged_height / 2, - rounding); - */ + /* Note we initialize the hv pointer later, since we can optimize the code a bit + * by working bottom-up in the quarterpel case and top-down in the halfpel case */ + if(quarterpel) { + + for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) { + for (x = 0; x < (edged_width - EDGE_SIZE); x += 8) { + interpolate8x8_6tap_lowpass_h(h_ptr, n_ptr, edged_width, rounding); + interpolate8x8_6tap_lowpass_v(v_ptr, n_ptr, edged_width, rounding); + + n_ptr += 8; + h_ptr += 8; + v_ptr += 8; + } + + n_ptr += EDGE_SIZE; + h_ptr += EDGE_SIZE; + v_ptr += EDGE_SIZE; + + h_ptr += stride_add; + v_ptr += stride_add; + n_ptr += stride_add; + } + + h_ptr = refh + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; + hv_ptr = refhv + (edged_height - EDGE_SIZE - EDGE_SIZE2)*edged_width - EDGE_SIZE2; + + for (y = 0; y < (edged_height - EDGE_SIZE); y = y + 8) { + hv_ptr -= stride_add; + h_ptr -= stride_add; + hv_ptr -= EDGE_SIZE; + h_ptr -= EDGE_SIZE; + + for (x = 0; x < (edged_width - EDGE_SIZE); x = x + 8) { + hv_ptr -= 8; + h_ptr -= 8; + interpolate8x8_6tap_lowpass_v(hv_ptr, h_ptr, edged_width, rounding); + } + } + } else { + + hv_ptr = refhv; + hv_ptr -= offset; + + for (y = 0; y < (edged_height - EDGE_SIZE); y += 8) { + for (x = 0; x < (edged_width - EDGE_SIZE); x += 8) { + interpolate8x8_halfpel_h(h_ptr, n_ptr, edged_width, rounding); + interpolate8x8_halfpel_v(v_ptr, n_ptr, edged_width, rounding); + interpolate8x8_halfpel_hv(hv_ptr, n_ptr, edged_width, rounding); + + n_ptr += 8; + h_ptr += 8; + v_ptr += 8; + hv_ptr += 8; + } + + h_ptr += EDGE_SIZE; + v_ptr += EDGE_SIZE; + hv_ptr += EDGE_SIZE; + n_ptr += EDGE_SIZE; + + h_ptr += stride_add; + v_ptr += stride_add; + hv_ptr += stride_add; + n_ptr += stride_add; + } + } +}
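For reference, this is what the three half-pel planes hold relative to the source plane. A plain-C sketch of the h/v/hv filters, illustrative only: the real work is done by the optimized interpolate8x8_* kernels called above, and the reads at x+1/y+1 assume the usual EDGE_SIZE padding around the plane:

static void halfpel_sketch(uint8_t *h, uint8_t *v, uint8_t *hv,
                           const uint8_t *src, int stride,
                           int w, int ht, int rounding)
{
	int x, y;
	for (y = 0; y < ht; y++)
		for (x = 0; x < w; x++) {
			const uint8_t *p = src + y*stride + x;
			/* horizontal: average with the right neighbour */
			h [y*stride + x] = (p[0] + p[1] + 1 - rounding) >> 1;
			/* vertical: average with the lower neighbour */
			v [y*stride + x] = (p[0] + p[stride] + 1 - rounding) >> 1;
			/* diagonal: average of the 2x2 neighbourhood */
			hv[y*stride + x] = (p[0] + p[1] + p[stride] + p[stride+1] + 2 - rounding) >> 2;
		}
}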
+ + +/* +chroma optimize filter, invented by mf +a chroma pixel is averaged from the surrounding pixels when the +corresponding luma pixels are pure black or white. +*/ + +void +image_chroma_optimize(IMAGE * img, int width, int height, int edged_width) +{ + int x,y; + int pixels = 0; + + for (y = 1; y < height/2 - 1; y++) + for (x = 1; x < width/2 - 1; x++) + { +#define IS_PURE(a) ((a)<=16||(a)>=235) +#define IMG_Y(Y,X) img->y[(Y)*edged_width + (X)] +#define IMG_U(Y,X) img->u[(Y)*edged_width/2 + (X)] +#define IMG_V(Y,X) img->v[(Y)*edged_width/2 + (X)] + + if (IS_PURE(IMG_Y(y*2 ,x*2 )) && + IS_PURE(IMG_Y(y*2 ,x*2+1)) && + IS_PURE(IMG_Y(y*2+1,x*2 )) && + IS_PURE(IMG_Y(y*2+1,x*2+1))) + { + IMG_U(y,x) = (IMG_U(y,x-1) + IMG_U(y-1, x) + IMG_U(y, x+1) + IMG_U(y+1, x)) / 4; + IMG_V(y,x) = (IMG_V(y,x-1) + IMG_V(y-1, x) + IMG_V(y, x+1) + IMG_V(y+1, x)) / 4; + pixels++; + } + +#undef IS_PURE +#undef IMG_Y +#undef IMG_U +#undef IMG_V + } + + DPRINTF(XVID_DEBUG_DEBUG,"chroma_optimized_pixels = %i/%i\n", pixels, width*height/4); +} + + + + + +/* + perform safe packed colorspace conversion, by splitting + the image up into an optimized area (pixel width divisible by 16), + and two unoptimized/plain-c areas (pixel width divisible by 2) +*/ + +static void +safe_packed_conv(uint8_t * x_ptr, int x_stride, + uint8_t * y_ptr, uint8_t * u_ptr, uint8_t * v_ptr, + int y_stride, int uv_stride, + int width, int height, int vflip, + packedFunc * func_opt, packedFunc func_c, + int size, int interlacing) +{ + int width_opt, width_c, height_opt; + + if (width<0 || width==1 || height==1) return; /* forget about it */ + + if (func_opt != func_c && x_stride < size*((width+15)/16)*16) + { + width_opt = width & (~15); + width_c = (width - width_opt) & (~1); + } + else if (func_opt != func_c && !(width&1) && (size==3)) + { + /* MMX reads 4 bytes per pixel for RGB/BGR */ + width_opt = width - 2; + width_c = 2; + } + else { + /* Enforce the width to be divisible by two. */ + width_opt = width & (~1); + width_c = 0; + } + + /* packed conversions require height to be divisible by 2 + (or even by 4 for interlaced conversion) */ + if (interlacing) + height_opt = height & (~3); + else + height_opt = height & (~1); + + func_opt(x_ptr, x_stride, + y_ptr, u_ptr, v_ptr, y_stride, uv_stride, + width_opt, height_opt, vflip); + + if (width_c) + { + func_c(x_ptr + size*width_opt, x_stride, + y_ptr + width_opt, u_ptr + width_opt/2, v_ptr + width_opt/2, + y_stride, uv_stride, width_c, height_opt, vflip); + } +} +
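A worked example of the split safe_packed_conv performs: a tightly packed 24-bit BGR surface (size == 3) of width 70 has x_stride == 210, which is below size*((70+15)/16)*16 == 240, so the first branch applies and the optimized converter handles the left 64 pixels while the plain-C one handles the remaining 6 (the values here are illustrative):

int width     = 70;
int width_opt = width & ~15;              /* 64 pixels go to func_opt */
int width_c   = (width - width_opt) & ~1; /*  6 pixels go to func_c   */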
int image_input(IMAGE * image, uint32_t width, int height, uint32_t edged_width, - uint8_t * src, - int csp) + uint8_t * src[4], + int src_stride[4], + int csp, + int interlacing) { - -/* if (csp & XVID_CSP_VFLIP) - { - height = -height; - } -*/ + const int edged_width2 = edged_width/2; + const int width2 = width/2; + const int height2 = height/2; +#if 0 + const int height_signed = (csp & XVID_CSP_VFLIP) ? -height : height; +#endif switch (csp & ~XVID_CSP_VFLIP) { case XVID_CSP_RGB555: - rgb555_to_yv12(image->y, image->u, image->v, src, width, height, - edged_width); - return 0; + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?rgb555i_to_yv12 :rgb555_to_yv12, + interlacing?rgb555i_to_yv12_c:rgb555_to_yv12_c, 2, interlacing); + break; case XVID_CSP_RGB565: - rgb565_to_yv12(image->y, image->u, image->v, src, width, height, - edged_width); - return 0; + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?rgb565i_to_yv12 :rgb565_to_yv12, + interlacing?rgb565i_to_yv12_c:rgb565_to_yv12_c, 2, interlacing); + break; - case XVID_CSP_RGB24: - rgb24_to_yv12(image->y, image->u, image->v, src, width, height, - edged_width); - return 0; + case XVID_CSP_BGR: + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?bgri_to_yv12 :bgr_to_yv12, + interlacing?bgri_to_yv12_c:bgr_to_yv12_c, 3, interlacing); + break; - case XVID_CSP_RGB32: - rgb32_to_yv12(image->y, image->u, image->v, src, width, height, - edged_width); - return 0; + case XVID_CSP_BGRA: + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?bgrai_to_yv12 :bgra_to_yv12, + interlacing?bgrai_to_yv12_c:bgra_to_yv12_c, 4, interlacing); + break; - case XVID_CSP_I420: - yuv_to_yv12(image->y, image->u, image->v, src, width, height, - edged_width); - return 0; + case XVID_CSP_ABGR : + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?abgri_to_yv12 :abgr_to_yv12, + interlacing?abgri_to_yv12_c:abgr_to_yv12_c, 4, interlacing); + break; - case XVID_CSP_YV12: /* u/v swapped */ - yuv_to_yv12(image->y, image->v, image->u, src, width, height, - edged_width); - return 0; + case XVID_CSP_RGB: + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?rgbi_to_yv12 :rgb_to_yv12, + interlacing?rgbi_to_yv12_c:rgb_to_yv12_c, 3, interlacing); + break; + + case XVID_CSP_RGBA : + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?rgbai_to_yv12 :rgba_to_yv12, + interlacing?rgbai_to_yv12_c:rgba_to_yv12_c, 4, interlacing); + break; + + case XVID_CSP_ARGB: + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?argbi_to_yv12 : argb_to_yv12, + interlacing?argbi_to_yv12_c: argb_to_yv12_c, 4, interlacing); + break; case XVID_CSP_YUY2: - yuyv_to_yv12(image->y, image->u, image->v, src, width, height, - edged_width); - return 0; + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yuyvi_to_yv12 :yuyv_to_yv12, + interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2, interlacing); + break; case XVID_CSP_YVYU: /* u/v swapped */ - yuyv_to_yv12(image->y, image->v, image->u, src, width, height, - edged_width); - return 0; + safe_packed_conv( + src[0], src_stride[0], image->y, image->v,
image->u, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yuyvi_to_yv12 :yuyv_to_yv12, + interlacing?yuyvi_to_yv12_c:yuyv_to_yv12_c, 2, interlacing); + break; case XVID_CSP_UYVY: - uyvy_to_yv12(image->y, image->u, image->v, src, width, height, - edged_width); - return 0; + safe_packed_conv( + src[0], src_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?uyvyi_to_yv12 :uyvy_to_yv12, + interlacing?uyvyi_to_yv12_c:uyvy_to_yv12_c, 2, interlacing); + break; - case XVID_CSP_USER: - user_to_yuv_c(image->y, image->u, image->v, edged_width, - (DEC_PICTURE *) src, width, height); - return 0; + case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */ + yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2, + src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2, + src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP)); + break; + + case XVID_CSP_YV12: /* YCrCb == YVU == U and V plane swapped */ + yv12_to_yv12(image->y, image->v, image->u, edged_width, edged_width2, + src[0], src[0] + src_stride[0]*height, src[0] + src_stride[0]*height + (src_stride[0]/2)*height2, + src_stride[0], src_stride[0]/2, width, height, (csp & XVID_CSP_VFLIP)); + break; + + case XVID_CSP_PLANAR: /* YCbCr with arbitrary pointers and different strides for Y and UV */ + yv12_to_yv12(image->y, image->u, image->v, edged_width, edged_width2, + src[0], src[1], src[2], src_stride[0], src_stride[1], /* v: src_stride[2] not yet supported */ + width, height, (csp & XVID_CSP_VFLIP)); + break; case XVID_CSP_NULL: break; + default : + return -1; } - return -1; + + /* pad out image when the width and/or height is not a multiple of 16 */ + + if (width & 15) + { + int i; + int pad_width = 16 - (width&15); + for (i = 0; i < height; i++) + { + memset(image->y + i*edged_width + width, + *(image->y + i*edged_width + width - 1), pad_width); + } + for (i = 0; i < height/2; i++) + { + memset(image->u + i*edged_width2 + width2, + *(image->u + i*edged_width2 + width2 - 1),pad_width/2); + memset(image->v + i*edged_width2 + width2, + *(image->v + i*edged_width2 + width2 - 1),pad_width/2); + } + } + + if (height & 15) + { + int pad_height = 16 - (height&15); + int length = ((width+15)/16)*16; + int i; + for (i = 0; i < pad_height; i++) + { + memcpy(image->y + (height+i)*edged_width, + image->y + (height-1)*edged_width,length); + } + + for (i = 0; i < pad_height/2; i++) + { + memcpy(image->u + (height2+i)*edged_width2, + image->u + (height2-1)*edged_width2,length/2); + memcpy(image->v + (height2+i)*edged_width2, + image->v + (height2-1)*edged_width2,length/2); + } + } + +/* + if (interlacing) + image_printf(image, edged_width, height, 5,5, "[i]"); + image_dump_yuvpgm(image, edged_width, ((width+15)/16)*16, ((height+15)/16)*16, "\\encode.pgm"); +*/ + return 0; } @@ -441,70 +622,141 @@ uint32_t width, int height, uint32_t edged_width, - uint8_t * dst, - uint32_t dst_stride, - int csp) + uint8_t * dst[4], + int dst_stride[4], + int csp, + int interlacing) { - if (csp & XVID_CSP_VFLIP) { - height = -height; - } + const int edged_width2 = edged_width/2; + int height2 = height/2; + +/* + if (interlacing) + image_printf(image, edged_width, height, 5,100, "[i]=%i,%i",width,height); + image_dump_yuvpgm(image, edged_width, width, height, "\\decode.pgm"); +*/ switch (csp & ~XVID_CSP_VFLIP) { case XVID_CSP_RGB555: - yv12_to_rgb555(dst, dst_stride, image->y, image->u,
image->v, - edged_width, edged_width / 2, width, height); + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_rgb555i :yv12_to_rgb555, + interlacing?yv12_to_rgb555i_c:yv12_to_rgb555_c, 2, interlacing); return 0; case XVID_CSP_RGB565: - yv12_to_rgb565(dst, dst_stride, image->y, image->u, image->v, - edged_width, edged_width / 2, width, height); - return 0; - - case XVID_CSP_RGB24: - yv12_to_rgb24(dst, dst_stride, image->y, image->u, image->v, - edged_width, edged_width / 2, width, height); - return 0; - - case XVID_CSP_RGB32: - yv12_to_rgb32(dst, dst_stride, image->y, image->u, image->v, - edged_width, edged_width / 2, width, height); - return 0; - - case XVID_CSP_I420: - yv12_to_yuv(dst, dst_stride, image->y, image->u, image->v, edged_width, - edged_width / 2, width, height); - return 0; - - case XVID_CSP_YV12: // u,v swapped - yv12_to_yuv(dst, dst_stride, image->y, image->v, image->u, edged_width, - edged_width / 2, width, height); + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_rgb565i :yv12_to_rgb565, + interlacing?yv12_to_rgb565i_c:yv12_to_rgb565_c, 2, interlacing); + return 0; + + case XVID_CSP_BGR: + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_bgri :yv12_to_bgr, + interlacing?yv12_to_bgri_c:yv12_to_bgr_c, 3, interlacing); + return 0; + + case XVID_CSP_BGRA: + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_bgrai :yv12_to_bgra, + interlacing?yv12_to_bgrai_c:yv12_to_bgra_c, 4, interlacing); + return 0; + + case XVID_CSP_ABGR: + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_abgri :yv12_to_abgr, + interlacing?yv12_to_abgri_c:yv12_to_abgr_c, 4, interlacing); + return 0; + + case XVID_CSP_RGB: + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_rgbi :yv12_to_rgb, + interlacing?yv12_to_rgbi_c:yv12_to_rgb_c, 3, interlacing); + return 0; + + case XVID_CSP_RGBA: + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_rgbai :yv12_to_rgba, + interlacing?yv12_to_rgbai_c:yv12_to_rgba_c, 4, interlacing); + return 0; + + case XVID_CSP_ARGB: + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_argbi :yv12_to_argb, + interlacing?yv12_to_argbi_c:yv12_to_argb_c, 4, interlacing); return 0; case XVID_CSP_YUY2: - yv12_to_yuyv(dst, dst_stride, image->y, image->u, image->v, - edged_width, edged_width / 2, width, height); + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_yuyvi :yv12_to_yuyv, + interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing); return 0; - case XVID_CSP_YVYU: // u,v swapped - yv12_to_yuyv(dst, dst_stride, image->y, image->v, image->u, - edged_width, edged_width / 2, width, 
height); + case XVID_CSP_YVYU: /* u,v swapped */ + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->v, image->u, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_yuyvi :yv12_to_yuyv, + interlacing?yv12_to_yuyvi_c:yv12_to_yuyv_c, 2, interlacing); return 0; case XVID_CSP_UYVY: - yv12_to_uyvy(dst, dst_stride, image->y, image->u, image->v, - edged_width, edged_width / 2, width, height); - return 0; - - case XVID_CSP_USER: - ((DEC_PICTURE *) dst)->y = image->y; - ((DEC_PICTURE *) dst)->u = image->u; - ((DEC_PICTURE *) dst)->v = image->v; - ((DEC_PICTURE *) dst)->stride_y = edged_width; - ((DEC_PICTURE *) dst)->stride_uv = edged_width / 2; + safe_packed_conv( + dst[0], dst_stride[0], image->y, image->u, image->v, + edged_width, edged_width2, width, height, (csp & XVID_CSP_VFLIP), + interlacing?yv12_to_uyvyi :yv12_to_uyvy, + interlacing?yv12_to_uyvyi_c:yv12_to_uyvy_c, 2, interlacing); + return 0; + + case XVID_CSP_I420: /* YCbCr == YUV == internal colorspace for MPEG */ + yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2, + dst_stride[0], dst_stride[0]/2, + image->y, image->u, image->v, edged_width, edged_width2, + width, height, (csp & XVID_CSP_VFLIP)); + return 0; + + case XVID_CSP_YV12: /* YCrCb == YVU == U and V plane swapped */ + yv12_to_yv12(dst[0], dst[0] + dst_stride[0]*height, dst[0] + dst_stride[0]*height + (dst_stride[0]/2)*height2, + dst_stride[0], dst_stride[0]/2, + image->y, image->v, image->u, edged_width, edged_width2, + width, height, (csp & XVID_CSP_VFLIP)); + return 0; + + case XVID_CSP_PLANAR: /* YCbCr with arbitrary pointers and different strides for Y and UV */ + yv12_to_yv12(dst[0], dst[1], dst[2], + dst_stride[0], dst_stride[1], /* v: dst_stride[2] not yet supported */ + image->y, image->u, image->v, edged_width, edged_width2, + width, height, (csp & XVID_CSP_VFLIP)); + return 0; + + case XVID_CSP_INTERNAL : + dst[0] = image->y; + dst[1] = image->u; + dst[2] = image->v; + dst_stride[0] = edged_width; + dst_stride[1] = edged_width/2; + dst_stride[2] = edged_width/2; return 0; case XVID_CSP_NULL: - case XVID_CSP_EXTERN: + case XVID_CSP_SLICE: return 0; } @@ -544,7 +796,169 @@ return psnr_y; } -/* + +float sse_to_PSNR(long sse, int pixels) +{ + if (sse==0) + return 99.99F; + + return 48.131F - 10*(float)log10((float)sse/(float)(pixels)); /* log10(255*255)=4.8131 */ + +} + +long plane_sse(uint8_t *orig, + uint8_t *recon, + uint16_t stride, + uint16_t width, + uint16_t height) +{ + int y, bwidth, bheight; + long sse = 0; + + bwidth = width & (~0x07); + bheight = height & (~0x07); + + /* Compute the 8x8 integer part */ + for (y = 0; y < bheight; y += 8) { + int x; + + /* sse over the band of 8x8 blocks */ + for (x = 0; x < bwidth; x += 8) + sse += sse8_8bit(orig + x + y*stride, + recon + x + y*stride, + stride); + + /* remaining pixels on the right of the band */ + for (x = bwidth; x < width; x++) { + int diff; + diff = *(orig + x + y*stride) - *(recon + x + y*stride); + sse += diff*diff; + } + } + + /* remaining rows at the bottom of the plane */ + for (y = bheight; y < height; y++) { + int x; + for (x = 0; x < width; x++) { + int diff; + diff = *(orig + x + y*stride) - *(recon + x + y*stride); + sse += diff*diff; + } + } + + return (sse); +}
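The two helpers above combine directly into a plane-quality measurement; a minimal usage sketch (orig, recon, stride, width and height stand for caller-supplied luma buffers, the names are illustrative):

/* PSNR of a luma plane: a uniform error of 1 per pixel yields
   sse == width*height, hence 48.131 - 10*log10(1) = 48.13 dB */
long  sse  = plane_sse(orig, recon, stride, width, height);
float psnr = sse_to_PSNR(sse, width*height);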
+ +void +image_block_variance(IMAGE * orig_image, + uint16_t stride, + MACROBLOCK * mbs, + uint16_t mb_width, + uint16_t mb_height) +{ + DECLARE_ALIGNED_MATRIX(sums, 1, 4, uint16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(squares, 1, 4, uint32_t, CACHE_LINE); + + int x, y, i, j; + uint8_t *orig_y = orig_image->y; + uint8_t *orig_u = orig_image->u; + uint8_t *orig_v = orig_image->v; + + for (y = 0; y < mb_height; y++) { + for (x = 0; x < mb_width; x++) { + MACROBLOCK *pMB = &mbs[x + y * mb_width]; + uint32_t var4[4]; + uint32_t sum = 0, square = 0; + + /* y-blocks */ + for (j = 0; j < 2; j++) { + for (i = 0; i < 2; i++) { + int lsum = blocksum8(orig_y + ((y<<4) + (j<<3))*stride + (x<<4) + (i<<3), + stride, sums, squares); + int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6; + + sum += lsum; + square += lsquare; + + var4[0] = (squares[0]<<4) - sums[0]*sums[0]; + var4[1] = (squares[1]<<4) - sums[1]*sums[1]; + var4[2] = (squares[2]<<4) - sums[2]*sums[2]; + var4[3] = (squares[3]<<4) - sums[3]*sums[3]; + + pMB->rel_var8[j*2 + i] = lsquare - lsum*lsum; + if (pMB->rel_var8[j*2 + i]) + pMB->rel_var8[j*2 + i] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) / + pMB->rel_var8[j*2 + i]; /* 4*(Var(Di)/Var(D)) */ + else + pMB->rel_var8[j*2 + i] = 64; + } + } + + /* u */ + { + int lsum = blocksum8(orig_u + (y<<3)*(stride>>1) + (x<<3), + stride, sums, squares); + int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6; + + sum += lsum; + square += lsquare; + + var4[0] = (squares[0]<<4) - sums[0]*sums[0]; + var4[1] = (squares[1]<<4) - sums[1]*sums[1]; + var4[2] = (squares[2]<<4) - sums[2]*sums[2]; + var4[3] = (squares[3]<<4) - sums[3]*sums[3]; + + pMB->rel_var8[4] = lsquare - lsum*lsum; + if (pMB->rel_var8[4]) + pMB->rel_var8[4] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) / + pMB->rel_var8[4]; /* 4*(Var(Di)/Var(D)) */ + else + pMB->rel_var8[4] = 64; + } + + /* v */ + { + int lsum = blocksum8(orig_v + (y<<3)*(stride>>1) + (x<<3), + stride, sums, squares); + int lsquare = (squares[0] + squares[1] + squares[2] + squares[3])<<6; + + sum += lsum; + square += lsquare; + + var4[0] = (squares[0]<<4) - sums[0]*sums[0]; + var4[1] = (squares[1]<<4) - sums[1]*sums[1]; + var4[2] = (squares[2]<<4) - sums[2]*sums[2]; + var4[3] = (squares[3]<<4) - sums[3]*sums[3]; + + pMB->rel_var8[5] = lsquare - lsum*lsum; + if (pMB->rel_var8[5]) + pMB->rel_var8[5] = ((var4[0] + var4[1] + var4[2] + var4[3])<<8) / + pMB->rel_var8[5]; /* 4*(Var(Di)/Var(D)) */ + else + pMB->rel_var8[5] = 64; + } + + } + } +} + +#if 0 #include <stdio.h> #include <string.h> @@ -553,7 +967,7 @@ { FILE * f; char hdr[1024]; - + f = fopen(filename, "wb"); if ( f == NULL) { @@ -568,7 +982,7 @@ } -// dump image+edges to yuv pgm files +/* dump image+edges to yuv pgm files */ int image_dump(IMAGE * image, uint32_t edged_width, uint32_t edged_height, char * path, int number) { @@ -591,7 +1005,7 @@ return 0; } -*/ +#endif @@ -640,7 +1054,6 @@ } -#define ABS(X) (((X)>0)?(X):-(X)) float image_mad(const IMAGE * img1, const IMAGE * img2, @@ -657,49 +1070,143 @@ for (y = 0; y < height; y++) for (x = 0; x < width; x++) - sum += ABS(img1->y[x + y * stride] - img2->y[x + y * stride]); + sum += abs(img1->y[x + y * stride] - img2->y[x + y * stride]); for (y = 0; y < height2; y++) for (x = 0; x < width2; x++) - sum += ABS(img1->u[x + y * stride2] - img2->u[x + y * stride2]); + sum += abs(img1->u[x + y * stride2] - img2->u[x + y * stride2]); for (y = 0; y < height2; y++) for (x = 0; x < width2; x++) - sum += ABS(img1->v[x + y * stride2] - img2->v[x + y * stride2]); + sum += abs(img1->v[x + y * stride2] - img2->v[x + y * stride2]); return (float) sum / (width * height * 3 / 2); } void -output_slice(IMAGE * cur, int std, int width, XVID_DEC_PICTURE* out_frm, int mbx, int mby,int mbl) { +output_slice(IMAGE * cur, int stride, int width, xvid_image_t* out_frm, int mbx, int mby,int mbl) { uint8_t *dY,*dU,*dV,*sY,*sU,*sV; - int std2 = std >> 1; + int stride2 = stride >> 1; int w = mbl << 4, w2,i; if(w > width) w = width; w2 = w >> 1; - dY = (uint8_t*)out_frm->y + (mby << 4) * out_frm->stride_y + (mbx << 4); - dU = (uint8_t*)out_frm->u + (mby << 3) * out_frm->stride_u + (mbx << 3); - dV = (uint8_t*)out_frm->v + (mby << 3) * out_frm->stride_v + (mbx << 3); - sY = cur->y + (mby << 4) * std + (mbx << 4); - sU = cur->u + (mby << 3) * std2 + (mbx << 3); - sV = cur->v + (mby << 3) * std2 + (mbx << 3); + dY = (uint8_t*)out_frm->plane[0] + (mby << 4) * out_frm->stride[0] + (mbx << 4); + dU = (uint8_t*)out_frm->plane[1] + (mby << 3) * out_frm->stride[1] + (mbx << 3); + dV = (uint8_t*)out_frm->plane[2] + (mby << 3) * out_frm->stride[2] + (mbx << 3); + sY = cur->y + (mby << 4) * stride + (mbx << 4); + sU = cur->u + (mby << 
3) * stride2 + (mbx << 3); + sV = cur->v + (mby << 3) * stride2 + (mbx << 3); for(i = 0 ; i < 16 ; i++) { memcpy(dY,sY,w); - dY += out_frm->stride_y; - sY += std; + dY += out_frm->stride[0]; + sY += stride; } for(i = 0 ; i < 8 ; i++) { memcpy(dU,sU,w2); - dU += out_frm->stride_u; - sU += std2; + dU += out_frm->stride[1]; + sU += stride2; } for(i = 0 ; i < 8 ; i++) { memcpy(dV,sV,w2); - dV += out_frm->stride_v; - sV += std2; + dV += out_frm->stride[2]; + sV += stride2; } } + + +void +image_clear(IMAGE * img, int width, int height, int edged_width, + int y, int u, int v) +{ + uint8_t * p; + int i; + + p = img->y; + for (i = 0; i < height; i++) { + memset(p, y, width); + p += edged_width; + } + + p = img->u; + for (i = 0; i < height/2; i++) { + memset(p, u, width/2); + p += edged_width/2; + } + + p = img->v; + for (i = 0; i < height/2; i++) { + memset(p, v, width/2); + p += edged_width/2; + } +} + +/****************************************************************************/ + +static void (*deintl_core)(uint8_t *, int width, int height, const int stride) = 0; +extern void xvid_deinterlace_sse(uint8_t *, int width, int height, const int stride); + +#define CLIP_255(x) ( ((x)&~255) ? ((-(x)) >> (8*sizeof((x))-1))&0xff : (x) ) + +static void deinterlace_c(uint8_t *pix, int width, int height, const int bps) +{ + pix += bps; + while(width-->0) + { + int p1 = pix[-bps]; + int p2 = pix[0]; + int p0 = p2; + int j = (height>>1) - 1; + int V; + unsigned char *P = pix++; + while(j-->0) + { + const int p3 = P[ bps]; + const int p4 = P[2*bps]; + V = ((p1+p3+1)>>1) + ((p2 - ((p0+p4+1)>>1)) >> 2); + P[0] = CLIP_255( V ); + p0 = p2; + p1 = p3; + p2 = p4; + P += 2*bps; + } + V = ((p1+p1+1)>>1) + ((p2 - ((p0+p2+1)>>1)) >> 2); + P[0] = CLIP_255( V ); + } +} +#undef CLIP_255 + +int xvid_image_deinterlace(xvid_image_t* img, int width, int height, int bottom_first) +{ + if (height&1) + return 0; + if (img->csp!=XVID_CSP_PLANAR && img->csp!=XVID_CSP_I420 && img->csp!=XVID_CSP_YV12) + return 0; /* not yet supported */ + if (deintl_core==0) { + deintl_core = deinterlace_c; +#if defined(ARCH_IS_IA32) || defined(ARCH_IS_X86_64) + { + int cpu_flags = check_cpu_features(); + if (cpu_flags & XVID_CPU_MMX) + deintl_core = xvid_deinterlace_sse; + } +#endif + } + if (!bottom_first) { + deintl_core(img->plane[0], width, height, img->stride[0]); + deintl_core(img->plane[1], width>>1, height>>1, img->stride[1]); + deintl_core(img->plane[2], width>>1, height>>1, img->stride[2]); + } + else { + deintl_core((uint8_t *)img->plane[0] + ( height -1)*img->stride[0], width, height, -img->stride[0]); + deintl_core((uint8_t *)img->plane[1] + ((height>>1)-1)*img->stride[1], width>>1, height>>1, -img->stride[1]); + deintl_core((uint8_t *)img->plane[2] + ((height>>1)-1)*img->stride[2], width>>1, height>>1, -img->stride[2]); + } + emms(); + + return 1; +} +
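A usage sketch for the deinterlacer added above. The buffer wiring here is illustrative, not part of the patch: plane pointers, strides and dimensions come from the caller, and only the planar colorspaces pass the guard in xvid_image_deinterlace.

#include "xvid.h"

static void deinterlace_i420(uint8_t *y, uint8_t *u, uint8_t *v,
                             int width, int height)
{
	xvid_image_t img;
	img.csp      = XVID_CSP_I420;
	img.plane[0] = y;  img.stride[0] = width;     /* luma   */
	img.plane[1] = u;  img.stride[1] = width / 2; /* chroma */
	img.plane[2] = v;  img.stride[2] = width / 2;
	xvid_image_deinterlace(&img, width, height, 0 /* !bottom_first: top field first */);
}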