--- branches/dev-api-4/xvidcore/src/motion/motion_est.c 2003/06/09 01:25:19 1053 +++ branches/dev-api-4/xvidcore/src/motion/motion_est.c 2003/06/26 11:50:37 1075 @@ -1,32 +1,29 @@ -/************************************************************************** +/***************************************************************************** + * + * XVID MPEG-4 VIDEO CODEC + * - Motion Estimation related code - * - * XVID MPEG-4 VIDEO CODEC - * motion estimation + * Copyright(C) 2002 Christoph Lampert + * 2002 Michael Militzer + * 2002-2003 Radoslaw Czyz * - * This program is an implementation of a part of one or more MPEG-4 - * Video tools as specified in ISO/IEC 14496-2 standard. Those intending - * to use this software module in hardware or software products are - * advised that its use may infringe existing patents or copyrights, and - * any such use would be at such party's own risk. The original - * developer of this software module and his/her company, and subsequent - * editors and their companies, will have no liability for use of this - * software or modifications or derivatives thereof. + * This program is free software ; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation ; either version 2 of the License, or + * (at your option) any later version. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY ; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. + * You should have received a copy of the GNU General Public License + * along with this program ; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + * $Id: motion_est.c,v 1.58.2.19 2003-06-26 11:50:37 syskin Exp $ * - *************************************************************************/ + ****************************************************************************/ #include #include @@ -670,11 +667,20 @@ for(i = 0; i < 4; i++) { int s = 8*((i&1) + (i>>1)*data->iEdgedWidth); transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth); - bits += data->temp[i] = Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, i); + bits += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, i); } bits += t = BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); + if (data->temp[0] + t < data->iMinSAD[1]) { + data->iMinSAD[1] = data->temp[0] + t; current[1].x = x; current[1].y = y; } + if (data->temp[1] < data->iMinSAD[2]) { + data->iMinSAD[2] = data->temp[1]; current[2].x = x; current[2].y = y; } + if (data->temp[2] < data->iMinSAD[3]) { + data->iMinSAD[3] = data->temp[2]; current[3].x = x; current[3].y = y; } + if (data->temp[3] < data->iMinSAD[4]) { + data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; } + bits += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len; if (bits >= data->iMinSAD[0]) return; @@ -686,13 +692,13 @@ /* chroma U */ ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[4], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding); transfer_8to16subro(in, ptr, data->CurU, data->iEdgedWidth/2); - bits += Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, 4); + bits += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 4); if (bits >= data->iMinSAD[0]) return; /* chroma V */ ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[5], 0, 0, xc, yc, data->iEdgedWidth/2, data->rounding); transfer_8to16subro(in, ptr, data->CurV, data->iEdgedWidth/2); - bits += Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, 5); + bits += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5); bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len; @@ -701,17 +707,8 @@ current[0].x = x; current[0].y = y; *dir = Direction; } - - if (data->temp[0] + t < data->iMinSAD[1]) { - data->iMinSAD[1] = data->temp[0] + t; current[1].x = x; current[1].y = y; } - if (data->temp[1] < data->iMinSAD[2]) { - data->iMinSAD[2] = data->temp[1]; current[2].x = x; current[2].y = y; } - if (data->temp[2] < data->iMinSAD[3]) { - data->iMinSAD[3] = data->temp[2]; current[3].x = x; current[3].y = y; } - if (data->temp[3] < data->iMinSAD[4]) { - data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; } - } + static void CheckCandidateBits8(const int x, const int y, const int Direction, int * const dir, const SearchData * const data) { @@ -734,7 +731,7 @@ } transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth); - bits = Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, 5); + bits = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5); bits += BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0); if (bits < data->iMinSAD[0]) { @@ -1092,7 +1089,7 @@ VECTOR currentMV[5]; VECTOR currentQMV[5]; int32_t iMinSAD[5]; - DECLARE_ALIGNED_MATRIX(dct_space, 2, 64, int16_t, CACHE_LINE); + DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); SearchData Data; memset(&Data, 0, sizeof(SearchData)); Data.iEdgedWidth = iEdgedWidth; @@ -1293,7 +1290,7 @@ Data->iMinSAD[3] = pMB->sad8[2]; Data->iMinSAD[4] = pMB->sad8[3]; - if ((!(VopFlags & XVID_VOP_MODEDECISION_BITS)) || (x | y)) { + if ((!(VopFlags & XVID_VOP_MODEDECISION_BITS)) && (x | y)) { threshA = Data->temp[0]; /* that's where we keep this SAD atm */ if (threshA < 512) threshA = 512; else if (threshA > 1024) threshA = 1024; @@ -2164,24 +2161,26 @@ } } -#define INTRA_THRESH 1700 -#define INTER_THRESH 1200 +#define INTRA_THRESH 2200 +#define INTER_THRESH 50 +#define INTRA_THRESH2 95 int MEanalysis( const IMAGE * const pRef, const FRAMEINFO * const Current, const MBParam * const pParam, - const int maxIntra, /* maximum number if non-I frames */ - const int intraCount, /* number of non-I frames after last I frame; 0 if we force P/B frame */ - const int bCount, /* number of B frames in a row */ + const int maxIntra, //maximum number if non-I frames + const int intraCount, //number of non-I frames after last I frame; 0 if we force P/B frame + const int bCount, // number of B frames in a row const int b_thresh) { uint32_t x, y, intra = 0; int sSAD = 0; MACROBLOCK * const pMBs = Current->mbs; const IMAGE * const pCurrent = &Current->image; - int IntraThresh = INTRA_THRESH, InterThresh = INTER_THRESH + 10*b_thresh; + int IntraThresh = INTRA_THRESH, InterThresh = INTER_THRESH + b_thresh; int s = 0, blocks = 0; + int complexity = 0; int32_t iMinSAD[5], temp[5]; VECTOR currentMV[5]; @@ -2193,26 +2192,27 @@ Data.temp = temp; CheckCandidate = CheckCandidate32I; + if (intraCount != 0) { - if (intraCount < 10) /* we're right after an I frame */ + if (intraCount < 10) // we're right after an I frame IntraThresh += 15* (intraCount - 10) * (intraCount - 10); else - if ( 5*(maxIntra - intraCount) < maxIntra) /* we're close to maximum. 2 sec when max is 10 sec */ + if ( 5*(maxIntra - intraCount) < maxIntra) // we're close to maximum. 2 sec when max is 10 sec IntraThresh -= (IntraThresh * (maxIntra - 8*(maxIntra - intraCount)))/maxIntra; } - InterThresh -= (350 - 8*b_thresh) * bCount; - if (InterThresh < 300 + 5*b_thresh) InterThresh = 300 + 5*b_thresh; + InterThresh -= 12 * bCount; + if (InterThresh < 15 + b_thresh) InterThresh = 15 + b_thresh; if (sadInit) (*sadInit) (); for (y = 1; y < pParam->mb_height-1; y += 2) { for (x = 1; x < pParam->mb_width-1; x += 2) { int i; - blocks += 4; + blocks += 10; if (bCount == 0) pMBs[x + y * pParam->mb_width].mvs[0] = zeroMV; - else { /* extrapolation of the vector found for last frame */ + else { //extrapolation of the vector found for last frame pMBs[x + y * pParam->mb_width].mvs[0].x = (pMBs[x + y * pParam->mb_width].mvs[0].x * (bCount+1) ) / bCount; pMBs[x + y * pParam->mb_width].mvs[0].y = @@ -2224,34 +2224,33 @@ for (i = 0; i < 4; i++) { int dev; MACROBLOCK *pMB = &pMBs[x+(i&1) + (y+(i>>1)) * pParam->mb_width]; - if (pMB->sad16 > IntraThresh) { - dev = dev16(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16, - pParam->edged_width); - if (dev + IntraThresh < pMB->sad16) { - pMB->mode = MODE_INTRA; - if (++intra > ((pParam->mb_height-2)*(pParam->mb_width-2))/2) return I_VOP; - } + dev = dev16(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16, + pParam->edged_width); + + complexity += dev; + if (dev + IntraThresh < pMB->sad16) { + pMB->mode = MODE_INTRA; + if (++intra > ((pParam->mb_height-2)*(pParam->mb_width-2))/2) return I_VOP; } - if (pMB->mvs[0].x == 0 && pMB->mvs[0].y == 0) s++; + + if (pMB->mvs[0].x == 0 && pMB->mvs[0].y == 0) + if (dev > 500 && pMB->sad16 < 1000) + sSAD += 1000; sSAD += pMB->sad16; } } } + complexity >>= 7; - sSAD /= blocks; - - if (b_thresh < 20) { - s = (10*s) / blocks; - if (s > 4) sSAD += (s - 2) * (40 - 2*b_thresh); /* static block - looks bad when in bframe... */ - } + sSAD /= complexity + 4*blocks; + if (intraCount > 12 && sSAD > INTRA_THRESH2 ) return I_VOP; if (sSAD > InterThresh ) return P_VOP; emms(); return B_VOP; } - static WARPPOINTS GlobalMotionEst(const MACROBLOCK * const pMBs, const MBParam * const pParam, @@ -2624,14 +2623,14 @@ /* chroma U */ ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2); - bits += Block_CalcBits(coeff, in, Data->iQuant, Data->quant_type, &cbp, 4); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4); if (bits >= *Data->iMinSAD) return bits; /* chroma V */ ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding); transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2); - bits += Block_CalcBits(coeff, in, Data->iQuant, Data->quant_type, &cbp, 5); + bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5); bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len; @@ -2648,7 +2647,7 @@ for(i = 0; i < 4; i++) { int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth); transfer_8to16copy(in, Data->Cur + s, Data->iEdgedWidth); - bits += Block_CalcBitsIntra(coeff, in, Data->iQuant, Data->quant_type, &cbp, i, &dc); + bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, &dc); if (bits >= Data->iMinSAD[0]) return bits; } @@ -2657,13 +2656,13 @@ /*chroma U */ transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2); - bits += Block_CalcBitsIntra(coeff, in, Data->iQuant, Data->quant_type, &cbp, 4, &dc); + bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, &dc); if (bits >= Data->iMinSAD[0]) return bits; /* chroma V */ transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2); - bits += Block_CalcBitsIntra(coeff, in, Data->iQuant, Data->quant_type, &cbp, 5, &dc); + bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, &dc); bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp & 3) << 3)].len;