--- branches/dev-api-4/xvidcore/src/motion/motion_est.c	2003/06/09 01:25:19	1053
+++ branches/dev-api-4/xvidcore/src/motion/motion_est.c	2003/06/26 11:50:37	1075
@@ -1,32 +1,29 @@
-/**************************************************************************
+/*****************************************************************************
+ *
+ *  XVID MPEG-4 VIDEO CODEC
+ *  - Motion Estimation related code  -
  *
- *	XVID MPEG-4 VIDEO CODEC
- *	motion estimation
+ *  Copyright(C) 2002 Christoph Lampert <gruel@web.de>
+ *               2002 Michael Militzer <michael@xvid.org>
+ *               2002-2003 Radoslaw Czyz <xvid@syskin.cjb.net>
  *
- *	This program is an implementation of a part of one or more MPEG-4
- *	Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
- *	to use this software module in hardware or software products are
- *	advised that its use may infringe existing patents or copyrights, and
- *	any such use would be at such party's own risk.  The original
- *	developer of this software module and his/her company, and subsequent
- *	editors and their companies, will have no liability for use of this
- *	software or modifications or derivatives thereof.
+ *  This program is free software ; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation ; either version 2 of the License, or
+ *  (at your option) any later version.
  *
- *	This program is free software; you can redistribute it and/or modify
- *	it under the terms of the GNU General Public License as published by
- *	the Free Software Foundation; either version 2 of the License, or
- *	(at your option) any later version.
+ *  This program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *  GNU General Public License for more details.
  *
- *	This program is distributed in the hope that it will be useful,
- *	but WITHOUT ANY WARRANTY; without even the implied warranty of
- *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *	GNU General Public License for more details.
+ *  You should have received a copy of the GNU General Public License
+ *  along with this program ; if not, write to the Free Software
+ *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
  *
- *	You should have received a copy of the GNU General Public License
- *	along with this program; if not, write to the Free Software
- *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * $Id: motion_est.c,v 1.58.2.19 2003-06-26 11:50:37 syskin Exp $
  *
- *************************************************************************/
+ ****************************************************************************/
 
 #include <assert.h>
 #include <stdio.h>
@@ -670,11 +667,20 @@
 	for(i = 0; i < 4; i++) {
 		int s = 8*((i&1) + (i>>1)*data->iEdgedWidth);
 		transfer_8to16subro(in, data->Cur + s, ptr + s, data->iEdgedWidth);
-		bits += data->temp[i] = Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, i);
+		bits += data->temp[i] = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, i);
 	}
 
 	bits += t = BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0);
 
+	if (data->temp[0] + t < data->iMinSAD[1]) {
+		data->iMinSAD[1] = data->temp[0] + t; current[1].x = x; current[1].y = y; }
+	if (data->temp[1] < data->iMinSAD[2]) {
+		data->iMinSAD[2] = data->temp[1]; current[2].x = x; current[2].y = y; }
+	if (data->temp[2] < data->iMinSAD[3]) {
+		data->iMinSAD[3] = data->temp[2]; current[3].x = x; current[3].y = y; }
+	if (data->temp[3] < data->iMinSAD[4]) {
+		data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; }
+
 	bits += BITS_MULT*xvid_cbpy_tab[15-(cbp>>2)].len;
 
 	if (bits >= data->iMinSAD[0]) return;
@@ -686,13 +692,13 @@
 	/* chroma U */
 	ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[4], 0, 0, xc, yc,  data->iEdgedWidth/2, data->rounding);
 	transfer_8to16subro(in, ptr, data->CurU, data->iEdgedWidth/2);
-	bits += Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, 4);
+	bits += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 4);
 	if (bits >= data->iMinSAD[0]) return;
 
 	/* chroma V */
 	ptr = interpolate8x8_switch2(data->RefQ + 64, data->RefP[5], 0, 0, xc, yc,  data->iEdgedWidth/2, data->rounding);
 	transfer_8to16subro(in, ptr, data->CurV, data->iEdgedWidth/2);
-	bits += Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, 5);
+	bits += Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5);
 
 	bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER & 7) | ((cbp & 3) << 3)].len;
 
@@ -701,17 +707,8 @@
 		current[0].x = x; current[0].y = y;
 		*dir = Direction;
 	}
-
-	if (data->temp[0] + t < data->iMinSAD[1]) {
-		data->iMinSAD[1] = data->temp[0] + t; current[1].x = x; current[1].y = y; }
-	if (data->temp[1] < data->iMinSAD[2]) {
-		data->iMinSAD[2] = data->temp[1]; current[2].x = x; current[2].y = y; }
-	if (data->temp[2] < data->iMinSAD[3]) {
-		data->iMinSAD[3] = data->temp[2]; current[3].x = x; current[3].y = y; }
-	if (data->temp[3] < data->iMinSAD[4]) {
-		data->iMinSAD[4] = data->temp[3]; current[4].x = x; current[4].y = y; }
-
 }
+
 static void
 CheckCandidateBits8(const int x, const int y, const int Direction, int * const dir, const SearchData * const data)
 {
@@ -734,7 +731,7 @@
 	}
 
 	transfer_8to16subro(in, data->Cur, ptr, data->iEdgedWidth);
-	bits = Block_CalcBits(coeff, in, data->iQuant, data->quant_type, &cbp, 5);
+	bits = Block_CalcBits(coeff, in, data->dctSpace + 128, data->iQuant, data->quant_type, &cbp, 5);
 	bits += BITS_MULT*d_mv_bits(x, y, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0);
 
 	if (bits < data->iMinSAD[0]) {
@@ -1092,7 +1089,7 @@
 	VECTOR currentMV[5];
 	VECTOR currentQMV[5];
 	int32_t iMinSAD[5];
-	DECLARE_ALIGNED_MATRIX(dct_space, 2, 64, int16_t, CACHE_LINE);
+	DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE);
 	SearchData Data;
 	memset(&Data, 0, sizeof(SearchData));
 	Data.iEdgedWidth = iEdgedWidth;
@@ -1293,7 +1290,7 @@
 	Data->iMinSAD[3] = pMB->sad8[2];
 	Data->iMinSAD[4] = pMB->sad8[3];
 
-	if ((!(VopFlags & XVID_VOP_MODEDECISION_BITS)) || (x | y)) {
+	if ((!(VopFlags & XVID_VOP_MODEDECISION_BITS)) && (x | y)) {
 		threshA = Data->temp[0]; /* that's where we keep this SAD atm */
 		if (threshA < 512) threshA = 512;
 		else if (threshA > 1024) threshA = 1024;
@@ -2164,24 +2161,26 @@
 	}
 }
 
-#define INTRA_THRESH	1700
-#define INTER_THRESH	1200
+#define INTRA_THRESH	2200
+#define INTER_THRESH	50
+#define INTRA_THRESH2	95
 
 int
 MEanalysis(	const IMAGE * const pRef,
 			const FRAMEINFO * const Current,
 			const MBParam * const pParam,
-			const int maxIntra, /* maximum number if non-I frames */
-			const int intraCount, /* number of non-I frames after last I frame; 0 if we force P/B frame */
-			const int bCount,  /* number of B frames in a row */
+			const int maxIntra, //maximum number of non-I frames
+			const int intraCount, //number of non-I frames after last I frame; 0 if we force P/B frame
+			const int bCount,  // number of B frames in a row
 			const int b_thresh)
 {
 	uint32_t x, y, intra = 0;
 	int sSAD = 0;
 	MACROBLOCK * const pMBs = Current->mbs;
 	const IMAGE * const pCurrent = &Current->image;
-	int IntraThresh = INTRA_THRESH, InterThresh = INTER_THRESH + 10*b_thresh;
+	int IntraThresh = INTRA_THRESH, InterThresh = INTER_THRESH + b_thresh;
 	int s = 0, blocks = 0;
+	int complexity = 0;
 
 	int32_t iMinSAD[5], temp[5];
 	VECTOR currentMV[5];
@@ -2193,26 +2192,27 @@
 	Data.temp = temp;
 	CheckCandidate = CheckCandidate32I;
 
+
 	if (intraCount != 0) {
-		if (intraCount < 10) /* we're right after an I frame */
+		if (intraCount < 10) // we're right after an I frame
 			IntraThresh += 15* (intraCount - 10) * (intraCount - 10);
 		else
-			if ( 5*(maxIntra - intraCount) < maxIntra) /* we're close to maximum. 2 sec when max is 10 sec */
+			if ( 5*(maxIntra - intraCount) < maxIntra) // we're close to maximum. 2 sec when max is 10 sec
 				IntraThresh -= (IntraThresh * (maxIntra - 8*(maxIntra - intraCount)))/maxIntra;
 	}
 
-	InterThresh -= (350 - 8*b_thresh) * bCount;
-	if (InterThresh < 300 + 5*b_thresh) InterThresh = 300 + 5*b_thresh;
+	InterThresh -= 12 * bCount;
+	if (InterThresh < 15 + b_thresh) InterThresh = 15 + b_thresh;
 
 	if (sadInit) (*sadInit) ();
 
 	for (y = 1; y < pParam->mb_height-1; y += 2) {
 		for (x = 1; x < pParam->mb_width-1; x += 2) {
 			int i;
-			blocks += 4;
+			blocks += 10;
 
 			if (bCount == 0) pMBs[x + y * pParam->mb_width].mvs[0] = zeroMV;
-			else { /* extrapolation of the vector found for last frame */
+			else { //extrapolation of the vector found for last frame
 				pMBs[x + y * pParam->mb_width].mvs[0].x =
 					(pMBs[x + y * pParam->mb_width].mvs[0].x * (bCount+1) ) / bCount;
 				pMBs[x + y * pParam->mb_width].mvs[0].y =
@@ -2224,34 +2224,33 @@
 			for (i = 0; i < 4; i++) {
 				int dev;
 				MACROBLOCK *pMB = &pMBs[x+(i&1) + (y+(i>>1)) * pParam->mb_width];
-				if (pMB->sad16 > IntraThresh) {
-					dev = dev16(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16,
-									pParam->edged_width);
-					if (dev + IntraThresh < pMB->sad16) {
-						pMB->mode = MODE_INTRA;
-						if (++intra > ((pParam->mb_height-2)*(pParam->mb_width-2))/2) return I_VOP;
-					}
+				dev = dev16(pCurrent->y + (x + (i&1) + (y + (i>>1)) * pParam->edged_width) * 16,
+								pParam->edged_width);
+	
+				complexity += dev;
+				if (dev + IntraThresh < pMB->sad16) {
+					pMB->mode = MODE_INTRA;
+					if (++intra > ((pParam->mb_height-2)*(pParam->mb_width-2))/2) return I_VOP;
 				}
-				if (pMB->mvs[0].x == 0 && pMB->mvs[0].y == 0) s++;
+
+				if (pMB->mvs[0].x == 0 && pMB->mvs[0].y == 0) 
+					if (dev > 500 && pMB->sad16 < 1000)
+						sSAD += 1000;
 
 				sSAD += pMB->sad16;
 			}
 		}
 	}
+	complexity >>= 7;
 
-	sSAD /= blocks;
-
-	if (b_thresh < 20) {
-		s = (10*s) / blocks;
-		if (s > 4) sSAD += (s - 2) * (40 - 2*b_thresh); /* static block - looks bad when in bframe... */
-	}
+	sSAD /= complexity + 4*blocks;
 
+	if (intraCount > 12 && sSAD > INTRA_THRESH2 ) return I_VOP;
 	if (sSAD > InterThresh ) return P_VOP;
 	emms();
 	return B_VOP;
 }
 
-
 static WARPPOINTS
 GlobalMotionEst(const MACROBLOCK * const pMBs,
 				const MBParam * const pParam,
@@ -2624,14 +2623,14 @@
 	/* chroma U */
 	ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[4], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding);
 	transfer_8to16subro(in, Data->CurU, ptr, Data->iEdgedWidth/2);
-	bits += Block_CalcBits(coeff, in, Data->iQuant, Data->quant_type, &cbp, 4);
+	bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4);
 
 	if (bits >= *Data->iMinSAD) return bits;
 
 	/* chroma V */
 	ptr = interpolate8x8_switch2(Data->RefQ + 64, Data->RefP[5], 0, 0, sumx, sumy, Data->iEdgedWidth/2, Data->rounding);
 	transfer_8to16subro(in, Data->CurV, ptr, Data->iEdgedWidth/2);
-	bits += Block_CalcBits(coeff, in, Data->iQuant, Data->quant_type, &cbp, 5);
+	bits += Block_CalcBits(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5);
 
 	bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTER4V & 7) | ((cbp & 3) << 3)].len;
 
@@ -2648,7 +2647,7 @@
 	for(i = 0; i < 4; i++) {
 		int s = 8*((i&1) + (i>>1)*Data->iEdgedWidth);
 		transfer_8to16copy(in, Data->Cur + s, Data->iEdgedWidth);
-		bits += Block_CalcBitsIntra(coeff, in, Data->iQuant, Data->quant_type, &cbp, i, &dc);
+		bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, i, &dc);
 
 		if (bits >= Data->iMinSAD[0]) return bits;
 	}
@@ -2657,13 +2656,13 @@
 
 	/*chroma U */
 	transfer_8to16copy(in, Data->CurU, Data->iEdgedWidth/2);
-	bits += Block_CalcBitsIntra(coeff, in, Data->iQuant, Data->quant_type, &cbp, 4, &dc);
+	bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 4, &dc);
 	
 	if (bits >= Data->iMinSAD[0]) return bits;
 
 	/* chroma V */
 	transfer_8to16copy(in, Data->CurV, Data->iEdgedWidth/2);
-	bits += Block_CalcBitsIntra(coeff, in, Data->iQuant, Data->quant_type, &cbp, 5, &dc);
+	bits += Block_CalcBitsIntra(coeff, in, Data->dctSpace + 128, Data->iQuant, Data->quant_type, &cbp, 5, &dc);
 
 	bits += BITS_MULT*mcbpc_inter_tab[(MODE_INTRA & 7) | ((cbp & 3) << 3)].len;