--- trunk/xvidcore/src/motion/motion_est.c	2002/04/23 00:04:51	132
+++ trunk/xvidcore/src/motion/motion_est.c	2002/05/07 20:03:18	167
@@ -2,6 +2,8 @@
  *
  *  Modifications:
  *
+ *	01.05.2002	updated MotionEstimationBVOP
+ *	25.04.2002 partial prevMB conversion
  *  22.04.2002 remove some compile warning by chenm001 <chenm001@163.com>
  *  14.04.2002 added MotionEstimationBVOP()
  *  02.04.2002 add EPZS(^2) as ME algorithm, use PMV_USESQUARES to choose between 
@@ -55,6 +57,7 @@
 /* sad16(0,0) bias; mpeg4 spec suggests nb/2+1 */
 /* nb  = vop pixels * 2^(bpp-8) */
 #define MV16_00_BIAS	(128+1)
+#define MV8_00_BIAS	(0)
 
 /* INTER bias for INTER/INTRA decision; mpeg4 spec suggests 2*nb */
 #define INTER_BIAS	512
@@ -79,8 +82,11 @@
 					const IMAGE * const pCur,
 					const int x, const int y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV);
 
@@ -92,8 +98,11 @@
 					const IMAGE * const pCur,
 					const int x, const int y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV);
 
@@ -105,10 +114,13 @@
 					const uint8_t * const pRefHV,
 					const IMAGE * const pCur,
 					const int x, const int y,
-					const int start_x, int start_y,
+					const int start_x, const int start_y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV);
 
@@ -119,10 +131,13 @@
 					const uint8_t * const pRefHV,
 					const IMAGE * const pCur,
 					const int x, const int y,
-					const int start_x, int start_y,
+					const int start_x, const int start_y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV);
 
@@ -170,6 +185,19 @@
 
 typedef MainSearch8Func* MainSearch8FuncPtr;
 
+static int32_t lambda_vec16[32] =  /* lambda weights for the motion-vector bit cost, indexed by iQuant (32 entries, each rounded to the nearest integer); values as in modified H.26L */
+	{     0    ,(int)(1.00235+0.5), (int)(1.15582+0.5), (int)(1.31976+0.5), (int)(1.49591+0.5), (int)(1.68601+0.5),
+	(int)(1.89187+0.5), (int)(2.11542+0.5), (int)(2.35878+0.5), (int)(2.62429+0.5), (int)(2.91455+0.5), 
+	(int)(3.23253+0.5), (int)(3.58158+0.5), (int)(3.96555+0.5), (int)(4.38887+0.5), (int)(4.85673+0.5), 
+	(int)(5.37519+0.5), (int)(5.95144+0.5), (int)(6.59408+0.5), (int)(7.31349+0.5), (int)(8.12242+0.5), 
+	(int)(9.03669+0.5), (int)(10.0763+0.5), (int)(11.2669+0.5), (int)(12.6426+0.5), (int)(14.2493+0.5), 
+	(int)(16.1512+0.5), (int)(18.442+0.5),  (int)(21.2656+0.5), (int)(24.8580+0.5), (int)(29.6436+0.5), 
+	(int)(36.4949+0.5)	};
+
+static int32_t *lambda_vec8 = lambda_vec16;	/* alias read by calc_delta_8: same table for INTER and INTER4V for now */
+
+
+
 // mv.length table
 static const uint32_t mvtab[33] = {
     1,  2,  3,  4,  6,  7,  7,  7,
@@ -205,15 +233,15 @@
 }
 
 
-static __inline uint32_t calc_delta_16(const int32_t dx, const int32_t dy, const uint32_t iFcode)
+static __inline uint32_t calc_delta_16(const int32_t dx, const int32_t dy, const uint32_t iFcode, const uint32_t iQuant)
 {
-	return NEIGH_TEND_16X16 * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode));
+	return NEIGH_TEND_16X16 * lambda_vec16[iQuant] * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode));
 }
 
-static __inline uint32_t calc_delta_8(const int32_t dx, const int32_t dy, const uint32_t iFcode)
+static __inline uint32_t calc_delta_8(const int32_t dx, const int32_t dy, const uint32_t iFcode, const uint32_t iQuant)
 
 {
-    return NEIGH_TEND_8X8 * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode));
+    return NEIGH_TEND_8X8 * lambda_vec8[iQuant] * (mv_bits(dx, iFcode) + mv_bits(dy, iFcode));
 }
 
 
@@ -232,18 +260,23 @@
 #endif
 
 bool MotionEstimation(
-	MACROBLOCK * const pMBs,
 	MBParam * const pParam,
-	const IMAGE * const pRef,
+	FRAMEINFO * const current,
+	FRAMEINFO * const reference,
 	const IMAGE * const pRefH,
 	const IMAGE * const pRefV,
 	const IMAGE * const pRefHV,
-	IMAGE * const pCurrent, 
 	const uint32_t iLimit)
 
 {
 	const uint32_t iWcount = pParam->mb_width;
 	const uint32_t iHcount = pParam->mb_height;
+	MACROBLOCK * pMBs = current->mbs;
+	IMAGE * pCurrent = &current->image;
+
+	MACROBLOCK * prevMBs = reference->mbs;	// previous frame
+	IMAGE * pRef = &reference->image;
+
  
 	uint32_t i, j, iIntra = 0;
 
@@ -256,16 +289,17 @@
 
 	if (sadInit)
 		(*sadInit)();
-		
+
 	// note: i==horizontal, j==vertical
 	for (i = 0; i < iHcount; i++)
 		for (j = 0; j < iWcount; j++)
 		{
 			MACROBLOCK *pMB = &pMBs[j + i * iWcount];
+			MACROBLOCK *prevMB = &prevMBs[j + i * iWcount];
 
 			sad16 = SEARCH16(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, 
-					 j, i, pParam->motion_flags,
-					 pParam, pMBs, &mv16, &pmv16); 
+					 j, i, current->motion_flags, current->quant, current->fcode,
+					 pParam, pMBs, prevMBs, &mv16, &pmv16); 
 			pMB->sad16=sad16;
 
 
@@ -281,6 +315,8 @@
 				pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = 0;
 				pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = 0;
 
+				pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = 0;
+
 				iIntra++;
 				if(iIntra >= iLimit)	
 					return 1;
@@ -288,23 +324,27 @@
 				continue;
 			}
 
-			if (pParam->global_flags & XVID_INTER4V)
+			if (current->global_flags & XVID_INTER4V)
 			{
 				pMB->sad8[0] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, 
-						       2 * j, 2 * i, mv16.x, mv16.y, pParam->motion_flags, 
-						       pParam, pMBs, &pMB->mvs[0], &pMB->pmvs[0]); 
+						       2 * j, 2 * i, mv16.x, mv16.y, 
+							   current->motion_flags, current->quant, current->fcode,
+						       pParam, pMBs, prevMBs, &pMB->mvs[0], &pMB->pmvs[0]);
 
 				pMB->sad8[1] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, 
-						       2 * j + 1, 2 * i, mv16.x, mv16.y, pParam->motion_flags,
-						       pParam, pMBs, &pMB->mvs[1], &pMB->pmvs[1]); 
+						       2 * j + 1, 2 * i, mv16.x, mv16.y, 
+							   current->motion_flags, current->quant, current->fcode,
+						       pParam, pMBs, prevMBs, &pMB->mvs[1], &pMB->pmvs[1]); 
 
 				pMB->sad8[2] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, 
-						       2 * j, 2 * i + 1, mv16.x, mv16.y, pParam->motion_flags,
-						       pParam, pMBs, &pMB->mvs[2], &pMB->pmvs[2]); 
+						       2 * j, 2 * i + 1, mv16.x, mv16.y, 
+							   current->motion_flags, current->quant, current->fcode,
+						       pParam, pMBs, prevMBs, &pMB->mvs[2], &pMB->pmvs[2]); 
 			
 				pMB->sad8[3] = SEARCH8(pRef->y, pRefH->y, pRefV->y, pRefHV->y, pCurrent, 
-						       2 * j + 1, 2 * i + 1, mv16.x, mv16.y, pParam->motion_flags,
-						       pParam, pMBs, &pMB->mvs[3], &pMB->pmvs[3]); 
+						       2 * j + 1, 2 * i + 1, mv16.x, mv16.y, 
+							   current->motion_flags, current->quant, current->fcode,
+						       pParam, pMBs, prevMBs, &pMB->mvs[3], &pMB->pmvs[3]); 
 
 				sad8 = pMB->sad8[0] + pMB->sad8[1] + pMB->sad8[2] + pMB->sad8[3];
 			}
@@ -314,19 +354,28 @@
 			   mpeg4:   if (sad8 < sad16 - nb/2+1) use_inter4v
 			*/
 
-			if (pMB->dquant == NO_CHANGE) {
-				if (((pParam->global_flags & XVID_INTER4V)==0) || 
-				    (sad16 < (sad8 + (int32_t)(IMV16X16 * pParam->quant)))) { 
+			if (!(current->global_flags & XVID_LUMIMASKING) || pMB->dquant == NO_CHANGE) 
+			{
+				if (((current->global_flags & XVID_INTER4V)==0) || 
+				    (sad16 < (sad8 + (int32_t)(IMV16X16 * current->quant)))) 
+				{ 
 			
 					sad8 = sad16;
 					pMB->mode = MODE_INTER;
 					pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = mv16.x;
 					pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = mv16.y;
+					pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = sad16;
 					pMB->pmvs[0].x = pmv16.x;
 					pMB->pmvs[0].y = pmv16.y;
 				}
 				else
+				{
 					pMB->mode = MODE_INTER4V;
+                                        pMB->sad8[0] *= 4;
+					pMB->sad8[1] *= 4;
+					pMB->sad8[2] *= 4;
+					pMB->sad8[3] *= 4;
+				}
 			}
 			else 
 			{
@@ -334,6 +383,8 @@
 				pMB->mode = MODE_INTER;
 				pMB->mvs[0].x = pMB->mvs[1].x = pMB->mvs[2].x = pMB->mvs[3].x = mv16.x;
 				pMB->mvs[0].y = pMB->mvs[1].y = pMB->mvs[2].y = pMB->mvs[3].y = mv16.y;
+                                pMB->sad8[0] = pMB->sad8[1] = pMB->sad8[2] = pMB->sad8[3] = sad16;
+
 				pMB->pmvs[0].x = pmv16.x;
 				pMB->pmvs[0].y = pmv16.y;
 			}
@@ -352,16 +403,14 @@
     && (0 <= max_dy) && (0 >= min_dy) ) \
   { \
     iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, 0, 0 , iEdgedWidth), iEdgedWidth, MV_MAX_ERROR); \
-    iSAD += calc_delta_16(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode) * iQuant;\
-    if (iSAD <= iQuant * 96)	\
-   	iSAD -= MV16_00_BIAS; \
+    iSAD += calc_delta_16(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=0; currMV->y=0; }  }	\
 }
 
 #define NOCHECK_MV16_CANDIDATE(X,Y) { \
     iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \
-    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } \
 }
@@ -371,7 +420,7 @@
     && ((Y) <= max_dy) && ((Y) >= min_dy) ) \
   { \
     iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \
-    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } } \
 }
@@ -381,7 +430,7 @@
     && ((Y) <= max_dy) && ((Y) >= min_dy) ) \
   { \
     iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \
-    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); } } \
 }
@@ -391,7 +440,7 @@
     && ((Y) <= max_dy) && ((Y) >= min_dy) ) \
   { \
     iSAD = sad16( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 16, X, Y, iEdgedWidth),iEdgedWidth, iMinSAD); \
-    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_16((X) - pmv[0].x, (Y) - pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); iFound=0; } } \
 }
@@ -399,7 +448,7 @@
 
 #define CHECK_MV8_ZERO {\
   iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, 0, 0 , iEdgedWidth), iEdgedWidth); \
-  iSAD += calc_delta_8(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode) * iQuant;\
+  iSAD += calc_delta_8(-pmv[0].x, -pmv[0].y, (uint8_t)iFcode, iQuant);\
   if (iSAD < iMinSAD) \
   { iMinSAD=iSAD; currMV->x=0; currMV->y=0; } \
 }
@@ -407,7 +456,7 @@
 #define NOCHECK_MV8_CANDIDATE(X,Y) \
   { \
     iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \
-    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } \
 }
@@ -417,7 +466,7 @@
     && ((Y) <= max_dy) && ((Y) >= min_dy) ) \
   { \
     iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \
-    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); } } \
 }
@@ -427,7 +476,7 @@
     && ((Y) <= max_dy) && ((Y) >= min_dy) ) \
   { \
     iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \
-    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); } } \
 }
@@ -437,7 +486,7 @@
     && ((Y) <= max_dy) && ((Y) >= min_dy) ) \
   { \
     iSAD = sad8( cur, get_ref(pRef, pRefH, pRefV, pRefHV, x, y, 8, (X), (Y), iEdgedWidth),iEdgedWidth); \
-    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode) * iQuant;\
+    iSAD += calc_delta_8((X)-pmv[0].x, (Y)-pmv[0].y, (uint8_t)iFcode, iQuant);\
     if (iSAD < iMinSAD) \
     {  iMinSAD=iSAD; currMV->x=(X); currMV->y=(Y); iDirection=(D); iFound=0; } } \
 }
@@ -452,13 +501,15 @@
 					const IMAGE * const pCur,
 					const int x, const int y,
 					const uint32_t MotionFlags, 				
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					MBParam * const pParam,
-					MACROBLOCK * const pMBs,				
+					const MACROBLOCK * const pMBs,				
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV)
 {
 	const int32_t iEdgedWidth = pParam->edged_width; 
-	const int32_t iQuant = pParam->quant;
 	const uint8_t * cur = pCur->y + x*16 + y*16*iEdgedWidth;
 	int32_t iSAD;
 	int32_t pred_x,pred_y;
@@ -780,14 +831,15 @@
 					const IMAGE * const pCur,
 					const int x, const int y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV)
 {
     const uint32_t iWcount = pParam->mb_width;
-	const int32_t iFcode = pParam->fixed_code;
-	const int32_t iQuant = pParam->quant;
 	const int32_t iWidth = pParam->width;
 	const int32_t iHeight = pParam->height;
 	const int32_t iEdgedWidth = pParam->edged_width; 
@@ -809,7 +861,8 @@
 	VECTOR pmv[4];
 	int32_t psad[4];
 	
-	MACROBLOCK * const pMB = pMBs + x + y * iWcount;
+	const MACROBLOCK * const pMB = pMBs + x + y * iWcount;
+	const MACROBLOCK * const prevMB = prevMBs + x + y * iWcount;
 
 	static int32_t threshA,threshB;
     	int32_t bPredEq;
@@ -853,7 +906,7 @@
    If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2  
 */
 
-        if ((bPredEq) && (MVequal(pmv[0],pMB->mvs[0]) ) )
+        if ((bPredEq) && (MVequal(pmv[0],prevMB->mvs[0]) ) )
 		iFound=2;
 
 /* Step 3: If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. 
@@ -905,9 +958,9 @@
 	iMinSAD = sad16( cur, 
 			 get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 16, currMV, iEdgedWidth),
 			 iEdgedWidth, MV_MAX_ERROR);
-  	iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant;
+  	iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode, iQuant);
 	
-	if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,pMB->mvs[0])) && ((uint32_t)iMinSAD < pMB->sad16) ) )
+	if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,prevMB->mvs[0])) && ((uint32_t)iMinSAD < prevMB->sad16) ) )
 	{
 		
 		if (MotionFlags & PMV_QUICKSTOP16) 
@@ -920,7 +973,7 @@
    Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. 
    Also calculate (0,0) but do not subtract offset. 
    Let MinSAD be the smallest SAD up to this point. 
-   If MV is (0,0) subtract offset. ******** WHAT'S THIS 'OFFSET' ??? ***********
+   If MV is (0,0) subtract offset. 
 */
 
 // (0,0) is always possible
@@ -928,7 +981,7 @@
 	CHECK_MV16_ZERO;
 
 // previous frame MV is always possible
-	CHECK_MV16_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y);
+	CHECK_MV16_CANDIDATE(prevMB->mvs[0].x,prevMB->mvs[0].y);
 	
 // left neighbour, if allowed
 	if (x != 0) 
@@ -959,12 +1012,16 @@
 			CHECK_MV16_CANDIDATE(pmv[3].x,pmv[3].y);
 		}
 	}
+	
+    	if ( (MVzero(*currMV)) && (!MVzero(pmv[0])) /* && (iMinSAD <= iQuant * 96)*/ )
+		iMinSAD -= MV16_00_BIAS;
 
+	
 /* Step 6: If MinSAD <= thresa goto Step 10. 
    If Motion Vector equal to Previous frame motion vector and MinSAD<PrevFrmSAD goto Step 10. 
 */
 
-	if ( (iMinSAD <= threshA) || ( MVequal(*currMV,pMB->mvs[0]) && ((uint32_t)iMinSAD < pMB->sad16) ) )
+	if ( (iMinSAD <= threshA) || ( MVequal(*currMV,prevMB->mvs[0]) && ((uint32_t)iMinSAD < prevMB->sad16) ) )
 	{	
 		if (MotionFlags & PMV_QUICKSTOP16) 
 			goto PMVfast16_Terminate_without_Refine;
@@ -1150,17 +1207,17 @@
 					const uint8_t * const pRefHV,
 					const IMAGE * const pCur,
 					const int x, const int y,
-					const int start_x, int start_y,
+					const int start_x, const int start_y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV)
 {
-        const uint32_t iWcount = pParam->mb_width;
-
-	const int32_t iFcode = pParam->fixed_code;
-	const int32_t iQuant = pParam->quant;
+    const uint32_t iWcount = pParam->mb_width;
 	const int32_t iWidth = pParam->width;
 	const int32_t iHeight = pParam->height;
 	const int32_t iEdgedWidth = pParam->edged_width; 
@@ -1179,7 +1236,8 @@
 	VECTOR newMV;
 	VECTOR backupMV;
 	
-	MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount;
+	const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount;
+	const MACROBLOCK * const prevMB = prevMBs + (x>>1) + (y>>1) * iWcount;
 
 	static int32_t threshA,threshB;
     	int32_t iFound,bPredEq;
@@ -1191,8 +1249,6 @@
 	get_range(&min_dx, &max_dx, &min_dy, &max_dy,
 		  x, y, 8, iWidth, iHeight, iFcode);
 
-/* we work with abs. MVs, not relative to prediction, so range is relative to 0,0 */
-
 	if (!(MotionFlags & PMV_HALFPELDIAMOND8 ))
 	{ min_dx = EVEN(min_dx);
 	max_dx = EVEN(max_dx);
@@ -1225,7 +1281,7 @@
    If PredEq=1 and MVpredicted = Previous Frame MV, set Found=2  
 */
 
-        if ((bPredEq) && (MVequal(pmv[0],pMB->mvs[iSubBlock]) ) )
+        if ((bPredEq) && (MVequal(pmv[0],prevMB->mvs[iSubBlock]) ) )
 		iFound=2;
 
 /* Step 3: If Distance>0 or thresb<1536 or PredEq=1 Select small Diamond Search. 
@@ -1256,9 +1312,9 @@
 	iMinSAD = sad8( cur, 
 			get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 8, currMV, iEdgedWidth),
 			iEdgedWidth);
-  	iMinSAD += calc_delta_8(currMV->x - pmv[0].x, currMV->y - pmv[0].y, (uint8_t)iFcode) * iQuant;
+  	iMinSAD += calc_delta_8(currMV->x - pmv[0].x, currMV->y - pmv[0].y, (uint8_t)iFcode, iQuant);
 	
-	if ( (iMinSAD < 256/4 ) || ( (MVequal(*currMV,pMB->mvs[iSubBlock])) && ((uint32_t)iMinSAD < pMB->sad8[iSubBlock]) ) )
+	if ( (iMinSAD < 256/4 ) || ( (MVequal(*currMV,prevMB->mvs[iSubBlock])) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) )
 	{
 		if (MotionFlags & PMV_QUICKSTOP16) 
 			goto PMVfast8_Terminate_without_Refine;
@@ -1271,7 +1327,7 @@
    Step 5: Calculate SAD for motion vectors taken from left block, top, top-right, and Previous frame block. 
    Also calculate (0,0) but do not subtract offset. 
    Let MinSAD be the smallest SAD up to this point. 
-   If MV is (0,0) subtract offset. ******** WHAT'S THIS 'OFFSET' ??? ***********
+   If MV is (0,0) subtract offset. 
 */
 
 // the prediction might be even better than mv16
@@ -1281,7 +1337,7 @@
 	CHECK_MV8_ZERO;
 
 // previous frame MV is always possible
-	CHECK_MV8_CANDIDATE(pMB->mvs[iSubBlock].x,pMB->mvs[iSubBlock].y);
+	CHECK_MV8_CANDIDATE(prevMB->mvs[iSubBlock].x,prevMB->mvs[iSubBlock].y);
 	
 // left neighbour, if allowed
 	if (psad[1] != MV_MAX_ERROR) 
@@ -1313,11 +1369,15 @@
 		}
 	}
 
+    	if ( (MVzero(*currMV)) && (!MVzero(pmv[0])) /* && (iMinSAD <= iQuant * 96) */ )
+		iMinSAD -= MV8_00_BIAS;
+
+
 /* Step 6: If MinSAD <= thresa goto Step 10. 
    If Motion Vector equal to Previous frame motion vector and MinSAD<PrevFrmSAD goto Step 10. 
 */
 
-	if ( (iMinSAD <= threshA) || ( MVequal(*currMV,pMB->mvs[iSubBlock]) && ((uint32_t)iMinSAD < pMB->sad8[iSubBlock]) ) )
+	if ( (iMinSAD <= threshA) || ( MVequal(*currMV,prevMB->mvs[iSubBlock]) && ((uint32_t)iMinSAD < prevMB->sad8[iSubBlock]) ) )
 	{	
 		if (MotionFlags & PMV_QUICKSTOP16) 
 			goto PMVfast8_Terminate_without_Refine;
@@ -1407,15 +1467,16 @@
 					const IMAGE * const pCur,
 					const int x, const int y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV)
 {
     const uint32_t iWcount = pParam->mb_width;
     const uint32_t iHcount = pParam->mb_height;
-	const int32_t iFcode = pParam->fixed_code;
-	const int32_t iQuant = pParam->quant;
 
 	const int32_t iWidth = pParam->width;
 	const int32_t iHeight = pParam->height;
@@ -1435,7 +1496,8 @@
 	int32_t psad[8];
 	
 	static MACROBLOCK * oldMBs = NULL; 
-	MACROBLOCK * const pMB = pMBs + x + y * iWcount;
+	const MACROBLOCK * const pMB = pMBs + x + y * iWcount;
+	const MACROBLOCK * const prevMB = prevMBs + x + y * iWcount;
 	MACROBLOCK * oldMB = NULL;
 
 	static int32_t thresh2;
@@ -1445,8 +1507,8 @@
 	MainSearch16FuncPtr EPZSMainSearchPtr;
 
 	if (oldMBs == NULL)
-	{	oldMBs = (MACROBLOCK*) calloc(1,iWcount*iHcount*sizeof(MACROBLOCK));
-		fprintf(stderr,"allocated %d bytes for oldMBs\n",iWcount*iHcount*sizeof(MACROBLOCK));
+	{	oldMBs = (MACROBLOCK*) calloc(iWcount*iHcount,sizeof(MACROBLOCK));
+//		fprintf(stderr,"allocated %d bytes for oldMBs\n",iWcount*iHcount*sizeof(MACROBLOCK));
 	}
 	oldMB = oldMBs + x + y * iWcount;
 
@@ -1454,8 +1516,6 @@
 	get_range(&min_dx, &max_dx, &min_dy, &max_dy,
 			x, y, 16, iWidth, iHeight, iFcode);
 
-/* we work with abs. MVs, not relative to prediction, so get_range is called relative to 0,0 */
-
 	if (!(MotionFlags & PMV_HALFPEL16 ))
 	{ min_dx = EVEN(min_dx);
 	  max_dx = EVEN(max_dx);
@@ -1495,10 +1555,10 @@
 	iMinSAD = sad16( cur, 
 		get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 16, currMV, iEdgedWidth),
 		iEdgedWidth, MV_MAX_ERROR);
-  	iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant;
+  	iMinSAD += calc_delta_16(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode, iQuant);
 	
 // thresh1 is fixed to 256 
-	if ( (iMinSAD < 256 ) || ( (MVequal(*currMV,pMB->mvs[0])) && ((uint32_t)iMinSAD < pMB->sad16) ) )
+	if ( (iMinSAD < 256 ) || ( (MVequal(*currMV, prevMB->mvs[0])) && ((uint32_t)iMinSAD < prevMB->sad16) ) )
 		{
 			if (MotionFlags & PMV_QUICKSTOP16) 
 				goto EPZS16_Terminate_without_Refine;
@@ -1509,7 +1569,7 @@
 /************** This is predictor SET B: (0,0), prev.frame MV, neighbours **************/ 
 
 // previous frame MV 
-	CHECK_MV16_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y);
+	CHECK_MV16_CANDIDATE(prevMB->mvs[0].x,prevMB->mvs[0].y);
 
 // set threshhold based on Min of Prediction and SAD of collocated block
 // CHECK_MV16 always uses iSAD for the SAD of last vector to check, so now iSAD is what we want
@@ -1565,7 +1625,7 @@
 */
 
 	if ( (iMinSAD <= thresh2) 
-		|| ( MVequal(*currMV,pMB->mvs[0]) && ((uint32_t)iMinSAD <= pMB->sad16) ) )
+		|| ( MVequal(*currMV,prevMB->mvs[0]) && ((uint32_t)iMinSAD <= prevMB->sad16) ) )
 		{	
 			if (MotionFlags & PMV_QUICKSTOP16) 
 				goto EPZS16_Terminate_without_Refine;
@@ -1575,28 +1635,28 @@
 
 /***** predictor SET C: acceleration MV (new!), neighbours in prev. frame(new!) ****/
 
-	backupMV = pMB->mvs[0]; 		// last MV
-	backupMV.x += (pMB->mvs[0].x - oldMB->mvs[0].x );	// acceleration X
-	backupMV.y += (pMB->mvs[0].y - oldMB->mvs[0].y );	// acceleration Y 
+	backupMV = prevMB->mvs[0]; 		// collocated MV
+	backupMV.x += (prevMB->mvs[0].x - oldMB->mvs[0].x );	// acceleration X
+	backupMV.y += (prevMB->mvs[0].y - oldMB->mvs[0].y );	// acceleration Y 
 
-	CHECK_MV16_CANDIDATE(backupMV.x,backupMV.y);	
+	CHECK_MV16_CANDIDATE(backupMV.x,backupMV.y);
 
 // left neighbour
 	if (x != 0)  
-		CHECK_MV16_CANDIDATE((oldMB-1)->mvs[0].x,oldMB->mvs[0].y);		
+		CHECK_MV16_CANDIDATE((prevMB-1)->mvs[0].x,(prevMB-1)->mvs[0].y);		
 
 // top neighbour 
 	if (y != 0)
-		CHECK_MV16_CANDIDATE((oldMB-iWcount)->mvs[0].x,oldMB->mvs[0].y);		
+		CHECK_MV16_CANDIDATE((prevMB-iWcount)->mvs[0].x,(prevMB-iWcount)->mvs[0].y);		
 
 // right neighbour, if allowed (this value is not written yet, so take it from   pMB->mvs 
 
 	if ((uint32_t)x != iWcount-1)
-		CHECK_MV16_CANDIDATE((pMB+1)->mvs[0].x,oldMB->mvs[0].y);		
+		CHECK_MV16_CANDIDATE((prevMB+1)->mvs[0].x,(prevMB+1)->mvs[0].y);		
 
 // bottom neighbour, dito
 	if ((uint32_t)y != iHcount-1)
-		CHECK_MV16_CANDIDATE((pMB+iWcount)->mvs[0].x,oldMB->mvs[0].y);		
+		CHECK_MV16_CANDIDATE((prevMB+iWcount)->mvs[0].x,(prevMB+iWcount)->mvs[0].y);		
 
 /* Terminate if MinSAD <= T_3 (here T_3 = T_2)  */
 	if (iMinSAD <= thresh2)
@@ -1653,7 +1713,7 @@
 			iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur,
 				x, y, 
 			0, 0, iMinSAD, &newMV, 
-			pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, /*iDiamondSize*/ 2, iFcode, iQuant, 0);
+			pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, 2, iFcode, iQuant, 0);
 		
 			if (iSAD < iMinSAD) 
 			{
@@ -1674,7 +1734,7 @@
 
 EPZS16_Terminate_without_Refine:
 
-	*oldMB = *pMB;
+	*oldMB = *prevMB;
 	
 	currPMV->x = currMV->x - pmv[0].x;
 	currPMV->y = currMV->y - pmv[0].y;
@@ -1691,15 +1751,17 @@
 					const int x, const int y,
 					const int start_x, const int start_y,
 					const uint32_t MotionFlags,
+					const uint32_t iQuant,
+					const uint32_t iFcode,
 					const MBParam * const pParam,
-					MACROBLOCK * const pMBs,
+					const MACROBLOCK * const pMBs,
+					const MACROBLOCK * const prevMBs,
 					VECTOR * const currMV,
 					VECTOR * const currPMV)
 {
-    const uint32_t iWcount = pParam->mb_width;
-	const int32_t iFcode = pParam->fixed_code;
-	const int32_t iQuant = pParam->quant;
+/* Please note that EPZS might not be a good choice for 8x8-block motion search! */
 
+	const uint32_t iWcount = pParam->mb_width;
 	const int32_t iWidth = pParam->width;
 	const int32_t iHeight = pParam->height;
 	const int32_t iEdgedWidth = pParam->edged_width; 
@@ -1721,7 +1783,8 @@
 
 	const	int32_t iSubBlock = ((y&1)<<1) + (x&1);
 	
-	MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount;
+	const MACROBLOCK * const pMB = pMBs + (x>>1) + (y>>1) * iWcount;
+	const MACROBLOCK * const prevMB = prevMBs + (x>>1) + (y>>1) * iWcount;
 
     	int32_t bPredEq;
     	int32_t iMinSAD,iSAD=9999;
@@ -1775,7 +1838,7 @@
 	iMinSAD = sad8( cur, 
 		get_ref_mv(pRef, pRefH, pRefV, pRefHV, x, y, 8, currMV, iEdgedWidth),
 		iEdgedWidth);
-  	iMinSAD += calc_delta_8(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode) * iQuant;
+  	iMinSAD += calc_delta_8(currMV->x-pmv[0].x, currMV->y-pmv[0].y, (uint8_t)iFcode, iQuant);
 
 	
 // thresh1 is fixed to 256 
@@ -1789,13 +1852,50 @@
 
 /************** This is predictor SET B: (0,0), prev.frame MV, neighbours **************/ 
 
-// previous frame MV 
-	CHECK_MV8_CANDIDATE(pMB->mvs[0].x,pMB->mvs[0].y);
 
 // MV=(0,0) is often a good choice
-
 	CHECK_MV8_ZERO;
 
+// previous frame MV 
+	CHECK_MV8_CANDIDATE(prevMB->mvs[iSubBlock].x,prevMB->mvs[iSubBlock].y);
+	
+// left neighbour, if allowed
+	if (psad[1] != MV_MAX_ERROR) 
+	{
+		if (!(MotionFlags & PMV_HALFPEL8 ))	
+		{	pmv[1].x = EVEN(pmv[1].x);	
+			pmv[1].y = EVEN(pmv[1].y);
+		}
+		CHECK_MV8_CANDIDATE(pmv[1].x,pmv[1].y);		
+	}
+
+// top neighbour, if allowed
+	if (psad[2] != MV_MAX_ERROR) 
+	{	
+		if (!(MotionFlags & PMV_HALFPEL8 ))
+		{	pmv[2].x = EVEN(pmv[2].x);
+			pmv[2].y = EVEN(pmv[2].y);
+		}
+		CHECK_MV8_CANDIDATE(pmv[2].x,pmv[2].y);
+	
+// top right neighbour, if allowed
+		if (psad[3] != MV_MAX_ERROR) 
+		{
+			if (!(MotionFlags & PMV_HALFPEL8 ))
+			{	pmv[3].x = EVEN(pmv[3].x);
+				pmv[3].y = EVEN(pmv[3].y);
+			}
+			CHECK_MV8_CANDIDATE(pmv[3].x,pmv[3].y);
+		}
+	}
+
+/*  // this bias is zero anyway, at the moment! 
+
+    	if ( (MVzero(*currMV)) && (!MVzero(pmv[0])) ) // && (iMinSAD <= iQuant * 96) 
+		iMinSAD -= MV8_00_BIAS;		
+
+*/ 
+
 /* Terminate if MinSAD <= T_2 
    Terminate if MV[t] == MV[t-1] and MinSAD[t] <= MinSAD[t-1] 
 */
@@ -1808,27 +1908,31 @@
 				goto EPZS8_Terminate_with_Refine;
 		}
 
-/************ (if Diamond Search)  **************/
+/************ (Diamond Search)  **************/
 
 	backupMV = *currMV; /* save best prediction, actually only for EXTSEARCH */
 
 	if (!(MotionFlags & PMV_HALFPELDIAMOND8))
 		iDiamondSize *= 2;
 		
-/* default: use best prediction as starting point for one call of PMVfast_MainSearch */
+/* default: use best prediction as starting point for one call of EPZS_MainSearch */
+
+/* // there is no EPZS^2 for inter4v at the moment
 
-//	if (MotionFlags & PMV_USESQUARES8)
-//		EPZSMainSearchPtr = Square8_MainSearch;
-//	else
-		EPZSMainSearchPtr = Diamond8_MainSearch;
+	if (MotionFlags & PMV_USESQUARES8)
+		EPZSMainSearchPtr = Square8_MainSearch;
+	else
+*/
+
+	EPZSMainSearchPtr = Diamond8_MainSearch; 
 		
 	iSAD = (*EPZSMainSearchPtr)(pRef, pRefH, pRefV, pRefHV, cur,
 		x, y, 
 		currMV->x, currMV->y, iMinSAD, &newMV, 
 		pmv, min_dx, max_dx, min_dy, max_dy, iEdgedWidth, 
-		iDiamondSize, iFcode, iQuant, 00);
+		iDiamondSize, iFcode, iQuant, 0);
+
 
-	
 	if (iSAD < iMinSAD) 
 	{
 		*currMV = newMV;
@@ -1893,7 +1997,7 @@
 // TODO: need to incorporate prediction here (eg. sad += calc_delta_16)
 ***************************************************************/
 
-/*
+
 void MotionEstimationBVOP(
 			MBParam * const pParam,
 			FRAMEINFO * const frame,
@@ -1915,7 +2019,7 @@
     const uint32_t mb_height = pParam->mb_height;
 	const int32_t edged_width = pParam->edged_width;
  
-	int32_t i,j;
+	uint32_t i,j;
 
 	int32_t f_sad16;
 	int32_t b_sad16;
@@ -1939,7 +2043,7 @@
 				&& b_mb->mvs[0].x == 0
 				&& b_mb->mvs[0].y == 0)
 			{
-				mb->mode = MB_IGNORE;
+				mb->mode = MODE_NOT_CODED;
 				mb->mvs[0].x = 0;
 				mb->mvs[0].y = 0;
 				mb->b_mvs[0].x = 0;
@@ -1954,7 +2058,7 @@
 						i, j, 
 						frame->motion_flags,  frame->quant, frame->fcode,
 						pParam, 
-						f_mbs, 
+						f_mbs, f_mbs /* todo */,
 						&mb->mvs[0], &pmv_dontcare);	// ignore pmv
 
 			// backward search
@@ -1963,7 +2067,7 @@
 						i, j, 
 						frame->motion_flags,  frame->quant, frame->bcode,
 						pParam, 
-						b_mbs, 
+						b_mbs, b_mbs, /* todo */
 						&mb->b_mvs[0], &pmv_dontcare);  // ignore pmv
 
 			// interpolate search (simple, but effective)
@@ -1983,28 +2087,26 @@
 			if (f_sad16 < b_sad16)
 			{
 				best_sad = f_sad16;
-				mb->mode = MB_FORWARD;
+				mb->mode = MODE_FORWARD;
 			}
 			else
 			{
 				best_sad = b_sad16;
-				mb->mode = MB_BACKWARD;
+				mb->mode = MODE_BACKWARD;
 			}
 				
 			if (i_sad16 < best_sad)
 			{
 				best_sad = i_sad16;
-				mb->mode = MB_INTERPOLATE;
+				mb->mode = MODE_INTERPOLATE;
 			}
 
 			if (d_sad16 < best_sad)
 			{
 				best_sad = d_sad16;
-				mb->mode = MB_DIRECT;
+				mb->mode = MODE_DIRECT;
 			}
 
 		}
 	}
 }
-
-*/