21 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
22 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
* |
* |
24 |
* $Id: estimation_pvop.c,v 1.18 2005-12-18 06:52:12 syskin Exp $ |
* $Id: estimation_pvop.c,v 1.19 2006-02-24 08:46:22 syskin Exp $ |
25 |
* |
* |
26 |
****************************************************************************/ |
****************************************************************************/ |
27 |
|
|
39 |
#include "motion.h" |
#include "motion.h" |
40 |
#include "sad.h" |
#include "sad.h" |
41 |
#include "motion_inlines.h" |
#include "motion_inlines.h" |
42 |
|
#include "motion_smp.h" |
43 |
|
|
44 |
|
|
45 |
static const int xvid_me_lambda_vec8[32] = |
static const int xvid_me_lambda_vec8[32] = |
46 |
{ 0 ,(int)(1.0 * NEIGH_TEND_8X8 + 0.5), |
{ 0 ,(int)(1.0 * NEIGH_TEND_8X8 + 0.5), |
983 |
current->sStat.iMvSum = mvSum; |
current->sStat.iMvSum = mvSum; |
984 |
current->sStat.iMvCount = mvCount; |
current->sStat.iMvCount = mvCount; |
985 |
} |
} |
986 |
|
|
987 |
|
void |
988 |
|
MotionEstimateSMP(SMPmotionData * h) |
989 |
|
{ |
990 |
|
const MBParam * const pParam = h->pParam; |
991 |
|
const FRAMEINFO * const current = h->current; |
992 |
|
const FRAMEINFO * const reference = h->reference; |
993 |
|
const IMAGE * const pRefH = h->pRefH; |
994 |
|
const IMAGE * const pRefV = h->pRefV; |
995 |
|
const IMAGE * const pRefHV = h->pRefHV; |
996 |
|
const IMAGE * const pGMC = h->pGMC; |
997 |
|
uint32_t MotionFlags = MakeGoodMotionFlags(current->motion_flags, |
998 |
|
current->vop_flags, |
999 |
|
current->vol_flags); |
1000 |
|
|
1001 |
|
MACROBLOCK *const pMBs = current->mbs; |
1002 |
|
const IMAGE *const pCurrent = ¤t->image; |
1003 |
|
const IMAGE *const pRef = &reference->image; |
1004 |
|
|
1005 |
|
const uint32_t mb_width = pParam->mb_width; |
1006 |
|
const uint32_t mb_height = pParam->mb_height; |
1007 |
|
const uint32_t iEdgedWidth = pParam->edged_width; |
1008 |
|
int stat_thresh = 0; |
1009 |
|
int MVmax = 0, mvSum = 0, mvCount = 0; |
1010 |
|
int y_step = h->y_step; |
1011 |
|
int start_y = h->start_y; |
1012 |
|
|
1013 |
|
uint32_t x, y; |
1014 |
|
int sad00; |
1015 |
|
int skip_thresh = INITIAL_SKIP_THRESH * \ |
1016 |
|
(current->vop_flags & XVID_VOP_MODEDECISION_RD ? 2:1); |
1017 |
|
int block = start_y*mb_width; |
1018 |
|
int * complete_count_self = h->complete_count_self; |
1019 |
|
const int * complete_count_above = h->complete_count_above; |
1020 |
|
int max_mbs; |
1021 |
|
int current_mb = 0; |
1022 |
|
|
1023 |
|
/* some pre-initialized thingies for SearchP */ |
1024 |
|
DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); |
1025 |
|
SearchData Data; |
1026 |
|
memset(&Data, 0, sizeof(SearchData)); |
1027 |
|
Data.iEdgedWidth = iEdgedWidth; |
1028 |
|
Data.iFcode = current->fcode; |
1029 |
|
Data.rounding = pParam->m_rounding_type; |
1030 |
|
Data.qpel = (current->vol_flags & XVID_VOL_QUARTERPEL ? 1:0); |
1031 |
|
Data.chroma = MotionFlags & XVID_ME_CHROMA_PVOP; |
1032 |
|
Data.dctSpace = dct_space; |
1033 |
|
Data.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); |
1034 |
|
Data.mpeg_quant_matrices = pParam->mpeg_quant_matrices; |
1035 |
|
|
1036 |
|
/* todo: sort out temp memory space */ |
1037 |
|
Data.RefQ = h->RefQ; |
1038 |
|
if (sadInit) (*sadInit) (); |
1039 |
|
|
1040 |
|
max_mbs = 0; |
1041 |
|
|
1042 |
|
for (y = start_y; y < mb_height; y += y_step) { |
1043 |
|
if (y == 0) max_mbs = mb_width; /* we can process all blocks of the first row */ |
1044 |
|
|
1045 |
|
for (x = 0; x < mb_width; x++) { |
1046 |
|
|
1047 |
|
MACROBLOCK *pMB, *prevMB; |
1048 |
|
int skip; |
1049 |
|
|
1050 |
|
pMB = &pMBs[block]; |
1051 |
|
prevMB = &reference->mbs[block]; |
1052 |
|
|
1053 |
|
pMB->sad16 = |
1054 |
|
sad16v(pCurrent->y + (x + y * iEdgedWidth) * 16, |
1055 |
|
pRef->y + (x + y * iEdgedWidth) * 16, |
1056 |
|
pParam->edged_width, pMB->sad8); |
1057 |
|
|
1058 |
|
sad00 = 4*MAX(MAX(pMB->sad8[0], pMB->sad8[1]), MAX(pMB->sad8[2], pMB->sad8[3])); |
1059 |
|
|
1060 |
|
if (Data.chroma) { |
1061 |
|
Data.chromaSAD = sad8(pCurrent->u + x*8 + y*(iEdgedWidth/2)*8, |
1062 |
|
pRef->u + x*8 + y*(iEdgedWidth/2)*8, iEdgedWidth/2) |
1063 |
|
+ sad8(pCurrent->v + (x + y*(iEdgedWidth/2))*8, |
1064 |
|
pRef->v + (x + y*(iEdgedWidth/2))*8, iEdgedWidth/2); |
1065 |
|
pMB->sad16 += Data.chromaSAD; |
1066 |
|
sad00 += Data.chromaSAD; |
1067 |
|
} |
1068 |
|
|
1069 |
|
if (current_mb >= max_mbs) { |
1070 |
|
/* we ME-ed all macroblocks we safely could. grab next portion */ |
1071 |
|
int above_count = *complete_count_above; /* sync point */ |
1072 |
|
if (above_count == mb_width) { |
1073 |
|
/* full line above is ready */ |
1074 |
|
above_count = mb_width+1; |
1075 |
|
if (y < mb_height-y_step) { |
1076 |
|
/* this is not last line, grab a portion of MBs from the next line too */ |
1077 |
|
above_count += MAX(0, complete_count_above[1] - 1); |
1078 |
|
} |
1079 |
|
} |
1080 |
|
|
1081 |
|
max_mbs = current_mb + above_count - x - 1; |
1082 |
|
|
1083 |
|
if (current_mb >= max_mbs) { |
1084 |
|
/* current workload is zero */ |
1085 |
|
x--; |
1086 |
|
sched_yield(); |
1087 |
|
continue; |
1088 |
|
} |
1089 |
|
} |
1090 |
|
|
1091 |
|
skip = InitialSkipDecisionP(sad00, pParam, current, pMB, prevMB, x, y, &Data, pGMC, |
1092 |
|
pCurrent, pRef, MotionFlags); |
1093 |
|
if (current_mb >= max_mbs) { |
1094 |
|
/* we ME-ed all macroblocks we safely could. grab next portion */ |
1095 |
|
int above_count = *complete_count_above; /* sync point */ |
1096 |
|
if (above_count == mb_width) { |
1097 |
|
/* full line above is ready */ |
1098 |
|
above_count = mb_width+1; |
1099 |
|
if (y < mb_height-y_step) { |
1100 |
|
/* this is not last line, grab a portion of MBs from the next line too */ |
1101 |
|
above_count += MAX(0, complete_count_above[1] - 1); |
1102 |
|
} |
1103 |
|
} |
1104 |
|
|
1105 |
|
max_mbs = current_mb + above_count - x - 1; |
1106 |
|
|
1107 |
|
if (current_mb >= max_mbs) { |
1108 |
|
/* current workload is zero */ |
1109 |
|
x--; |
1110 |
|
sched_yield(); |
1111 |
|
continue; |
1112 |
|
} |
1113 |
|
} |
1114 |
|
|
1115 |
|
if (skip) { |
1116 |
|
current_mb++; |
1117 |
|
block++; |
1118 |
|
*complete_count_self = x+1; |
1119 |
|
continue; |
1120 |
|
} |
1121 |
|
|
1122 |
|
SearchP(pRef, pRefH->y, pRefV->y, pRefHV->y, pCurrent, x, |
1123 |
|
y, MotionFlags, current->vop_flags, |
1124 |
|
&Data, pParam, pMBs, reference->mbs, pMB); |
1125 |
|
|
1126 |
|
if (current->vop_flags & XVID_VOP_MODEDECISION_RD) |
1127 |
|
xvid_me_ModeDecision_RD(&Data, pMB, pMBs, x, y, pParam, |
1128 |
|
MotionFlags, current->vop_flags, current->vol_flags, |
1129 |
|
pCurrent, pRef, pGMC, current->coding_type); |
1130 |
|
|
1131 |
|
else if (current->vop_flags & XVID_VOP_FAST_MODEDECISION_RD) |
1132 |
|
xvid_me_ModeDecision_Fast(&Data, pMB, pMBs, x, y, pParam, |
1133 |
|
MotionFlags, current->vop_flags, current->vol_flags, |
1134 |
|
pCurrent, pRef, pGMC, current->coding_type); |
1135 |
|
else |
1136 |
|
ModeDecision_SAD(&Data, pMB, pMBs, x, y, pParam, |
1137 |
|
MotionFlags, current->vop_flags, current->vol_flags, |
1138 |
|
pCurrent, pRef, pGMC, current->coding_type, sad00); |
1139 |
|
|
1140 |
|
*complete_count_self = x+1; |
1141 |
|
|
1142 |
|
current_mb++; |
1143 |
|
block++; |
1144 |
|
|
1145 |
|
motionStatsPVOP(&MVmax, &mvCount, &mvSum, pMB, Data.qpel); |
1146 |
|
|
1147 |
|
} |
1148 |
|
block += (y_step-1)*pParam->mb_width; |
1149 |
|
complete_count_self++; |
1150 |
|
complete_count_above++; |
1151 |
|
} |
1152 |
|
|
1153 |
|
h->MVmax = MVmax; |
1154 |
|
h->mvSum = mvSum; |
1155 |
|
h->mvCount = mvCount; |
1156 |
|
} |
1157 |
|
|
1158 |
|
|