4 |
* - Motion Estimation for B-VOPs - |
* - Motion Estimation for B-VOPs - |
5 |
* |
* |
6 |
* Copyright(C) 2002 Christoph Lampert <gruel@web.de> |
* Copyright(C) 2002 Christoph Lampert <gruel@web.de> |
7 |
* 2002 Michael Militzer <michael@xvid.org> |
* 2002-2010 Michael Militzer <michael@xvid.org> |
8 |
* 2002-2003 Radoslaw Czyz <xvid@syskin.cjb.net> |
* 2002-2003 Radoslaw Czyz <xvid@syskin.cjb.net> |
9 |
* |
* |
10 |
* This program is free software ; you can redistribute it and/or modify |
* This program is free software ; you can redistribute it and/or modify |
21 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
22 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
* |
* |
24 |
* $Id: estimation_bvop.c,v 1.14 2004-09-04 14:16:24 edgomez Exp $ |
* $Id: estimation_bvop.c,v 1.28.2.1 2010-12-29 22:29:44 Isibaar Exp $ |
25 |
* |
* |
26 |
****************************************************************************/ |
****************************************************************************/ |
27 |
|
|
137 |
xcb = xb/2; ycb = yb/2; |
xcb = xb/2; ycb = yb/2; |
138 |
} |
} |
139 |
|
|
140 |
t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision, 0) |
t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision) |
141 |
+ d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision, 0); |
+ d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision); |
142 |
|
|
143 |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
144 |
sad += (data->lambda16 * t * sad)>>10; |
sad += (data->lambda16 * t); |
145 |
|
|
146 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
147 |
sad += ChromaSAD2((xcf >> 1) + roundtab_79[xcf & 0x3], |
sad += ChromaSAD2((xcf >> 1) + roundtab_79[xcf & 0x3], |
207 |
if (sad > *(data->iMinSAD)) return; |
if (sad > *(data->iMinSAD)) return; |
208 |
} |
} |
209 |
|
|
210 |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0, 0) * sad)>>10; |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); |
211 |
|
|
212 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
213 |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
265 |
|
|
266 |
done: |
done: |
267 |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
268 |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0, 0) * sad)>>10; |
sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); |
269 |
|
|
270 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
271 |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], |
291 |
if ( (x > data->max_dx) || ( x < data->min_dx) |
if ( (x > data->max_dx) || ( x < data->min_dx) |
292 |
|| (y > data->max_dy) || (y < data->min_dy) ) return; |
|| (y > data->max_dy) || (y < data->min_dy) ) return; |
293 |
|
|
|
if (data->rrv && (!(x&1) && x !=0) | (!(y&1) && y !=0) ) return; /* non-zero even value */ |
|
|
|
|
294 |
if (data->qpel_precision) { /* x and y are in 1/4 precision */ |
if (data->qpel_precision) { /* x and y are in 1/4 precision */ |
295 |
Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); |
Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); |
296 |
current = data->currentQMV; |
current = data->currentQMV; |
301 |
xc = x; yc = y; |
xc = x; yc = y; |
302 |
} |
} |
303 |
t = d_mv_bits(x, y, data->predMV, data->iFcode, |
t = d_mv_bits(x, y, data->predMV, data->iFcode, |
304 |
data->qpel^data->qpel_precision, data->rrv); |
data->qpel^data->qpel_precision); |
305 |
|
|
306 |
sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); |
sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); |
307 |
sad += (data->lambda16 * t * sad)>>10; |
sad += (data->lambda16 * t); |
308 |
|
|
309 |
if (data->chroma && sad < *data->iMinSAD) |
if (data->chroma && sad < *data->iMinSAD) |
310 |
sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], |
sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], |
396 |
const uint32_t iWcount, |
const uint32_t iWcount, |
397 |
const MACROBLOCK * const pMB, |
const MACROBLOCK * const pMB, |
398 |
const uint32_t mode_curr, |
const uint32_t mode_curr, |
399 |
const VECTOR hint) |
const VECTOR hint, const int bound) |
400 |
{ |
{ |
401 |
|
int lx, ly; /* left */ |
402 |
|
int tx, ty; /* top */ |
403 |
|
int rtx, rty; /* top-right */ |
404 |
|
int ltx, lty; /* top-left */ |
405 |
|
int lpos, tpos, rtpos, ltpos; |
406 |
|
|
407 |
|
lx = x - 1; ly = y; |
408 |
|
tx = x; ty = y - 1; |
409 |
|
rtx = x + 1; rty = y - 1; |
410 |
|
ltx = x - 1; lty = y - 1; |
411 |
|
|
412 |
|
lpos = lx + ly * iWcount; |
413 |
|
rtpos = rtx + rty * iWcount; |
414 |
|
tpos = tx + ty * iWcount; |
415 |
|
ltpos = ltx + lty * iWcount; |
416 |
|
|
417 |
|
|
418 |
/* [0] is prediction */ |
/* [0] is prediction */ |
419 |
/* [1] is zero */ |
/* [1] is zero */ |
420 |
pmv[1].x = pmv[1].y = 0; |
pmv[1].x = pmv[1].y = 0; |
421 |
|
|
422 |
pmv[2].x = hint.x; pmv[2].y = hint.y; |
pmv[2].x = hint.x; pmv[2].y = hint.y; |
423 |
|
|
424 |
if ((y != 0)&&(x != (int)(iWcount+1))) { /* [3] top-right neighbour */ |
if (rtpos >= bound && rtx < (int)iWcount) { /* [3] top-right neighbour */ |
425 |
pmv[3] = ChoosePred(pMB+1-iWcount, mode_curr); |
pmv[3] = ChoosePred(pMB+1-iWcount, mode_curr); |
426 |
} else pmv[3].x = pmv[3].y = 0; |
} else pmv[3].x = pmv[3].y = 0; |
427 |
|
|
428 |
if (y != 0) { |
if (tpos >= bound) { |
429 |
pmv[4] = ChoosePred(pMB-iWcount, mode_curr); |
pmv[4] = ChoosePred(pMB-iWcount, mode_curr); /* [4] top */ |
430 |
} else pmv[4].x = pmv[4].y = 0; |
} else pmv[4].x = pmv[4].y = 0; |
431 |
|
|
432 |
if (x != 0) { |
if (lpos >= bound && lx >= 0) { |
433 |
pmv[5] = ChoosePred(pMB-1, mode_curr); |
pmv[5] = ChoosePred(pMB-1, mode_curr); /* [5] left */ |
434 |
} else pmv[5].x = pmv[5].y = 0; |
} else pmv[5].x = pmv[5].y = 0; |
435 |
|
|
436 |
if (x != 0 && y != 0) { |
if (ltpos >= bound && ltx >= 0) { |
437 |
pmv[6] = ChoosePred(pMB-1-iWcount, mode_curr); |
pmv[6] = ChoosePred(pMB-1-iWcount, mode_curr); /* [6] top-left */ |
438 |
} else pmv[6].x = pmv[6].y = 0; |
} else pmv[6].x = pmv[6].y = 0; |
439 |
} |
} |
440 |
|
|
449 |
int32_t * const best_sad, |
int32_t * const best_sad, |
450 |
const int32_t mode_current, |
const int32_t mode_current, |
451 |
SearchData * const Data, |
SearchData * const Data, |
452 |
VECTOR hint) |
VECTOR hint, const int bound) |
453 |
{ |
{ |
454 |
|
|
455 |
int i; |
int i; |
460 |
Data->predMV = *predMV; |
Data->predMV = *predMV; |
461 |
|
|
462 |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
463 |
pParam->width, pParam->height, iFcode - Data->qpel, 1, 0); |
pParam->width, pParam->height, iFcode - Data->qpel, 1); |
464 |
|
|
465 |
pmv[0] = Data->predMV; |
pmv[0] = Data->predMV; |
466 |
if (Data->qpel) { |
if (Data->qpel) { |
468 |
hint.x /= 2; hint.y /= 2; |
hint.x /= 2; hint.y /= 2; |
469 |
} |
} |
470 |
|
|
471 |
PreparePredictionsBF(pmv, x, y, pParam->mb_width, pMB, mode_current, hint); |
PreparePredictionsBF(pmv, x, y, pParam->mb_width, pMB, mode_current, hint, bound); |
472 |
|
|
473 |
Data->currentMV->x = Data->currentMV->y = 0; |
Data->currentMV->x = Data->currentMV->y = 0; |
474 |
|
|
507 |
if(MotionFlags & XVID_ME_FASTREFINE16) { |
if(MotionFlags & XVID_ME_FASTREFINE16) { |
508 |
/* fast */ |
/* fast */ |
509 |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
510 |
pParam->width, pParam->height, Data->iFcode, 2, 0); |
pParam->width, pParam->height, Data->iFcode, 2); |
511 |
FullRefine_Fast(Data, CheckCandidate16no4v, 0); |
FullRefine_Fast(Data, CheckCandidate16no4v, 0); |
512 |
|
|
513 |
} else { |
} else { |
522 |
Data->currentQMV->y = 2*Data->currentMV->y; |
Data->currentQMV->y = 2*Data->currentMV->y; |
523 |
} |
} |
524 |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, |
525 |
pParam->width, pParam->height, Data->iFcode, 2, 0); |
pParam->width, pParam->height, Data->iFcode, 2); |
526 |
Data->qpel_precision = 1; |
Data->qpel_precision = 1; |
527 |
xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidate16no4v, 0); /* qpel part */ |
xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidate16no4v, 0); /* qpel part */ |
528 |
} |
} |
556 |
b_dx = (b_dx >> 3) + roundtab_76[b_dx & 0xf]; |
b_dx = (b_dx >> 3) + roundtab_76[b_dx & 0xf]; |
557 |
|
|
558 |
sum = sad8bi(Data->CurU, |
sum = sad8bi(Data->CurU, |
559 |
Data->RefP[4] + (dy/2) * stride + dx/2, |
Data->RefP[4] + (dy/2) * (int)stride + dx/2, |
560 |
Data->b_RefP[4] + (b_dy/2) * stride + b_dx/2, |
Data->b_RefP[4] + (b_dy/2) * (int)stride + b_dx/2, |
561 |
stride); |
stride); |
562 |
|
|
563 |
if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */ |
if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */ |
564 |
|
|
565 |
sum += sad8bi(Data->CurV, |
sum += sad8bi(Data->CurV, |
566 |
Data->RefP[5] + (dy/2) * stride + dx/2, |
Data->RefP[5] + (dy/2) * (int)stride + dx/2, |
567 |
Data->b_RefP[5] + (b_dy/2) * stride + b_dx/2, |
Data->b_RefP[5] + (b_dy/2) * (int)stride + b_dx/2, |
568 |
stride); |
stride); |
569 |
|
|
570 |
if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */ |
if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */ |
658 |
Data->directmvB[0].y |
Data->directmvB[0].y |
659 |
: Data->currentMV[1].y - Data->referencemv[0].y); |
: Data->currentMV[1].y - Data->referencemv[0].y); |
660 |
|
|
661 |
|
*best_sad = Data->iMinSAD[0]; |
662 |
|
|
663 |
return skip_sad; |
return skip_sad; |
664 |
} |
} |
665 |
|
|
728 |
Data->currentMV[0] = startF; |
Data->currentMV[0] = startF; |
729 |
Data->currentMV[1] = startB; |
Data->currentMV[1] = startB; |
730 |
|
|
731 |
get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1, 0); |
get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1); |
732 |
get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1, 0); |
get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1); |
733 |
|
|
734 |
if (Data->currentMV[0].x > f_range[1]) Data->currentMV[0].x = f_range[1]; |
if (Data->currentMV[0].x > f_range[1]) Data->currentMV[0].x = f_range[1]; |
735 |
if (Data->currentMV[0].x < f_range[0]) Data->currentMV[0].x = f_range[0]; |
if (Data->currentMV[0].x < f_range[0]) Data->currentMV[0].x = f_range[0]; |
758 |
int i, j; |
int i, j; |
759 |
int b_range[4], f_range[4]; |
int b_range[4], f_range[4]; |
760 |
|
|
761 |
get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1, 0); |
get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1); |
762 |
get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1, 0); |
get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1); |
763 |
|
|
764 |
/* diamond */ |
/* diamond */ |
765 |
do { |
do { |
789 |
if (Data->qpel) { |
if (Data->qpel) { |
790 |
Data->qpel_precision = 1; |
Data->qpel_precision = 1; |
791 |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, |
792 |
x, y, 4, pParam->width, pParam->height, Data->iFcode, 2, 0); |
x, y, 4, pParam->width, pParam->height, Data->iFcode, 2); |
793 |
|
|
794 |
Data->currentQMV[0].x = 2 * Data->currentMV[0].x; |
Data->currentQMV[0].x = 2 * Data->currentMV[0].x; |
795 |
Data->currentQMV[0].y = 2 * Data->currentMV[0].y; |
Data->currentQMV[0].y = 2 * Data->currentMV[0].y; |
800 |
xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidateInt, 1); |
xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidateInt, 1); |
801 |
|
|
802 |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, |
get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, |
803 |
x, y, 4, pParam->width, pParam->height, Data->bFcode, 2, 0); |
x, y, 4, pParam->width, pParam->height, Data->bFcode, 2); |
804 |
|
|
805 |
xvid_me_SubpelRefine(Data->currentQMV[1], Data, CheckCandidateInt, 2); |
xvid_me_SubpelRefine(Data->currentQMV[1], Data, CheckCandidateInt, 2); |
806 |
} |
} |
817 |
MACROBLOCK * const pMB, |
MACROBLOCK * const pMB, |
818 |
const MACROBLOCK * const b_mb, |
const MACROBLOCK * const b_mb, |
819 |
VECTOR * f_predMV, |
VECTOR * f_predMV, |
820 |
VECTOR * b_predMV) |
VECTOR * b_predMV, |
821 |
|
int force_direct) |
822 |
{ |
{ |
823 |
int mode = MODE_DIRECT, k; |
int mode = MODE_DIRECT, k; |
824 |
int best_sad, f_sad, b_sad, i_sad; |
int best_sad, f_sad, b_sad, i_sad; |
830 |
f_sad = Data_f->iMinSAD[0] + 4*Data_d->lambda16; |
f_sad = Data_f->iMinSAD[0] + 4*Data_d->lambda16; |
831 |
i_sad = Data_i->iMinSAD[0] + 2*Data_d->lambda16; |
i_sad = Data_i->iMinSAD[0] + 2*Data_d->lambda16; |
832 |
|
|
833 |
|
if (force_direct) |
834 |
|
goto set_mode; /* bypass checks for non-direct modes */ |
835 |
|
|
836 |
if (b_sad < best_sad) { |
if (b_sad < best_sad) { |
837 |
mode = MODE_BACKWARD; |
mode = MODE_BACKWARD; |
838 |
best_sad = b_sad; |
best_sad = b_sad; |
848 |
best_sad = i_sad; |
best_sad = i_sad; |
849 |
} |
} |
850 |
|
|
851 |
|
set_mode: |
852 |
pMB->sad16 = best_sad; |
pMB->sad16 = best_sad; |
853 |
pMB->mode = mode; |
pMB->mode = mode; |
854 |
|
pMB->cbp = 63; |
855 |
|
|
856 |
switch (mode) { |
switch (mode) { |
857 |
|
|
941 |
} |
} |
942 |
} |
} |
943 |
|
|
944 |
|
static __inline void |
945 |
|
maxMotionBVOP(int * const MVmaxF, int * const MVmaxB, const MACROBLOCK * const pMB, const int qpel) |
946 |
|
{ |
947 |
|
if (pMB->mode == MODE_FORWARD || pMB->mode == MODE_INTERPOLATE) { |
948 |
|
const VECTOR * const mv = qpel ? pMB->qmvs : pMB->mvs; |
949 |
|
int max = *MVmaxF; |
950 |
|
if (mv[0].x > max) max = mv[0].x; |
951 |
|
else if (-mv[0].x - 1 > max) max = -mv[0].x - 1; |
952 |
|
if (mv[0].y > max) max = mv[0].y; |
953 |
|
else if (-mv[0].y - 1 > max) max = -mv[0].y - 1; |
954 |
|
|
955 |
|
*MVmaxF = max; |
956 |
|
} |
957 |
|
|
958 |
|
if (pMB->mode == MODE_BACKWARD || pMB->mode == MODE_INTERPOLATE) { |
959 |
|
const VECTOR * const mv = qpel ? pMB->b_qmvs : pMB->b_mvs; |
960 |
|
int max = *MVmaxB; |
961 |
|
if (mv[0].x > max) max = mv[0].x; |
962 |
|
else if (-mv[0].x - 1 > max) max = -mv[0].x - 1; |
963 |
|
if (mv[0].y > max) max = mv[0].y; |
964 |
|
else if (-mv[0].y - 1 > max) max = -mv[0].y - 1; |
965 |
|
*MVmaxB = max; |
966 |
|
} |
967 |
|
} |
968 |
|
|
969 |
|
|
970 |
void |
void |
971 |
MotionEstimationBVOP(MBParam * const pParam, |
MotionEstimationBVOP(MBParam * const pParam, |
972 |
FRAMEINFO * const frame, |
FRAMEINFO * const frame, |
983 |
const IMAGE * const b_ref, |
const IMAGE * const b_ref, |
984 |
const IMAGE * const b_refH, |
const IMAGE * const b_refH, |
985 |
const IMAGE * const b_refV, |
const IMAGE * const b_refV, |
986 |
const IMAGE * const b_refHV) |
const IMAGE * const b_refHV, |
987 |
|
const int num_slices) |
988 |
{ |
{ |
989 |
uint32_t i, j; |
uint32_t i, j; |
990 |
int32_t best_sad = 256*4096; |
int32_t best_sad = 256*4096; |
|
int32_t sad2; |
|
991 |
uint32_t skip_sad; |
uint32_t skip_sad; |
992 |
|
int fb_thresh; |
993 |
const MACROBLOCK * const b_mbs = b_reference->mbs; |
const MACROBLOCK * const b_mbs = b_reference->mbs; |
994 |
|
|
995 |
VECTOR f_predMV, b_predMV; |
VECTOR f_predMV, b_predMV; |
996 |
|
|
997 |
|
int mb_width = pParam->mb_width; |
998 |
|
int mb_height = pParam->mb_height; |
999 |
|
int MVmaxF = 0, MVmaxB = 0; |
1000 |
const int32_t TRB = time_pp - time_bp; |
const int32_t TRB = time_pp - time_bp; |
1001 |
const int32_t TRD = time_pp; |
const int32_t TRD = time_pp; |
1002 |
DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); |
DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); |
1010 |
Data_d.rounding = 0; |
Data_d.rounding = 0; |
1011 |
Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; |
Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; |
1012 |
Data_d.iQuant = frame->quant; |
Data_d.iQuant = frame->quant; |
1013 |
|
Data_d.quant_sq = frame->quant*frame->quant; |
1014 |
Data_d.dctSpace = dct_space; |
Data_d.dctSpace = dct_space; |
1015 |
Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); |
Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); |
1016 |
Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; |
Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; |
1021 |
memcpy(&Data_b, &Data_d, sizeof(SearchData)); |
memcpy(&Data_b, &Data_d, sizeof(SearchData)); |
1022 |
memcpy(&Data_i, &Data_d, sizeof(SearchData)); |
memcpy(&Data_i, &Data_d, sizeof(SearchData)); |
1023 |
|
|
1024 |
|
Data_f.iFcode = Data_i.iFcode = frame->fcode = b_reference->fcode; |
1025 |
|
Data_b.iFcode = Data_i.bFcode = frame->bcode = b_reference->fcode; |
1026 |
|
|
1027 |
|
for (j = 0; j < pParam->mb_height; j++) { |
1028 |
|
int new_bound = mb_width * ((((j*num_slices) / mb_height) * mb_height + (num_slices-1)) / num_slices); |
1029 |
|
|
1030 |
|
f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ |
1031 |
|
|
1032 |
|
for (i = 0; i < pParam->mb_width; i++) { |
1033 |
|
MACROBLOCK * const pMB = frame->mbs + i + j * pParam->mb_width; |
1034 |
|
const MACROBLOCK * const b_mb = b_mbs + i + j * pParam->mb_width; |
1035 |
|
int force_direct = (((j*mb_width+i)==new_bound) && (j > 0)) ? 1 : 0; /* MTK decoder chipsets do NOT reset predMVs upon resync marker in BVOPs. We workaround this problem |
1036 |
|
by placing the slice border on second MB in a row and then force the first MB to be direct mode */ |
1037 |
|
|
1038 |
|
pMB->mode = -1; |
1039 |
|
|
1040 |
|
initialize_searchData(&Data_d, &Data_f, &Data_b, &Data_i, |
1041 |
|
i, j, f_ref, f_refH->y, f_refV->y, f_refHV->y, |
1042 |
|
b_ref, b_refH->y, b_refV->y, b_refHV->y, |
1043 |
|
&frame->image, b_mb); |
1044 |
|
|
1045 |
|
/* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ |
1046 |
|
if (b_reference->coding_type != S_VOP) |
1047 |
|
if (b_mb->mode == MODE_NOT_CODED) { |
1048 |
|
pMB->mode = MODE_NOT_CODED; |
1049 |
|
pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; |
1050 |
|
pMB->sad16 = 0; |
1051 |
|
continue; |
1052 |
|
} |
1053 |
|
|
1054 |
|
/* direct search comes first, because it (1) checks for SKIP-mode |
1055 |
|
and (2) sets very good predictions for forward and backward search */ |
1056 |
|
skip_sad = SearchDirect_initial(i, j, frame->motion_flags, TRB, TRD, pParam, pMB, |
1057 |
|
b_mb, &best_sad, &Data_d); |
1058 |
|
|
1059 |
|
if (pMB->mode == MODE_DIRECT_NONE_MV) { |
1060 |
|
pMB->sad16 = best_sad; |
1061 |
|
pMB->cbp = 0; |
1062 |
|
continue; |
1063 |
|
} |
1064 |
|
|
1065 |
|
SearchBF_initial(i, j, frame->motion_flags, frame->fcode, pParam, pMB, |
1066 |
|
&f_predMV, &best_sad, MODE_FORWARD, &Data_f, Data_d.currentMV[1], new_bound); |
1067 |
|
|
1068 |
|
SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, |
1069 |
|
&b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2], new_bound); |
1070 |
|
|
1071 |
|
if (frame->motion_flags&XVID_ME_BFRAME_EARLYSTOP) |
1072 |
|
fb_thresh = best_sad; |
1073 |
|
else |
1074 |
|
fb_thresh = best_sad + (best_sad>>1); |
1075 |
|
|
1076 |
|
if (Data_f.iMinSAD[0] <= fb_thresh) |
1077 |
|
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); |
1078 |
|
|
1079 |
|
if (Data_b.iMinSAD[0] <= fb_thresh) |
1080 |
|
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); |
1081 |
|
|
1082 |
|
SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, |
1083 |
|
&Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); |
1084 |
|
|
1085 |
|
if (((Data_i.iMinSAD[0] < best_sad +(best_sad>>3)) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) |
1086 |
|
|| Data_i.iMinSAD[0] <= best_sad) |
1087 |
|
|
1088 |
|
SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); |
1089 |
|
|
1090 |
|
if (Data_d.iMinSAD[0] <= 2*best_sad) |
1091 |
|
if ((!(frame->motion_flags&XVID_ME_SKIP_DELTASEARCH) && (best_sad > 750)) |
1092 |
|
|| (best_sad > 1000)) |
1093 |
|
|
1094 |
|
SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); |
1095 |
|
|
1096 |
|
/* final skip decision */ |
1097 |
|
if ( (skip_sad < 2 * Data_d.iQuant * MAX_SAD00_FOR_SKIP ) |
1098 |
|
&& ((100*best_sad)/(skip_sad+1) > FINAL_SKIP_THRESH) ) { |
1099 |
|
|
1100 |
|
Data_d.chromaSAD = 0; /* green light for chroma check */ |
1101 |
|
|
1102 |
|
SkipDecisionB(pMB, &Data_d); |
1103 |
|
|
1104 |
|
if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ |
1105 |
|
pMB->sad16 = skip_sad; |
1106 |
|
pMB->cbp = 0; |
1107 |
|
continue; |
1108 |
|
} |
1109 |
|
} |
1110 |
|
|
1111 |
|
if (frame->vop_flags & XVID_VOP_RD_BVOP) |
1112 |
|
ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, |
1113 |
|
pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, frame->vop_flags, pParam, i, j, best_sad, force_direct); |
1114 |
|
else |
1115 |
|
ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV, force_direct); |
1116 |
|
|
1117 |
|
maxMotionBVOP(&MVmaxF, &MVmaxB, pMB, Data_d.qpel); |
1118 |
|
|
1119 |
|
} |
1120 |
|
} |
1121 |
|
|
1122 |
|
frame->fcode = getMinFcode(MVmaxF); |
1123 |
|
frame->bcode = getMinFcode(MVmaxB); |
1124 |
|
} |
1125 |
|
|
1126 |
|
|
1127 |
|
|
1128 |
|
void |
1129 |
|
SMPMotionEstimationBVOP(SMPData * h) |
1130 |
|
{ |
1131 |
|
Encoder *pEnc = (Encoder *) h->pEnc; |
1132 |
|
|
1133 |
|
const MBParam * const pParam = &pEnc->mbParam; |
1134 |
|
const FRAMEINFO * const frame = h->current; |
1135 |
|
const int32_t time_bp = (int32_t)(pEnc->current->stamp - frame->stamp); |
1136 |
|
const int32_t time_pp = (int32_t)(pEnc->current->stamp - pEnc->reference->stamp); |
1137 |
|
/* forward (past) reference */ |
1138 |
|
const IMAGE * const f_ref = &pEnc->reference->image; |
1139 |
|
const IMAGE * const f_refH = &pEnc->f_refh; |
1140 |
|
const IMAGE * const f_refV = &pEnc->f_refv; |
1141 |
|
const IMAGE * const f_refHV = &pEnc->f_refhv; |
1142 |
|
/* backward (future) reference */ |
1143 |
|
const FRAMEINFO * const b_reference = pEnc->current; |
1144 |
|
const IMAGE * const b_ref = &pEnc->current->image; |
1145 |
|
const IMAGE * const b_refH = &pEnc->vInterH; |
1146 |
|
const IMAGE * const b_refV = &pEnc->vInterV; |
1147 |
|
const IMAGE * const b_refHV = &pEnc->vInterHV; |
1148 |
|
|
1149 |
|
int mb_width = pParam->mb_width; |
1150 |
|
int mb_height = pParam->mb_height; |
1151 |
|
int num_slices = pEnc->num_slices; |
1152 |
|
int y_row = h->y_row; |
1153 |
|
int y_step = h->y_step; |
1154 |
|
int start_y = h->start_y; |
1155 |
|
int stop_y = h->stop_y; |
1156 |
|
int * complete_count_self = h->complete_count_self; |
1157 |
|
const int * complete_count_above = h->complete_count_above; |
1158 |
|
int max_mbs; |
1159 |
|
int current_mb = 0; |
1160 |
|
|
1161 |
|
int32_t i, j; |
1162 |
|
int32_t best_sad = 256*4096; |
1163 |
|
uint32_t skip_sad; |
1164 |
|
int fb_thresh; |
1165 |
|
const MACROBLOCK * const b_mbs = b_reference->mbs; |
1166 |
|
|
1167 |
|
VECTOR f_predMV, b_predMV; |
1168 |
|
|
1169 |
|
int MVmaxF = 0, MVmaxB = 0; |
1170 |
|
const int32_t TRB = time_pp - time_bp; |
1171 |
|
const int32_t TRD = time_pp; |
1172 |
|
DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); |
1173 |
|
|
1174 |
|
/* some pre-inintialized data for the rest of the search */ |
1175 |
|
SearchData Data_d, Data_f, Data_b, Data_i; |
1176 |
|
memset(&Data_d, 0, sizeof(SearchData)); |
1177 |
|
|
1178 |
|
Data_d.iEdgedWidth = pParam->edged_width; |
1179 |
|
Data_d.qpel = pParam->vol_flags & XVID_VOL_QUARTERPEL ? 1 : 0; |
1180 |
|
Data_d.rounding = 0; |
1181 |
|
Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; |
1182 |
|
Data_d.iQuant = frame->quant; |
1183 |
|
Data_d.quant_sq = frame->quant*frame->quant; |
1184 |
|
Data_d.dctSpace = dct_space; |
1185 |
|
Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); |
1186 |
|
Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; |
1187 |
|
|
1188 |
|
Data_d.RefQ = h->RefQ; |
1189 |
|
|
1190 |
|
memcpy(&Data_f, &Data_d, sizeof(SearchData)); |
1191 |
|
memcpy(&Data_b, &Data_d, sizeof(SearchData)); |
1192 |
|
memcpy(&Data_i, &Data_d, sizeof(SearchData)); |
1193 |
|
|
1194 |
Data_f.iFcode = Data_i.iFcode = frame->fcode; |
Data_f.iFcode = Data_i.iFcode = frame->fcode; |
1195 |
Data_b.iFcode = Data_i.bFcode = frame->bcode; |
Data_b.iFcode = Data_i.bFcode = frame->bcode; |
1196 |
|
|
1197 |
|
max_mbs = 0; |
1198 |
|
|
1199 |
for (j = 0; j < pParam->mb_height; j++) { |
for (j = (start_y+y_row); j < stop_y; j += y_step) { |
1200 |
|
int new_bound = mb_width * ((((j*num_slices) / mb_height) * mb_height + (num_slices-1)) / num_slices); |
1201 |
|
|
1202 |
|
if (j == start_y) max_mbs = pParam->mb_width; /* we can process all blocks of the first row */ |
1203 |
|
|
1204 |
f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ |
f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ |
1205 |
|
|
1206 |
for (i = 0; i < pParam->mb_width; i++) { |
for (i = 0; i < (int) pParam->mb_width; i++) { |
1207 |
MACROBLOCK * const pMB = frame->mbs + i + j * pParam->mb_width; |
MACROBLOCK * const pMB = frame->mbs + i + j * pParam->mb_width; |
1208 |
const MACROBLOCK * const b_mb = b_mbs + i + j * pParam->mb_width; |
const MACROBLOCK * const b_mb = b_mbs + i + j * pParam->mb_width; |
1209 |
|
int force_direct = (((j*mb_width+i)==new_bound) && (j > 0)) ? 1 : 0; /* MTK decoder chipsets do NOT reset predMVs upon resync marker in BVOPs. We workaround this problem |
1210 |
|
by placing the slice border on second MB in a row and then force the first MB to be direct mode */ |
1211 |
pMB->mode = -1; |
pMB->mode = -1; |
1212 |
|
|
1213 |
initialize_searchData(&Data_d, &Data_f, &Data_b, &Data_i, |
initialize_searchData(&Data_d, &Data_f, &Data_b, &Data_i, |
1215 |
b_ref, b_refH->y, b_refV->y, b_refHV->y, |
b_ref, b_refH->y, b_refV->y, b_refHV->y, |
1216 |
&frame->image, b_mb); |
&frame->image, b_mb); |
1217 |
|
|
1218 |
|
if (current_mb >= max_mbs) { |
1219 |
|
/* we ME-ed all macroblocks we safely could. grab next portion */ |
1220 |
|
int above_count = *complete_count_above; /* sync point */ |
1221 |
|
if (above_count == pParam->mb_width) { |
1222 |
|
/* full line above is ready */ |
1223 |
|
above_count = pParam->mb_width+1; |
1224 |
|
if (j < stop_y-y_step) { |
1225 |
|
/* this is not last line, grab a portion of MBs from the next line too */ |
1226 |
|
above_count += MAX(0, complete_count_above[1] - 1); |
1227 |
|
} |
1228 |
|
} |
1229 |
|
|
1230 |
|
max_mbs = current_mb + above_count - i - 1; |
1231 |
|
|
1232 |
|
if (current_mb >= max_mbs) { |
1233 |
|
/* current workload is zero */ |
1234 |
|
i--; |
1235 |
|
sched_yield(); |
1236 |
|
continue; |
1237 |
|
} |
1238 |
|
} |
1239 |
|
|
1240 |
/* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ |
/* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ |
1241 |
if (b_reference->coding_type != S_VOP) |
if (b_reference->coding_type != S_VOP) |
1242 |
if (b_mb->mode == MODE_NOT_CODED) { |
if (b_mb->mode == MODE_NOT_CODED) { |
1243 |
pMB->mode = MODE_NOT_CODED; |
pMB->mode = MODE_NOT_CODED; |
1244 |
pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; |
pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; |
1245 |
pMB->sad16 = 0; |
pMB->sad16 = 0; |
1246 |
|
*complete_count_self = i+1; |
1247 |
|
current_mb++; |
1248 |
continue; |
continue; |
1249 |
} |
} |
1250 |
|
|
1255 |
|
|
1256 |
if (pMB->mode == MODE_DIRECT_NONE_MV) { |
if (pMB->mode == MODE_DIRECT_NONE_MV) { |
1257 |
pMB->sad16 = best_sad; |
pMB->sad16 = best_sad; |
1258 |
|
pMB->cbp = 0; |
1259 |
|
*complete_count_self = i+1; |
1260 |
|
current_mb++; |
1261 |
continue; |
continue; |
1262 |
} |
} |
1263 |
|
|
1264 |
SearchBF_initial(i, j, frame->motion_flags, frame->fcode, pParam, pMB, |
SearchBF_initial(i, j, frame->motion_flags, frame->fcode, pParam, pMB, |
1265 |
&f_predMV, &best_sad, MODE_FORWARD, &Data_f, Data_d.currentMV[1]); |
&f_predMV, &best_sad, MODE_FORWARD, &Data_f, Data_d.currentMV[1], new_bound); |
1266 |
|
|
1267 |
SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, |
SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, |
1268 |
&b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2]); |
&b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2], new_bound); |
1269 |
|
|
1270 |
sad2 = best_sad; |
if (frame->motion_flags&XVID_ME_BFRAME_EARLYSTOP) |
1271 |
|
fb_thresh = best_sad; |
1272 |
|
else |
1273 |
|
fb_thresh = best_sad + (best_sad>>1); |
1274 |
|
|
1275 |
if (Data_f.iMinSAD[0] < 2*sad2+2000) |
if (Data_f.iMinSAD[0] <= fb_thresh) |
1276 |
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); |
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); |
1277 |
|
|
1278 |
if (Data_b.iMinSAD[0] < 2*sad2+2000) |
if (Data_b.iMinSAD[0] <= fb_thresh) |
1279 |
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); |
SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); |
1280 |
|
|
1281 |
SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, |
SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, |
1282 |
&Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); |
&Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); |
1283 |
|
|
1284 |
if (((Data_i.iMinSAD[0] < 2*best_sad+2000) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) |
if (((Data_i.iMinSAD[0] < best_sad +(best_sad>>3)) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) |
1285 |
|| Data_i.iMinSAD[0] <= best_sad) |
|| Data_i.iMinSAD[0] <= best_sad) |
1286 |
|
|
1287 |
SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); |
SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); |
1288 |
|
|
1289 |
if ((Data_d.iMinSAD[0] <= 2*best_sad) && (!frame->motion_flags&XVID_ME_SKIP_DELTASEARCH)) |
if (Data_d.iMinSAD[0] <= 2*best_sad) |
1290 |
|
if ((!(frame->motion_flags&XVID_ME_SKIP_DELTASEARCH) && (best_sad > 750)) |
1291 |
|
|| (best_sad > 1000)) |
1292 |
|
|
1293 |
SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); |
SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); |
1294 |
|
|
1295 |
/* final skip decision */ |
/* final skip decision */ |
1302 |
|
|
1303 |
if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ |
if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ |
1304 |
pMB->sad16 = skip_sad; |
pMB->sad16 = skip_sad; |
1305 |
|
pMB->cbp = 0; |
1306 |
|
*complete_count_self = i+1; |
1307 |
|
current_mb++; |
1308 |
continue; |
continue; |
1309 |
} |
} |
1310 |
} |
} |
1311 |
|
|
1312 |
if (frame->vop_flags & XVID_VOP_RD_BVOP) |
if (frame->vop_flags & XVID_VOP_RD_BVOP) |
1313 |
ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, |
ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, |
1314 |
pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, pParam, i, j); |
pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, frame->vop_flags, pParam, i, j, best_sad, force_direct); |
1315 |
else |
else |
1316 |
ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV); |
ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV, force_direct); |
1317 |
|
|
1318 |
|
*complete_count_self = i+1; |
1319 |
|
current_mb++; |
1320 |
|
maxMotionBVOP(&MVmaxF, &MVmaxB, pMB, Data_d.qpel); |
1321 |
} |
} |
1322 |
|
|
1323 |
|
complete_count_self++; |
1324 |
|
complete_count_above++; |
1325 |
} |
} |
1326 |
|
|
1327 |
|
h->minfcode = getMinFcode(MVmaxF); |
1328 |
|
h->minbcode = getMinFcode(MVmaxB); |
1329 |
} |
} |