--- trunk/xvidcore/src/encoder.c 2006/02/23 07:22:43 1680 +++ trunk/xvidcore/src/encoder.c 2006/12/14 13:09:00 1766 @@ -21,7 +21,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: encoder.c,v 1.125 2006-02-23 07:22:43 syskin Exp $ + * $Id: encoder.c,v 1.129 2006-12-14 13:09:00 Isibaar Exp $ * ****************************************************************************/ @@ -49,6 +49,9 @@ #include "quant/quant_matrix.h" #include "utils/mem_align.h" +# include "motion/motion_smp.h" + + /***************************************************************************** * Local function prototypes ****************************************************************************/ @@ -135,7 +138,7 @@ enc_create(xvid_enc_create_t * create) { Encoder *pEnc; - int n; + int n; if (XVID_VERSION_MAJOR(create->version) != 1) /* v1.x.x */ return XVID_ERR_VERSION; @@ -441,6 +444,36 @@ pEnc->iFrameNum = 0; pEnc->fMvPrevSigma = -1; + /* multithreaded stuff: when create->num_threads > 0, allocate one SMPmotionData per thread plus, for each thread, a complete_count_self array of rows_per_thread ints (rows_per_thread is mb_height divided by t, rounded up). For n != 0, complete_count_above aliases the previous thread's complete_count_self; for thread 0 it points one element before thread t-1's array. NOTE(review): that pointer is only usable if index 0 is never read through it - confirm against the code in motion/motion_smp. */ + if (create->num_threads > 0) { + int t = create->num_threads; + int rows_per_thread = (pEnc->mbParam.mb_height+t-1)/t; + pEnc->num_threads = t; + pEnc->motionData = xvid_malloc(t*sizeof(SMPmotionData), CACHE_LINE); + if (!pEnc->motionData) + goto xvid_err_nosmp; + + for (n = 0; n < t; n++) { + pEnc->motionData[n].complete_count_self = + xvid_malloc(rows_per_thread * sizeof(int), CACHE_LINE); + + if (!pEnc->motionData[n].complete_count_self) + goto xvid_err_nosmp; + + if (n != 0) + pEnc->motionData[n].complete_count_above = + pEnc->motionData[n-1].complete_count_self; + } + pEnc->motionData[0].complete_count_above = + pEnc->motionData[t-1].complete_count_self - 1; + + } else { + xvid_err_nosmp: + /* no SMP: taken when create->num_threads is not positive, and also reached via goto when one of the allocations above fails. NOTE(review): on the goto path pEnc->num_threads has already been set to t and is not reset here, and the buffers allocated so far are not freed before motionData is overwritten with NULL. The teardown and motion-estimation hunks of this patch index pEnc->motionData whenever pEnc->num_threads > 0 - confirm that num_threads is cleared somewhere not visible in this diff. */ + create->num_threads = 0; + pEnc->motionData = NULL; + } + create->handle = (void *) pEnc; init_timer(); @@ -629,6 +662,9 @@ xvid_free(pEnc->temp_dquants); } + if ((pEnc->mbParam.plugin_flags & XVID_REQLAMBDA)) { + 
xvid_free(pEnc->temp_lambda); + } if (pEnc->num_plugins>0) { xvid_plg_destroy_t pdestroy; @@ -647,9 +683,16 @@ xvid_free(pEnc->mbParam.mpeg_quant_matrices); - if (pEnc->num_plugins>0) + if (pEnc->num_zones > 0) xvid_free(pEnc->zones); + if (pEnc->num_threads > 0) { + for (i = 0; i < pEnc->num_threads; i++) + xvid_free(pEnc->motionData[i].complete_count_self); + + xvid_free(pEnc->motionData); + } + xvid_free(pEnc); return 0; /* ok */ @@ -727,6 +770,7 @@ if(pEnc->mbParam.plugin_flags & XVID_REQLAMBDA) { int block = 0; + emms(); data.lambda = pEnc->temp_lambda; for(i = 0;i < pEnc->mbParam.mb_height; i++) for(j = 0;j < pEnc->mbParam.mb_width; j++) @@ -1666,10 +1710,54 @@ } } - MotionEstimation(&pEnc->mbParam, current, reference, - &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, - &pEnc->vGMC, 256*4096); + if (pEnc->num_threads > 0) { + /* multithreaded motion estimation - dispatch threads: clear each thread's complete_count_self and fill in its SMPmotionData (start_y = k, y_step = num_threads), start entries 1..num_threads-1 with pthread_create, run motionData[0] on the calling thread, join the others, then add every thread's mvSum/mvCount into current->sStat and set current->fcode to the largest per-thread minfcode. NOTE(review): the pthread_create return values are not checked before the unconditional pthread_join. */ + + void * status; + int rows_per_thread = (pParam->mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = current; + pEnc->motionData[k].reference = reference; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].pGMC = &pEnc->vGMC; + pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pParam->edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)MotionEstimateSMP, (void*)&pEnc->motionData[k]); + } + + MotionEstimateSMP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + current->fcode = 0; 
+ for (k = 0; k < pEnc->num_threads; k++) { + current->sStat.iMvSum += pEnc->motionData[k].mvSum; + current->sStat.iMvCount += pEnc->motionData[k].mvCount; + if (pEnc->motionData[k].minfcode > current->fcode) + current->fcode = pEnc->motionData[k].minfcode; + } + + } else { + /* regular ME: single MotionEstimation call on the calling thread, taken when pEnc->num_threads is not positive */ + + MotionEstimation(&pEnc->mbParam, current, reference, + &pEnc->vInterH, &pEnc->vInterV, &pEnc->vInterHV, + &pEnc->vGMC, 256*4096); + } stop_motion_timer(); @@ -1916,14 +2004,63 @@ frame->coding_type = B_VOP; call_plugins(pEnc, frame, NULL, XVID_PLG_FRAME, NULL, NULL, NULL); + frame->fcode = frame->bcode = pEnc->current->fcode; + start_timer(); - MotionEstimationBVOP(&pEnc->mbParam, frame, - ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ - ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ - pEnc->reference->mbs, f_ref, - &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, - pEnc->current, b_ref, &pEnc->vInterH, - &pEnc->vInterV, &pEnc->vInterHV); + if (pEnc->num_threads > 0) { + void * status; + int k; + /* multithreaded motion estimation - dispatch threads: clear each thread's complete_count_self and fill in its SMPmotionData, start entries 1..num_threads-1 on SMPMotionEstimationBVOP with pthread_create, run motionData[0] on the calling thread, join the others, then set frame->fcode and frame->bcode to the largest per-thread minfcode and minbcode. NOTE(review): the pthread_create return values are not checked before the unconditional pthread_join. */ + int rows_per_thread = (pEnc->mbParam.mb_height + pEnc->num_threads - 1)/pEnc->num_threads; + + for (k = 0; k < pEnc->num_threads; k++) { + memset(pEnc->motionData[k].complete_count_self, 0, rows_per_thread * sizeof(int)); + pEnc->motionData[k].pParam = &pEnc->mbParam; + pEnc->motionData[k].current = frame; + pEnc->motionData[k].reference = pEnc->current; + pEnc->motionData[k].fRef = f_ref; + pEnc->motionData[k].fRefH = &pEnc->f_refh; + pEnc->motionData[k].fRefV = &pEnc->f_refv; + pEnc->motionData[k].fRefHV = &pEnc->f_refhv; + pEnc->motionData[k].pRef = b_ref; + pEnc->motionData[k].pRefH = &pEnc->vInterH; + pEnc->motionData[k].pRefV = &pEnc->vInterV; + pEnc->motionData[k].pRefHV = &pEnc->vInterHV; + pEnc->motionData[k].time_bp = (int32_t)(pEnc->current->stamp - frame->stamp); + pEnc->motionData[k].time_pp = (int32_t)(pEnc->current->stamp - pEnc->reference->stamp); + 
pEnc->motionData[k].y_step = pEnc->num_threads; + pEnc->motionData[k].start_y = k; + /* todo: sort out temp space once and for all */ + pEnc->motionData[k].RefQ = pEnc->vInterH.u + 16*k*pEnc->mbParam.edged_width; + } + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_create(&pEnc->motionData[k].handle, NULL, + (void*)SMPMotionEstimationBVOP, (void*)&pEnc->motionData[k]); + } + + SMPMotionEstimationBVOP(&pEnc->motionData[0]); + + for (k = 1; k < pEnc->num_threads; k++) { + pthread_join(pEnc->motionData[k].handle, &status); + } + + frame->fcode = frame->bcode = 0; + for (k = 0; k < pEnc->num_threads; k++) { + if (pEnc->motionData[k].minfcode > frame->fcode) + frame->fcode = pEnc->motionData[k].minfcode; + if (pEnc->motionData[k].minbcode > frame->bcode) + frame->bcode = pEnc->motionData[k].minbcode; + } + } else { + MotionEstimationBVOP(&pEnc->mbParam, frame, + ((int32_t)(pEnc->current->stamp - frame->stamp)), /* time_bp */ + ((int32_t)(pEnc->current->stamp - pEnc->reference->stamp)), /* time_pp */ + pEnc->reference->mbs, f_ref, + &pEnc->f_refh, &pEnc->f_refv, &pEnc->f_refhv, + pEnc->current, b_ref, &pEnc->vInterH, + &pEnc->vInterV, &pEnc->vInterHV); + } stop_motion_timer(); set_timecodes(frame, pEnc->reference,pEnc->mbParam.fbase); @@ -1981,7 +2118,6 @@ stop_coding_timer(); } } - emms(); BitstreamPadAlways(bs); /* next_start_code() at the end of VideoObjectPlane() */