--- trunk/xvidcore/src/plugins/plugin_psnrhvsm.c 2010/10/29 16:39:07 1902 +++ trunk/xvidcore/src/plugins/plugin_psnrhvsm.c 2010/11/08 20:20:39 1903 @@ -19,7 +19,7 @@ * along with this program ; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * - * $Id: plugin_psnrhvsm.c,v 1.1 2010-10-10 19:19:46 Isibaar Exp $ + * $Id: plugin_psnrhvsm.c,v 1.2 2010-11-08 20:20:39 Isibaar Exp $ * ****************************************************************************/ @@ -48,7 +48,6 @@ typedef struct { - int pixels; /* width*height */ uint64_t mse_sum; /* for avrg psnrhvsm */ long frame_cnt; @@ -76,7 +75,7 @@ 0.019290f, 0.011815f, 0.011080f, 0.010412f, 0.007972f, 0.010000f, 0.009426f, 0.010203f }; -#if 1 /* Floating-point implementation */ +#if 0 /* Floating-point implementation */ static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) { @@ -85,6 +84,7 @@ uint32_t Local[8] = {0, 0, 0, 0, 0, 0, 0, 0}; uint32_t Local_Square[8] = {0, 0, 0, 0, 0, 0, 0, 0}; float MASK_A = 0.f, MASK_B = 0.f; + float Mult1 = 1.f, Mult2 = 1.f; uint32_t MSE_H = 0; /* Step 1: Calculate CSF weighted energy of DCT coefficients */ @@ -98,14 +98,16 @@ /* Step 2: Determine local variances compared to entire block variance */ for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - for (i = 0; i < 4; i++) { - uint8_t A = IMG_A[y*4*stride + 4*x + i]; - uint8_t B = IMG_B[y*4*stride + 4*x + i]; - - Local[y*2 + x] += A; - Local[y*2 + x + 4] += B; - Local_Square[y*2 + x] += A*A; - Local_Square[y*2 + x + 4] += B*B; + for (j = 0; j < 4; j++) { + for (i = 0; i < 4; i++) { + uint8_t A = IMG_A[(y*4+j)*stride + 4*x + i]; + uint8_t B = IMG_B[(y*4+j)*stride + 4*x + i]; + + Local[y*2 + x] += A; + Local[y*2 + x + 4] += B; + Local_Square[y*2 + x] += A*A; + Local_Square[y*2 + x + 4] += B*B; + } } } } @@ -123,8 +125,14 @@ Global_B = (Local_Square[4]<<6) - Global_B*Global_B; /* 64*Var(D) */ /* Step 3: Calculate contrast masking threshold */ - MASK_A = (float)sqrt(MASK_A*((float)(Local[0]+Local[1]+Local[2]+Local[3])/((float)Global_A/4.f)))/32.f; - MASK_B = (float)sqrt(MASK_B*((float)(Local[4]+Local[5]+Local[6]+Local[7])/((float)Global_B/4.f)))/32.f; + if (Global_A) + Mult1 = (float)(Local[0]+Local[1]+Local[2]+Local[3])/((float)(Global_A)/4.f); + + if (Global_B) + Mult2 = (float)(Local[4]+Local[5]+Local[6]+Local[7])/((float)(Global_B)/4.f); + + MASK_A = (float)sqrt(MASK_A * Mult1) / 32.f; + MASK_B = (float)sqrt(MASK_B * Mult2) / 32.f; if (MASK_B > MASK_A) MASK_A = MASK_B; /* MAX(MASK_A, MASK_B) */ @@ -182,6 +190,21 @@ 366, 286, 277, 269, 235, 264, 256, 266 }; +static __inline uint32_t isqrt(unsigned long n) +{ + uint32_t c = 0x8000; + uint32_t g = 0x8000; + + for(;;) { + if(g*g > n) + g ^= c; + c >>= 1; + if(c == 0) + return g; + g |= c; + } +} + static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) { int x, y, i, j; @@ -205,14 +228,16 @@ /* Step 2: Determine local variances compared to entire block variance */ for (y = 0; y < 2; y++) { for (x = 0; x < 2; x++) { - for (i = 0; i < 4; i++) { - uint8_t A = IMG_A[y*4*stride + 4*x + i]; - uint8_t B = IMG_B[y*4*stride + 4*x + i]; - - Local[y*2 + x] += A; - Local[y*2 + x + 4] += B; - Local_Square[y*2 + x] += A*A; - Local_Square[y*2 + x + 4] += B*B; + for (j = 0; j < 4; j++) { + for (i = 0; i < 4; i++) { + uint8_t A = IMG_A[(y*4+j)*stride + 4*x + i]; + uint8_t B = IMG_B[(y*4+j)*stride + 4*x + i]; + + Local[y*2 + x] += A; + Local[y*2 + x + 4] += B; + Local_Square[y*2 + x] += A*A; + Local_Square[y*2 + x + 4] += B*B; + } } } } @@ -231,18 +256,22 @@ /* Step 3: Calculate contrast masking threshold */ { - float MASK_A, MASK_B; /* TODO */ + uint32_t MASK_A, MASK_B; + uint32_t Mult1 = 64, Mult2 = 64; + + if (Global_A) + Mult1 = ((Local[0]+Local[1]+Local[2]+Local[3])<<8) / Global_A; - MASK_A = (float)((double)(Sum_A + 4) / 8.); - MASK_B = (float)((double)(Sum_B + 4) / 8.); + if (Global_B) + Mult2 = ((Local[4]+Local[5]+Local[6]+Local[7])<<8) / Global_B; - MASK_A = sqrt(MASK_A*((float)(Local[0]+Local[1]+Local[2]+Local[3])/((float)Global_A/4.f)))/32.f; - MASK_B = sqrt(MASK_B*((float)(Local[4]+Local[5]+Local[6]+Local[7])/((float)Global_B/4.f)))/32.f; + MASK_A = isqrt(2*Sum_A*Mult1) + 16; + MASK_B = isqrt(2*Sum_B*Mult2) + 16; if (MASK_B > MASK_A) /* MAX(MASK_A, MASK_B) */ - MASK = (uint32_t) (MASK_B * 1024.f + 0.5f); + MASK = (uint32_t) MASK_B; else - MASK = (uint32_t) (MASK_A * 1024.f + 0.5f); + MASK = (uint32_t) MASK_A; } /* Step 4: Calculate MSE of DCT coeffs reduced by masking effect */ @@ -272,13 +301,11 @@ static void psnrhvsm_after(xvid_plg_data_t *data, psnrhvsm_data_t *psnrhvsm) { DECLARE_ALIGNED_MATRIX(DCT, 2, 64, int16_t, CACHE_LINE); - int x, y, stride = data->original.stride[0]; + uint32_t x, y, stride = data->original.stride[0]; int16_t *DCT_A = &DCT[0], *DCT_B = &DCT[64]; uint8_t *IMG_A = (uint8_t *) data->original.plane[0]; uint8_t *IMG_B = (uint8_t *) data->current.plane[0]; - uint32_t MSE_H = 0; - - psnrhvsm->pixels = data->width * data->height; + uint64_t MSE_H = 0; for (y = 0; y < data->height; y += 8) { for (x = 0; x < data->width; x += 8) { @@ -301,10 +328,11 @@ } } - psnrhvsm->mse_sum += MSE_H; + x = 4*MSE_H / (data->width * data->height); + psnrhvsm->mse_sum += x; psnrhvsm->frame_cnt++; - printf(" psnrhvsm: %2.2f\n", sse_to_PSNR(MSE_H, 256*psnrhvsm->pixels)); + printf(" psnrhvsm: %2.2f\n", sse_to_PSNR(x, 1024)); } static int psnrhvsm_create(xvid_plg_create_t *create, void **handle) @@ -315,8 +343,6 @@ psnrhvsm->mse_sum = 0; psnrhvsm->frame_cnt = 0; - psnrhvsm->pixels = 0; - *(handle) = (void*) psnrhvsm; return 0; } @@ -345,7 +371,7 @@ MSE_H = (uint32_t) (psnrhvsm->mse_sum / psnrhvsm->frame_cnt); emms(); - printf("Average psnrhvsm: %2.2f\n", sse_to_PSNR(MSE_H, 256*psnrhvsm->pixels)); + printf("Average psnrhvsm: %2.2f\n", sse_to_PSNR(MSE_H, 1024)); free(psnrhvsm); } }