19 |
* along with this program ; if not, write to the Free Software |
* along with this program ; if not, write to the Free Software |
20 |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
21 |
* |
* |
22 |
* $Id: plugin_psnrhvsm.c,v 1.3 2010-11-10 21:25:16 Isibaar Exp $ |
* $Id: plugin_psnrhvsm.c,v 1.4 2010-11-28 15:18:21 Isibaar Exp $ |
23 |
* |
* |
24 |
****************************************************************************/ |
****************************************************************************/ |
25 |
|
|
43 |
#include "../xvid.h" |
#include "../xvid.h" |
44 |
#include "../dct/fdct.h" |
#include "../dct/fdct.h" |
45 |
#include "../image/image.h" |
#include "../image/image.h" |
46 |
|
#include "../motion/sad.h" |
47 |
#include "../utils/mem_transfer.h" |
#include "../utils/mem_transfer.h" |
48 |
#include "../utils/emms.h" |
#include "../utils/emms.h" |
49 |
|
|
57 |
|
|
58 |
} psnrhvsm_data_t; /* internal plugin data */ |
} psnrhvsm_data_t; /* internal plugin data */ |
59 |
|
|
60 |
static const float CSF_Coeff[64] = |
|
61 |
{ 1.608443f, 2.339554f, 2.573509f, 1.608443f, 1.072295f, 0.643377f, 0.504610f, 0.421887f, |
#if 0 /* Floating-point implementation: Slow but accurate */ |
62 |
|
|
63 |
|
static const float CSF_Coeff[64] = { |
64 |
|
1.608443f, 2.339554f, 2.573509f, 1.608443f, 1.072295f, 0.643377f, 0.504610f, 0.421887f, |
65 |
2.144591f, 2.144591f, 1.838221f, 1.354478f, 0.989811f, 0.443708f, 0.428918f, 0.467911f, |
2.144591f, 2.144591f, 1.838221f, 1.354478f, 0.989811f, 0.443708f, 0.428918f, 0.467911f, |
66 |
1.838221f, 1.979622f, 1.608443f, 1.072295f, 0.643377f, 0.451493f, 0.372972f, 0.459555f, |
1.838221f, 1.979622f, 1.608443f, 1.072295f, 0.643377f, 0.451493f, 0.372972f, 0.459555f, |
67 |
1.838221f, 1.513829f, 1.169777f, 0.887417f, 0.504610f, 0.295806f, 0.321689f, 0.415082f, |
1.838221f, 1.513829f, 1.169777f, 0.887417f, 0.504610f, 0.295806f, 0.321689f, 0.415082f, |
71 |
0.357432f, 0.279729f, 0.270896f, 0.262603f, 0.229778f, 0.257351f, 0.249855f, 0.259950f |
0.357432f, 0.279729f, 0.270896f, 0.262603f, 0.229778f, 0.257351f, 0.249855f, 0.259950f |
72 |
}; |
}; |
73 |
|
|
74 |
static const float Mask_Coeff[64] = |
static const float Mask_Coeff[64] = { |
75 |
{ 0.000000f, 0.826446f, 1.000000f, 0.390625f, 0.173611f, 0.062500f, 0.038447f, 0.026874f, |
0.000000f, 0.826446f, 1.000000f, 0.390625f, 0.173611f, 0.062500f, 0.038447f, 0.026874f, |
76 |
0.694444f, 0.694444f, 0.510204f, 0.277008f, 0.147929f, 0.029727f, 0.027778f, 0.033058f, |
0.694444f, 0.694444f, 0.510204f, 0.277008f, 0.147929f, 0.029727f, 0.027778f, 0.033058f, |
77 |
0.510204f, 0.591716f, 0.390625f, 0.173611f, 0.062500f, 0.030779f, 0.021004f, 0.031888f, |
0.510204f, 0.591716f, 0.390625f, 0.173611f, 0.062500f, 0.030779f, 0.021004f, 0.031888f, |
78 |
0.510204f, 0.346021f, 0.206612f, 0.118906f, 0.038447f, 0.013212f, 0.015625f, 0.026015f, |
0.510204f, 0.346021f, 0.206612f, 0.118906f, 0.038447f, 0.013212f, 0.015625f, 0.026015f, |
82 |
0.019290f, 0.011815f, 0.011080f, 0.010412f, 0.007972f, 0.010000f, 0.009426f, 0.010203f |
0.019290f, 0.011815f, 0.011080f, 0.010412f, 0.007972f, 0.010000f, 0.009426f, 0.010203f |
83 |
}; |
}; |
84 |
|
|
85 |
#if 0 /* Floating-point implementation */ |
static uint32_t calc_SSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
|
|
|
|
static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
|
86 |
{ |
{ |
87 |
int x, y, i, j; |
int x, y, i, j; |
88 |
uint32_t Global_A, Global_B, Sum_A = 0, Sum_B = 0; |
uint32_t Global_A, Global_B, Sum_A = 0, Sum_B = 0; |
153 |
u -= (MASK_A / Mask_Coeff[j*8 + i]); |
u -= (MASK_A / Mask_Coeff[j*8 + i]); |
154 |
} |
} |
155 |
|
|
156 |
MSE_H += (uint32_t) ((256.f*(u * CSF_Coeff[j*8 + i])*(u * CSF_Coeff[j*8 + i])) + 0.5f); |
MSE_H += (uint32_t) ((16.f*(u * CSF_Coeff[j*8 + i])*(u * CSF_Coeff[j*8 + i])) + 0.5f); |
157 |
} |
} |
158 |
} |
} |
159 |
return MSE_H; /* Fixed-point value right-shifted by eight */ |
return MSE_H; /* Fixed-point value right-shifted by four */ |
160 |
} |
} |
161 |
|
|
162 |
#else |
#else |
163 |
|
|
164 |
/* First draft of a fixed-point implementation. |
static uint32_t calc_SSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
|
Might serve as a template for MMX/SSE code */ |
|
|
|
|
|
static const uint16_t iMask_Coeff[64] = |
|
|
{ 0, 59577, 65535, 40959, 27306, 16384, 12850, 10743, |
|
|
54612, 54612, 46811, 34492, 25206, 11299, 10923, 11915, |
|
|
46811, 50412, 40959, 27306, 16384, 11497, 9498, 11703, |
|
|
46811, 38550, 29789, 22598, 12850, 7533, 8192, 10570, |
|
|
36408, 29789, 17712, 11703, 9637, 6012, 6363, 8511, |
|
|
27306, 18724, 11915, 10240, 8091, 6302, 5799, 7123, |
|
|
13374, 10240, 8402, 7533, 6363, 5416, 5461, 6489, |
|
|
9102, 7123, 6898, 6687, 5851, 6553, 6363, 6620 |
|
|
}; |
|
|
|
|
|
static const uint16_t Inv_iMask_Coeff[64] = |
|
|
{ 0, 310, 256, 655, 1475, 4096, 6659, 9526, |
|
|
369, 369, 502, 924, 1731, 8612, 9216, 7744, |
|
|
502, 433, 655, 1475, 4096, 8317, 12188, 8028, |
|
|
502, 740, 1239, 2153, 6659, 19376, 16384, 9840, |
|
|
829, 1239, 3505, 8028, 11838, 30415, 27159, 15178, |
|
|
1475, 3136, 7744, 10486, 16796, 27688, 32691, 21667, |
|
|
6147, 10486, 15575, 19376, 27159, 37482, 36866, 26114, |
|
|
13271, 21667, 23105, 24587, 32112, 25600, 27159, 25091 |
|
|
}; |
|
|
|
|
|
static const uint16_t iCSF_Coeff[64] = |
|
|
{ 1647, 2396, 2635, 1647, 1098, 659, 517, 432, |
|
|
2196, 2196, 1882, 1387, 1014, 454, 439, 479, |
|
|
1882, 2027, 1647, 1098, 659, 462, 382, 471, |
|
|
1882, 1550, 1198, 909, 517, 303, 329, 425, |
|
|
1464, 1198, 712, 471, 388, 242, 256, 342, |
|
|
1098, 753, 479, 412, 325, 253, 233, 286, |
|
|
538, 412, 338, 303, 256, 218, 220, 261, |
|
|
366, 286, 277, 269, 235, 264, 256, 266 |
|
|
}; |
|
|
|
|
|
static __inline uint32_t isqrt(unsigned long n) |
|
|
{ |
|
|
uint32_t c = 0x8000; |
|
|
uint32_t g = 0x8000; |
|
|
|
|
|
for(;;) { |
|
|
if(g*g > n) |
|
|
g ^= c; |
|
|
c >>= 1; |
|
|
if(c == 0) |
|
|
return g; |
|
|
g |= c; |
|
|
} |
|
|
} |
|
|
|
|
|
static uint32_t Calc_MSE_H(int16_t *DCT_A, int16_t *DCT_B, uint8_t *IMG_A, uint8_t *IMG_B, int stride) |
|
165 |
{ |
{ |
166 |
int x, y, i, j; |
DECLARE_ALIGNED_MATRIX(sums, 1, 8, uint16_t, CACHE_LINE); |
167 |
uint32_t Global_A, Global_B, Sum_A = 0, Sum_B = 0; |
DECLARE_ALIGNED_MATRIX(squares, 1, 8, uint32_t, CACHE_LINE); |
168 |
uint32_t Local[8] = {0, 0, 0, 0, 0, 0, 0, 0}; |
uint32_t i, Global_A, Global_B, Sum_A = 0, Sum_B = 0; |
169 |
uint32_t Local_Square[8] = {0, 0, 0, 0, 0, 0, 0, 0}; |
uint32_t local[8], MASK_A, MASK_B, Mult1 = 64, Mult2 = 64; |
|
uint32_t MASK; |
|
|
uint32_t MSE_H = 0; |
|
170 |
|
|
171 |
/* Step 1: Calculate CSF weighted energy of DCT coefficients */ |
/* Step 1: Calculate CSF weighted energy of DCT coefficients */ |
|
for (y = 0; y < 8; y++) { |
|
|
for (x = 0; x < 8; x++) { |
|
|
uint16_t A = (abs(DCT_A[y*8 + x]) * iMask_Coeff[y*8 + x] + 4096) >> 13; |
|
|
uint16_t B = (abs(DCT_B[y*8 + x]) * iMask_Coeff[y*8 + x] + 4096) >> 13; |
|
172 |
|
|
173 |
Sum_A += ((A*A) >> 3); /* PMADDWD */ |
Sum_A = coeff8_energy(DCT_A); |
174 |
Sum_B += ((B*B) >> 3); |
Sum_B = coeff8_energy(DCT_B); |
|
} |
|
|
} |
|
175 |
|
|
176 |
/* Step 2: Determine local variances compared to entire block variance */ |
/* Step 2: Determine local variances compared to entire block variance */ |
|
for (y = 0; y < 2; y++) { |
|
|
for (x = 0; x < 2; x++) { |
|
|
for (j = 0; j < 4; j++) { |
|
|
for (i = 0; i < 4; i++) { |
|
|
uint8_t A = IMG_A[(y*4+j)*stride + 4*x + i]; |
|
|
uint8_t B = IMG_B[(y*4+j)*stride + 4*x + i]; |
|
177 |
|
|
178 |
Local[y*2 + x] += A; |
Global_A = blocksum8(IMG_A, stride, sums, squares); |
179 |
Local[y*2 + x + 4] += B; |
Global_B = blocksum8(IMG_B, stride, &sums[4], &squares[4]); |
|
Local_Square[y*2 + x] += A*A; |
|
|
Local_Square[y*2 + x + 4] += B*B; |
|
|
} |
|
|
} |
|
|
} |
|
|
} |
|
|
|
|
|
Global_A = Local[0] + Local[1] + Local[2] + Local[3]; |
|
|
Global_B = Local[4] + Local[5] + Local[6] + Local[7]; |
|
180 |
|
|
181 |
for (i = 0; i < 8; i++) |
for (i = 0; i < 8; i++) |
182 |
Local[i] = (Local_Square[i]<<4) - (Local[i]*Local[i]); /* 16*Var(Di) */ |
local[i] = (squares[i]<<4) - (sums[i]*sums[i]); /* 16*Var(Di) */ |
183 |
|
|
184 |
Local_Square[0] += (Local_Square[1] + Local_Square[2] + Local_Square[3]); |
squares[0] += (squares[1] + squares[2] + squares[3]); |
185 |
Local_Square[4] += (Local_Square[5] + Local_Square[6] + Local_Square[7]); |
squares[4] += (squares[5] + squares[6] + squares[7]); |
186 |
|
|
187 |
Global_A = (Local_Square[0]<<6) - Global_A*Global_A; /* 64*Var(D) */ |
Global_A = (squares[0]<<6) - Global_A*Global_A; /* 64*Var(D) */ |
188 |
Global_B = (Local_Square[4]<<6) - Global_B*Global_B; /* 64*Var(D) */ |
Global_B = (squares[4]<<6) - Global_B*Global_B; /* 64*Var(D) */ |
189 |
|
|
190 |
/* Step 3: Calculate contrast masking threshold */ |
/* Step 3: Calculate contrast masking threshold */ |
|
{ |
|
|
uint32_t MASK_A, MASK_B; |
|
|
uint32_t Mult1 = 64, Mult2 = 64; |
|
191 |
|
|
192 |
if (Global_A) |
if (Global_A) |
193 |
Mult1 = ((Local[0]+Local[1]+Local[2]+Local[3])<<8) / Global_A; |
Mult1 = ((local[0]+local[1]+local[2]+local[3])<<8) / Global_A; |
194 |
|
|
195 |
if (Global_B) |
if (Global_B) |
196 |
Mult2 = ((Local[4]+Local[5]+Local[6]+Local[7])<<8) / Global_B; |
Mult2 = ((local[4]+local[5]+local[6]+local[7])<<8) / Global_B; |
197 |
|
|
198 |
MASK_A = isqrt(2*Sum_A*Mult1) + 16; |
MASK_A = isqrt(2*Sum_A*Mult1) + 16; |
199 |
MASK_B = isqrt(2*Sum_B*Mult2) + 16; |
MASK_B = isqrt(2*Sum_B*Mult2) + 16; |
200 |
|
|
201 |
if (MASK_B > MASK_A) /* MAX(MASK_A, MASK_B) */ |
if (MASK_B > MASK_A) /* MAX(MASK_A, MASK_B) */ |
202 |
MASK = (uint32_t) MASK_B; |
MASK_A = ((MASK_B + 32) >> 6); |
203 |
else |
else |
204 |
MASK = (uint32_t) MASK_A; |
MASK_A = ((MASK_A + 32) >> 6); |
|
} |
|
205 |
|
|
206 |
/* Step 4: Calculate MSE of DCT coeffs reduced by masking effect */ |
/* Step 4: Calculate MSE of DCT coeffs reduced by masking effect */ |
|
for (j = 0; j < 8; j++) { |
|
|
for (i = 0; i < 8; i++) { |
|
|
uint32_t Thresh = (MASK * Inv_iMask_Coeff[j*8 + i] + 128) >> 8; |
|
|
uint32_t u = abs(DCT_A[j*8 + i] - DCT_B[j*8 + i]) << 10; |
|
|
|
|
|
if ((i|j)>0) { |
|
|
if (u < Thresh) |
|
|
u = 0; /* The error is not perceivable */ |
|
|
else |
|
|
u -= Thresh; |
|
|
} |
|
207 |
|
|
208 |
{ |
return sseh8_16bit(DCT_A, DCT_B, (uint16_t) MASK_A); |
|
uint64_t tmp = (u * iCSF_Coeff[j*8 + i] + 512) >> 10; |
|
|
MSE_H += (uint32_t) ((tmp * tmp) >> 12); |
|
|
} |
|
|
} |
|
|
} |
|
|
return MSE_H; |
|
209 |
} |
} |
210 |
|
|
211 |
#endif |
#endif |
237 |
|
|
238 |
emms(); |
emms(); |
239 |
|
|
240 |
/* Calculate MSE_H reduced by contrast masking effect */ |
/* Calculate SSE_H reduced by contrast masking effect */ |
241 |
sse_y += Calc_MSE_H(DCT_A, DCT_B, IMG_A + offset, IMG_B + offset, stride); |
sse_y += calc_SSE_H(DCT_A, DCT_B, IMG_A + offset, IMG_B + offset, stride); |
242 |
} |
} |
243 |
} |
} |
244 |
|
|
264 |
|
|
265 |
emms(); |
emms(); |
266 |
|
|
267 |
/* Calculate MSE_H reduced by contrast masking effect */ |
/* Calculate SSE_H reduced by contrast masking effect */ |
268 |
sse_u += Calc_MSE_H(DCT_A, DCT_B, U_A + offset, U_B + offset, stride_uv); |
sse_u += calc_SSE_H(DCT_A, DCT_B, U_A + offset, U_B + offset, stride_uv); |
269 |
|
|
270 |
emms(); |
emms(); |
271 |
|
|
279 |
|
|
280 |
emms(); |
emms(); |
281 |
|
|
282 |
/* Calculate MSE_H reduced by contrast masking effect */ |
/* Calculate SSE_H reduced by contrast masking effect */ |
283 |
sse_v += Calc_MSE_H(DCT_A, DCT_B, V_A + offset, V_B + offset, stride_uv); |
sse_v += calc_SSE_H(DCT_A, DCT_B, V_A + offset, V_B + offset, stride_uv); |
284 |
} |
} |
285 |
} |
} |
286 |
|
|
287 |
y = (int32_t) ( 4*sse_y / (data->width * data->height)); |
y = (int32_t) ( 4*16*sse_y / (data->width * data->height)); |
288 |
u = (int32_t) (16*sse_u / (data->width * data->height)); |
u = (int32_t) (16*16*sse_u / (data->width * data->height)); |
289 |
v = (int32_t) (16*sse_v / (data->width * data->height)); |
v = (int32_t) (16*16*sse_v / (data->width * data->height)); |
290 |
|
|
291 |
psnrhvsm->mse_sum_y += y; |
psnrhvsm->mse_sum_y += y; |
292 |
psnrhvsm->mse_sum_u += u; |
psnrhvsm->mse_sum_u += u; |