251 |
int k,sum; |
int k,sum; |
252 |
int x = i; |
int x = i; |
253 |
int y = j; |
int y = j; |
254 |
|
uint32_t quarterpel = pParam->m_quarterpel; |
255 |
|
|
256 |
switch (mb->mode) { |
switch (mb->mode) { |
257 |
case MODE_FORWARD: |
case MODE_FORWARD: |
258 |
|
|
259 |
|
if (quarterpel) { |
260 |
|
dx = mb->qmvs[0].x; |
261 |
|
dy = mb->qmvs[0].y; |
262 |
|
} else { |
263 |
dx = mb->mvs[0].x; |
dx = mb->mvs[0].x; |
264 |
dy = mb->mvs[0].y; |
dy = mb->mvs[0].y; |
265 |
|
} |
266 |
|
|
267 |
transfer_8to16sub(&dct_codes[0 * 64], |
compensate16x16_interpolate(&dct_codes[0 * 64], cur->y, f_ref->y, f_refh->y, |
268 |
cur->y + (j * 16) * edged_width + (i * 16), |
f_refv->y, f_refhv->y, 16 * i, 16 * j, dx, |
269 |
get_ref(f_ref->y, f_refh->y, f_refv->y, f_refhv->y, |
dy, edged_width, quarterpel, 0); |
270 |
i * 16, j * 16, 1, dx, dy, edged_width), |
|
271 |
edged_width); |
if (quarterpel) { |
272 |
|
dx /= 2; |
273 |
transfer_8to16sub(&dct_codes[1 * 64], |
dy /= 2; |
274 |
cur->y + (j * 16) * edged_width + (i * 16 + 8), |
} |
|
get_ref(f_ref->y, f_refh->y, f_refv->y, f_refhv->y, |
|
|
i * 16 + 8, j * 16, 1, dx, dy, edged_width), |
|
|
edged_width); |
|
|
|
|
|
transfer_8to16sub(&dct_codes[2 * 64], |
|
|
cur->y + (j * 16 + 8) * edged_width + (i * 16), |
|
|
get_ref(f_ref->y, f_refh->y, f_refv->y, f_refhv->y, |
|
|
i * 16, j * 16 + 8, 1, dx, dy, edged_width), |
|
|
edged_width); |
|
|
|
|
|
transfer_8to16sub(&dct_codes[3 * 64], |
|
|
cur->y + (j * 16 + 8) * edged_width + (i * 16 + 8), |
|
|
get_ref(f_ref->y, f_refh->y, f_refv->y, f_refhv->y, |
|
|
i * 16 + 8, j * 16 + 8, 1, dx, dy, edged_width), |
|
|
edged_width); |
|
|
|
|
275 |
|
|
276 |
dx = (dx & 3) ? (dx >> 1) | 1 : dx / 2; |
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
277 |
dy = (dy & 3) ? (dy >> 1) | 1 : dy / 2; |
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
278 |
|
|
279 |
/* uv-block-based compensation */ |
/* uv-block-based compensation */ |
280 |
transfer_8to16sub(&dct_codes[4 * 64], |
transfer_8to16sub(&dct_codes[4 * 64], |
294 |
break; |
break; |
295 |
|
|
296 |
case MODE_BACKWARD: |
case MODE_BACKWARD: |
297 |
|
if (quarterpel) { |
298 |
|
b_dx = mb->b_qmvs[0].x; |
299 |
|
b_dy = mb->b_qmvs[0].y; |
300 |
|
} else { |
301 |
b_dx = mb->b_mvs[0].x; |
b_dx = mb->b_mvs[0].x; |
302 |
b_dy = mb->b_mvs[0].y; |
b_dy = mb->b_mvs[0].y; |
303 |
|
} |
304 |
|
|
305 |
transfer_8to16sub(&dct_codes[0 * 64], |
compensate16x16_interpolate(&dct_codes[0 * 64], cur->y, b_ref->y, b_refh->y, |
306 |
cur->y + (j * 16) * edged_width + (i * 16), |
b_refv->y, b_refhv->y, 16 * i, 16 * j, b_dx, |
307 |
get_ref(b_ref->y, b_refh->y, b_refv->y, b_refhv->y, |
b_dy, edged_width, quarterpel, 0); |
308 |
i * 16, j * 16, 1, b_dx, b_dy, |
|
309 |
edged_width), edged_width); |
if (quarterpel) { |
310 |
|
b_dx /= 2; |
311 |
transfer_8to16sub(&dct_codes[1 * 64], |
b_dy /= 2; |
312 |
cur->y + (j * 16) * edged_width + (i * 16 + 8), |
} |
|
get_ref(b_ref->y, b_refh->y, b_refv->y, b_refhv->y, |
|
|
i * 16 + 8, j * 16, 1, b_dx, b_dy, |
|
|
edged_width), edged_width); |
|
|
|
|
|
transfer_8to16sub(&dct_codes[2 * 64], |
|
|
cur->y + (j * 16 + 8) * edged_width + (i * 16), |
|
|
get_ref(b_ref->y, b_refh->y, b_refv->y, b_refhv->y, |
|
|
i * 16, j * 16 + 8, 1, b_dx, b_dy, |
|
|
edged_width), edged_width); |
|
313 |
|
|
314 |
transfer_8to16sub(&dct_codes[3 * 64], |
b_dx = (b_dx >> 1) + roundtab_79[b_dx & 0x3]; |
315 |
cur->y + (j * 16 + 8) * edged_width + (i * 16 + 8), |
b_dy = (b_dy >> 1) + roundtab_79[b_dy & 0x3]; |
|
get_ref(b_ref->y, b_refh->y, b_refv->y, b_refhv->y, |
|
|
i * 16 + 8, j * 16 + 8, 1, b_dx, b_dy, |
|
|
edged_width), edged_width); |
|
316 |
|
|
|
b_dx = (b_dx & 3) ? (b_dx >> 1) | 1 : b_dx / 2; |
|
|
b_dy = (b_dy & 3) ? (b_dy >> 1) | 1 : b_dy / 2; |
|
317 |
|
|
318 |
/* uv-block-based compensation */ |
/* uv-block-based compensation */ |
319 |
transfer_8to16sub(&dct_codes[4 * 64], |
transfer_8to16sub(&dct_codes[4 * 64], |
332 |
|
|
333 |
break; |
break; |
334 |
|
|
|
|
|
335 |
case MODE_INTERPOLATE: /* _could_ use DIRECT, but would be overkill (no 4MV there) */ |
case MODE_INTERPOLATE: /* _could_ use DIRECT, but would be overkill (no 4MV there) */ |
336 |
case MODE_DIRECT_NO4V: |
case MODE_DIRECT_NO4V: |
337 |
|
|
338 |
|
if (quarterpel) { |
339 |
|
dx = mb->qmvs[0].x; |
340 |
|
dy = mb->qmvs[0].y; |
341 |
|
b_dx = mb->b_qmvs[0].x; |
342 |
|
b_dy = mb->b_qmvs[0].y; |
343 |
|
|
344 |
|
interpolate16x16_quarterpel((uint8_t *) f_refv->y, (uint8_t *) f_ref->y, (uint8_t *) f_refh->y, |
345 |
|
(uint8_t *) f_refh->y + 64, (uint8_t *) f_refhv->y, 16*i, 16*j, dx, dy, edged_width, 0); |
346 |
|
interpolate16x16_quarterpel((uint8_t *) b_refv->y, (uint8_t *) b_ref->y, (uint8_t *) b_refh->y, |
347 |
|
(uint8_t *) b_refh->y + 64, (uint8_t *) b_refhv->y, 16*i, 16*j, b_dx, b_dy, edged_width, 0); |
348 |
|
|
349 |
|
for (k = 0; k < 4; k++) { |
350 |
|
transfer_8to16sub2(&dct_codes[k * 64], |
351 |
|
cur->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width, |
352 |
|
f_refv->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width, |
353 |
|
b_refv->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width, |
354 |
|
edged_width); |
355 |
|
} |
356 |
|
b_dx /= 2; |
357 |
|
b_dy /= 2; |
358 |
|
dx /= 2; |
359 |
|
dy /= 2; |
360 |
|
|
361 |
|
} else { |
362 |
dx = mb->mvs[0].x; |
dx = mb->mvs[0].x; |
363 |
dy = mb->mvs[0].y; |
dy = mb->mvs[0].y; |
|
|
|
364 |
b_dx = mb->b_mvs[0].x; |
b_dx = mb->b_mvs[0].x; |
365 |
b_dy = mb->b_mvs[0].y; |
b_dy = mb->b_mvs[0].y; |
366 |
|
|
376 |
edged_width); |
edged_width); |
377 |
} |
} |
378 |
|
|
379 |
dx = (dx & 3) ? (dx >> 1) | 1 : dx / 2; |
} |
380 |
dy = (dy & 3) ? (dy >> 1) | 1 : dy / 2; |
|
381 |
|
|
382 |
|
dx = (dx >> 1) + roundtab_79[dx & 0x3]; |
383 |
|
dy = (dy >> 1) + roundtab_79[dy & 0x3]; |
384 |
|
|
385 |
b_dx = (b_dx & 3) ? (b_dx >> 1) | 1 : b_dx / 2; |
b_dx = (b_dx >> 1) + roundtab_79[b_dx & 0x3]; |
386 |
b_dy = (b_dy & 3) ? (b_dy >> 1) | 1 : b_dy / 2; |
b_dy = (b_dy >> 1) + roundtab_79[b_dy & 0x3]; |
387 |
|
|
388 |
transfer_8to16sub2(&dct_codes[4 * 64], |
transfer_8to16sub2(&dct_codes[4 * 64], |
389 |
cur->u + (y * 8) * edged_width / 2 + (x * 8), |
cur->u + (y * 8) * edged_width / 2 + (x * 8), |
404 |
break; |
break; |
405 |
|
|
406 |
case MODE_DIRECT: |
case MODE_DIRECT: |
407 |
|
if (quarterpel) { |
408 |
|
for (k=0;k<4;k++) { |
409 |
|
|
410 |
for (k=0;k<4;k++) |
dx = mb->qmvs[k].x; |
411 |
{ |
dy = mb->qmvs[k].y; |
412 |
|
b_dx = mb->b_qmvs[k].x; |
413 |
|
b_dy = mb->b_qmvs[k].y; |
414 |
|
|
415 |
|
interpolate8x8_quarterpel((uint8_t *) f_refv->y, |
416 |
|
(uint8_t *) f_ref->y, |
417 |
|
(uint8_t *) f_refh->y, |
418 |
|
(uint8_t *) f_refh->y + 64, |
419 |
|
(uint8_t *) f_refhv->y, |
420 |
|
16*i + (k&1)*8, 16*j + (k>>1)*8, dx, dy, edged_width, 0); |
421 |
|
interpolate8x8_quarterpel((uint8_t *) b_refv->y, |
422 |
|
(uint8_t *) b_ref->y, |
423 |
|
(uint8_t *) b_refh->y, |
424 |
|
(uint8_t *) b_refh->y + 64, |
425 |
|
(uint8_t *) b_refhv->y, |
426 |
|
16*i + (k&1)*8, 16*j + (k>>1)*8, b_dx, b_dy, edged_width, 0); |
427 |
|
|
428 |
|
|
429 |
|
transfer_8to16sub2(&dct_codes[k * 64], |
430 |
|
cur->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width, |
431 |
|
f_refv->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width, |
432 |
|
b_refv->y + (i * 16+(k&1)*8) + (j * 16+((k>>1)*8)) * edged_width, |
433 |
|
edged_width); |
434 |
|
} |
435 |
|
sum = mb->qmvs[0].y/2 + mb->qmvs[1].y/2 + mb->qmvs[2].y/2 + mb->qmvs[3].y/2; |
436 |
|
dy = (sum >> 3) + roundtab_76[sum & 0xf]; |
437 |
|
sum = mb->qmvs[0].x/2 + mb->qmvs[1].x/2 + mb->qmvs[2].x/2 + mb->qmvs[3].x/2; |
438 |
|
dx = (sum >> 3) + roundtab_76[sum & 0xf]; |
439 |
|
|
440 |
|
sum = mb->b_qmvs[0].y/2 + mb->b_qmvs[1].y/2 + mb->b_qmvs[2].y/2 + mb->b_qmvs[3].y/2; |
441 |
|
b_dy = (sum >> 3) + roundtab_76[sum & 0xf]; |
442 |
|
sum = mb->b_qmvs[0].x/2 + mb->b_qmvs[1].x/2 + mb->b_qmvs[2].x/2 + mb->b_qmvs[3].x/2; |
443 |
|
b_dx = (sum >> 3) + roundtab_76[sum & 0xf]; |
444 |
|
|
445 |
|
} else { |
446 |
|
for (k=0;k<4;k++) { |
447 |
dx = mb->mvs[k].x; |
dx = mb->mvs[k].x; |
448 |
dy = mb->mvs[k].y; |
dy = mb->mvs[k].y; |
449 |
|
|
450 |
b_dx = mb->b_mvs[k].x; |
b_dx = mb->b_mvs[k].x; |
451 |
b_dy = mb->b_mvs[k].y; |
b_dy = mb->b_mvs[k].y; |
452 |
|
|
|
// fprintf(stderr,"Direct Vector %d -- %d:%d %d:%d\n",k,dx,dy,b_dx,b_dy); |
|
|
|
|
453 |
transfer_8to16sub2(&dct_codes[k * 64], |
transfer_8to16sub2(&dct_codes[k * 64], |
454 |
cur->y + (i*16 + (k&1)*8) + (j*16 + (k>>1)*8 ) * edged_width, |
cur->y + (i*16 + (k&1)*8) + (j*16 + (k>>1)*8 ) * edged_width, |
455 |
get_ref(f_ref->y, f_refh->y, f_refv->y, f_refhv->y, |
get_ref(f_ref->y, f_refh->y, f_refv->y, f_refhv->y, |
467 |
sum = mb->mvs[0].y + mb->mvs[1].y + mb->mvs[2].y + mb->mvs[3].y; |
sum = mb->mvs[0].y + mb->mvs[1].y + mb->mvs[2].y + mb->mvs[3].y; |
468 |
dy = (sum == 0 ? 0 : SIGN(sum) * (roundtab[ABS(sum) % 16] + (ABS(sum) / 16) * 2)); |
dy = (sum == 0 ? 0 : SIGN(sum) * (roundtab[ABS(sum) % 16] + (ABS(sum) / 16) * 2)); |
469 |
|
|
|
|
|
470 |
sum = mb->b_mvs[0].x + mb->b_mvs[1].x + mb->b_mvs[2].x + mb->b_mvs[3].x; |
sum = mb->b_mvs[0].x + mb->b_mvs[1].x + mb->b_mvs[2].x + mb->b_mvs[3].x; |
471 |
b_dx = (sum == 0 ? 0 : SIGN(sum) * (roundtab[ABS(sum) % 16] + (ABS(sum) / 16) * 2)); |
b_dx = (sum == 0 ? 0 : SIGN(sum) * (roundtab[ABS(sum) % 16] + (ABS(sum) / 16) * 2)); |
472 |
|
|
473 |
sum = mb->b_mvs[0].y + mb->b_mvs[1].y + mb->b_mvs[2].y + mb->b_mvs[3].y; |
sum = mb->b_mvs[0].y + mb->b_mvs[1].y + mb->b_mvs[2].y + mb->b_mvs[3].y; |
474 |
b_dy = (sum == 0 ? 0 : SIGN(sum) * (roundtab[ABS(sum) % 16] + (ABS(sum) / 16) * 2)); |
b_dy = (sum == 0 ? 0 : SIGN(sum) * (roundtab[ABS(sum) % 16] + (ABS(sum) / 16) * 2)); |
475 |
|
|
476 |
/* // for QPel don't forget to always do |
} |
|
|
|
|
if (quarterpel) |
|
|
sum /= 2; |
|
|
*/ |
|
477 |
transfer_8to16sub2(&dct_codes[4 * 64], |
transfer_8to16sub2(&dct_codes[4 * 64], |
478 |
cur->u + (y * 8) * edged_width / 2 + (x * 8), |
cur->u + (y * 8) * edged_width / 2 + (x * 8), |
479 |
interpolate8x8_switch2(f_refv->u, b_ref->u, 8 * i, 8 * j, |
interpolate8x8_switch2(f_refv->u, b_ref->u, 8 * i, 8 * j, |