358 |
ret |
ret |
359 |
|
|
360 |
|
|
361 |
|
;===========================================================================
;
; void transfer_8to16sub2_xmm(int16_t * const dct,
;                             uint8_t * const cur,
;                             const uint8_t * ref1,
;                             const uint8_t * ref2,
;                             const uint32_t stride);
;
; For one 8x8 block, stores the 16-bit difference between each cur pixel
; and the rounded average of the matching ref1/ref2 pixels:
;
;     dct[8*y + x] = cur[y*stride + x]
;                  - ((ref1[y*stride + x] + ref2[y*stride + x] + 1) >> 1)
;
; ABI:      32-bit cdecl, all five arguments on the stack.
; Needs:    MMX plus pavgb.
; Reads:    8 bytes per row from cur, ref1 and ref2; consecutive rows are
;           `stride` bytes apart.  cur is only read, never written.
; Writes:   64 int16 values (8 packed rows of 16 bytes) starting at dct.
; Saved:    edi, esi, ebx (pushed on entry, popped before ret).
; Clobbers: eax, edx, mm0-mm3, mm7, eflags.
; Note:     no emms is executed here, so MMX state is still live on return.
;
;===========================================================================

align 16
cglobal transfer_8to16sub2_xmm
transfer_8to16sub2_xmm:

        push    edi
        push    esi
        push    ebx

        ; The three pushes above moved esp down by 12 bytes, so the return
        ; address sits at [esp + 12] and the first argument at [esp + 12 + 4].
        mov     edi, [esp + 12 +  4]    ; edi = dct
        mov     esi, [esp + 12 +  8]    ; esi = cur
        mov     ebx, [esp + 12 + 12]    ; ebx = ref1
        mov     edx, [esp + 12 + 16]    ; edx = ref2
        mov     eax, [esp + 12 + 20]    ; eax = stride (bytes between source rows)

        pxor    mm7, mm7                ; mm7 = 0, zero-extends bytes in the unpacks

        ; Eight rows, unrolled at assembly time.  The three source pointers
        ; advance by `stride` after each row; dct is addressed with a constant
        ; displacement of row * 16 (8 coefficients * 2 bytes per row).
%assign T8TO16SUB2_ROW 0
%rep 8
        movq    mm0, [esi]              ; mm0 = cur row (8 pixels)
        movq    mm2, [ebx]              ; mm2 = ref1 row
        movq    mm1, mm0                ; mm1 = second copy of cur row
        pavgb   mm2, [edx]              ; mm2 = (ref1 + ref2 + 1) >> 1, per byte

        punpcklbw mm0, mm7              ; mm0 = cur(3-0) as words
        movq    mm3, mm2                ; mm3 = second copy of avg row
        punpckhbw mm1, mm7              ; mm1 = cur(7-4) as words
        punpcklbw mm2, mm7              ; mm2 = avg(3-0) as words
        punpckhbw mm3, mm7              ; mm3 = avg(7-4) as words

        psubw   mm0, mm2                ; mm0 = cur(3-0) - avg(3-0)
        psubw   mm1, mm3                ; mm1 = cur(7-4) - avg(7-4)

        movq    [edi + T8TO16SUB2_ROW*16 + 0], mm0
        movq    [edi + T8TO16SUB2_ROW*16 + 8], mm1

  %if T8TO16SUB2_ROW < 7
        ; Step every source pointer to the next image row (skipped after
        ; the last row, where the pointers are no longer needed).
        add     esi, eax
        add     ebx, eax
        add     edx, eax
  %endif
  %assign T8TO16SUB2_ROW T8TO16SUB2_ROW + 1
%endrep
%undef T8TO16SUB2_ROW

        ; Restore callee-saved registers in reverse push order.
        pop     ebx
        pop     esi
        pop     edi

        ret
592 |
|
|
593 |
;=========================================================================== |
;=========================================================================== |
594 |
; |
; |