20 |
; * along with this program; if not, write to the Free Software |
; * along with this program; if not, write to the Free Software |
21 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
22 |
; * |
; * |
23 |
; * $Id: sad_mmx.asm,v 1.11.2.2 2003-11-03 15:51:50 edgomez Exp $ |
; * $Id: sad_mmx.asm,v 1.11.2.3 2003-11-13 23:11:24 edgomez Exp $ |
24 |
; * |
; * |
25 |
; ***************************************************************************/ |
; ***************************************************************************/ |
26 |
|
|
269 |
cglobal sad16bi_mmx |
cglobal sad16bi_mmx |
270 |
cglobal sad8bi_mmx |
cglobal sad8bi_mmx |
271 |
cglobal dev16_mmx |
cglobal dev16_mmx |
272 |
|
cglobal sse8_16bit_mmx |
273 |
|
|
274 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
275 |
; |
; |
621 |
movd eax, mm6 |
movd eax, mm6 |
622 |
|
|
623 |
ret |
ret |
624 |
|
|
625 |
|
;----------------------------------------------------------------------------- |
626 |
|
; |
627 |
|
; uint32_t sse8_16bit_mmx(const int16_t *b1, |
628 |
|
; const int16_t *b2, |
629 |
|
; const uint32_t stride); |
630 |
|
; |
631 |
|
;----------------------------------------------------------------------------- |
632 |
|
|
633 |
|
; ROW_SSE_MMX src1, src2 |
; Adds the squared differences of one 16-byte row (8 words) to mm2. |
; %1 = register holding the address of the first row |
; %2 = register holding the address of the second row |
; mm2 = running accumulator, two packed dwords (must be initialised by |
; the code that uses the macro) |
; Clobbers mm0 and mm1. |
; psubw wraps around at 16 bits, so a squared term is only exact when the |
; difference of the two words fits in a signed word. |
%macro ROW_SSE_MMX 2 |
634 |
|
movq mm0, [%1] |
635 |
|
movq mm1, [%1+8] |
636 |
|
psubw mm0, [%2] |
637 |
|
psubw mm1, [%2+8] |
638 |
|
;; pmaddwd of a register with itself squares each word and adds the |
;; two squares of every adjacent pair, leaving two dword sums |
pmaddwd mm0, mm0 |
639 |
|
pmaddwd mm1, mm1 |
640 |
|
paddd mm2, mm0 |
641 |
|
paddd mm2, mm1 |
642 |
|
%endmacro |
643 |
|
|
644 |
|
; sse8_16bit_mmx: sums the squared differences between two blocks of |
; 16-bit words, 8 rows of 8 words (16 bytes) each. |
; |
; In: arguments are read from the stack; at entry (before the pushes) |
; [esp+4] = b1, [esp+8] = b2, [esp+12] = stride |
; Out: eax = sum of the two dword accumulators held in mm2 |
; Clobb: edx, mm0, mm1, mm2, mm3 (esi and edi are saved and restored) |
; |
; stride is added directly to the row addresses, so it is used as a byte |
; offset between consecutive rows. |
; NOTE(review): no emms is executed here -- presumably the caller is |
; responsible for clearing the MMX state; confirm against the callers. |
sse8_16bit_mmx: |
645 |
|
push esi |
646 |
|
push edi |
647 |
|
|
648 |
|
;; Load the function params: the +8 skips the two registers pushed |
;; above, the remaining +4/+8/+12 select b1, b2 and stride |
649 |
|
mov esi, [esp+8+4] |
650 |
|
mov edi, [esp+8+8] |
651 |
|
mov edx, [esp+8+12] |
652 |
|
|
653 |
|
;; Reset the sse accumulator (mm2 is what ROW_SSE_MMX adds into) |
654 |
|
pxor mm2, mm2 |
655 |
|
|
656 |
|
;; Process the 8 rows, fully unrolled: accumulate one row, then |
;; advance both row pointers by stride |
657 |
|
ROW_SSE_MMX esi, edi |
658 |
|
lea esi, [esi+edx] |
659 |
|
lea edi, [edi+edx] |
660 |
|
ROW_SSE_MMX esi, edi |
661 |
|
lea esi, [esi+edx] |
662 |
|
lea edi, [edi+edx] |
663 |
|
ROW_SSE_MMX esi, edi |
664 |
|
lea esi, [esi+edx] |
665 |
|
lea edi, [edi+edx] |
666 |
|
ROW_SSE_MMX esi, edi |
667 |
|
lea esi, [esi+edx] |
668 |
|
lea edi, [edi+edx] |
669 |
|
ROW_SSE_MMX esi, edi |
670 |
|
lea esi, [esi+edx] |
671 |
|
lea edi, [edi+edx] |
672 |
|
ROW_SSE_MMX esi, edi |
673 |
|
lea esi, [esi+edx] |
674 |
|
lea edi, [edi+edx] |
675 |
|
ROW_SSE_MMX esi, edi |
676 |
|
lea esi, [esi+edx] |
677 |
|
lea edi, [edi+edx] |
678 |
|
ROW_SSE_MMX esi, edi |
679 |
|
;; The pointers are advanced once more after the last row; the |
;; resulting values are not read again before esi/edi are restored |
lea esi, [esi+edx] |
680 |
|
lea edi, [edi+edx] |
681 |
|
|
682 |
|
;; Finish adding each dword of the accumulator: move the high dword |
;; down, add it to the low dword and return the low dword in eax |
683 |
|
movq mm3, mm2 |
684 |
|
psrlq mm2, 32 |
685 |
|
paddd mm2, mm3 |
686 |
|
movd eax, mm2 |
687 |
|
|
688 |
|
;; All done: restore the saved registers in reverse push order |
689 |
|
pop edi |
690 |
|
pop esi |
691 |
|
ret |