[svn] / trunk / xvidcore / src / motion / x86_asm / sad_mmx.asm Repository:
ViewVC logotype

Diff of /trunk/xvidcore/src/motion/x86_asm/sad_mmx.asm

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1423, Mon Apr 12 14:05:08 2004 UTC revision 1424, Mon Apr 12 15:49:56 2004 UTC
# Line 20  Line 20 
20  ; *  along with this program; if not, write to the Free Software  ; *  along with this program; if not, write to the Free Software
21  ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA  ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
22  ; *  ; *
23  ; * $Id: sad_mmx.asm,v 1.12 2004-03-22 22:36:24 edgomez Exp $  ; * $Id: sad_mmx.asm,v 1.13 2004-04-12 15:49:56 edgomez Exp $
24  ; *  ; *
25  ; ***************************************************************************/  ; ***************************************************************************/
26    
# Line 270  Line 270 
270  cglobal sad8bi_mmx  cglobal sad8bi_mmx
271  cglobal dev16_mmx  cglobal dev16_mmx
272  cglobal sse8_16bit_mmx  cglobal sse8_16bit_mmx
273    cglobal sse8_8bit_mmx
274    
275  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
276  ;  ;
# Line 630  Line 631 
631  ;  ;
632  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
633    
634  %macro ROW_SSE_MMX 2  %macro ROW_SSE_16bit_MMX 2
635    movq mm0, [%1]    movq mm0, [%1]
636    movq mm1, [%1+8]    movq mm1, [%1+8]
637    psubw mm0, [%2]    psubw mm0, [%2]
# Line 654  Line 655 
655    pxor mm2, mm2    pxor mm2, mm2
656    
657    ;; Let's go    ;; Let's go
658    ROW_SSE_MMX esi, edi  %rep 8
659    lea esi, [esi+edx]    ROW_SSE_16bit_MMX esi, edi
   lea edi, [edi+edx]  
   ROW_SSE_MMX esi, edi  
   lea esi, [esi+edx]  
   lea edi, [edi+edx]  
   ROW_SSE_MMX esi, edi  
   lea esi, [esi+edx]  
   lea edi, [edi+edx]  
   ROW_SSE_MMX esi, edi  
   lea esi, [esi+edx]  
   lea edi, [edi+edx]  
   ROW_SSE_MMX esi, edi  
   lea esi, [esi+edx]  
   lea edi, [edi+edx]  
   ROW_SSE_MMX esi, edi  
   lea esi, [esi+edx]  
   lea edi, [edi+edx]  
   ROW_SSE_MMX esi, edi  
   lea esi, [esi+edx]  
   lea edi, [edi+edx]  
   ROW_SSE_MMX esi, edi  
660    lea esi, [esi+edx]    lea esi, [esi+edx]
661    lea edi, [edi+edx]    lea edi, [edi+edx]
662    %endrep
663    
664    ;; Finish adding each dword of the accumulator    ;; Finish adding each dword of the accumulator
665    movq mm3, mm2    movq mm3, mm2
# Line 689  Line 671 
671    pop edi    pop edi
672    pop esi    pop esi
673    ret    ret
674    
675    ;-----------------------------------------------------------------------------
676    ;
677    ; uint32_t sse8_8bit_mmx(const uint8_t *b1,
678    ;                        const uint8_t *b2,
679    ;                        const uint32_t stride);
680    ;
681    ;-----------------------------------------------------------------------------
682    
683    %macro ROW_SSE_8bit_MMX 2 ; %1, %2 = addresses of two 8-byte rows; adds their squared differences to mm6; expects mm7 == 0; overwrites mm0-mm3
684      movq mm0, [%1] ; load the 8 bytes of the row at %1
685      movq mm2, [%2] ; load the 8 bytes of the row at %2
686    
687      movq mm1, mm0  ; second copy of the %1 row, used for its high 4 bytes
688      movq mm3, mm2  ; second copy of the %2 row, used for its high 4 bytes
689    
690      punpcklbw mm0, mm7 ; interleave the low 4 bytes with mm7 (zero): bytes become zero-extended 16-bit words
691      punpckhbw mm1, mm7 ; same for the high 4 bytes of the %1 row
692    
693      punpcklbw mm2, mm7 ; low 4 bytes of the %2 row widened to 16-bit words
694      punpckhbw mm3, mm7 ; high 4 bytes of the %2 row widened to 16-bit words
695    
696      psubw mm0, mm2 ; per-word difference (%1 row - %2 row), low 4 elements
697      psubw mm1, mm3 ; per-word difference (%1 row - %2 row), high 4 elements
698    
699      pmaddwd mm0, mm0 ; square each word and add adjacent pairs: two dword sums of squares
700      pmaddwd mm1, mm1 ; same for the high 4 differences
701    
702      paddd mm6, mm0 ; accumulate the low-half sums into the two dwords of mm6
703      paddd mm6, mm1 ; accumulate the high-half sums into the two dwords of mm6
704    %endmacro
705    
706    sse8_8bit_mmx: ; sum of squared byte differences over 8 rows of 8 bytes; result is returned in eax
707      push esi ; esi and edi serve as the two row pointers below, so save them first
708      push edi
709    
710      ;; Load the function params
711      mov esi, [esp+8+4] ; 1st argument (b1); +8 skips the two pushes above, +4 skips the return address
712      mov edi, [esp+8+8] ; 2nd argument (b2)
713      mov edx, [esp+8+12] ; 3rd argument (stride), added to both pointers after each row
714    
715      ;; Reset the sse accumulator
716      pxor mm6, mm6 ; mm6 holds two dword partial sums
717    
718      ;; Used to interleave 8bit data with 0x00 values
719      pxor mm7, mm7 ; must stay zero for every ROW_SSE_8bit_MMX expansion
720    
721      ;; Let's go
722    %rep 8 ; expanded at assembly time into 8 copies, one per row; there is no runtime loop counter
723      ROW_SSE_8bit_MMX esi, edi ; accumulate one 8-byte row pair into mm6
724      lea esi, [esi+edx] ; advance the first pointer by stride bytes
725      lea edi, [edi+edx] ; advance the second pointer by stride bytes
726    %endrep
727    
728      ;; Finish adding each dword of the accumulator
729      movq mm7, mm6 ; mm7 is reused as scratch: the zero constant is no longer needed
730      psrlq mm6, 32 ; bring the high dword partial sum down into the low dword
731      paddd mm6, mm7 ; low dword = high partial sum + low partial sum
732      movd eax, mm6 ; return value = low dword of mm6
733    
734      ;; All done
735      pop edi ; NOTE(review): no emms is issued in this routine; presumably the caller resets MMX state - confirm
736      pop esi
737      ret

Legend:
Removed from v.1423  
changed lines
  Added in v.1424

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4