[svn] / trunk / xvidcore / src / dct / x86_asm / fdct_sse2_skal.asm Repository:
ViewVC logotype

Diff of /trunk/xvidcore/src/dct/x86_asm/fdct_sse2_skal.asm

Parent Directory | Revision Log | View Patch

Left column: revision 1627, Mon Aug 1 10:53:46 2005 UTC | Right column: revision 1848, Fri Dec 5 10:33:47 2008 UTC
# Line 19  Line 19 
19  ; *  along with this program; if not, write to the Free Software  ; *  along with this program; if not, write to the Free Software
20  ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA  ; *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
21  ; *  ; *
22  ; * $Id: fdct_sse2_skal.asm,v 1.7 2005-08-01 10:53:46 Isibaar Exp $  ; * $Id: fdct_sse2_skal.asm,v 1.14 2008-12-05 10:33:47 Isibaar Exp $
23  ; *  ; *
24  ; ***************************************************************************/  ; ***************************************************************************/
25    
26  BITS 32  %include "nasm.inc"
   
 %macro cglobal 1  
         %ifdef PREFIX  
                 %ifdef MARK_FUNCS  
                         global _%1:function %1.endfunc-%1  
                         %define %1 _%1:function %1.endfunc-%1  
                 %else  
                         global _%1  
                         %define %1 _%1  
                 %endif  
         %else  
                 %ifdef MARK_FUNCS  
                         global %1:function %1.endfunc-%1  
                 %else  
                         global %1  
                 %endif  
         %endif  
 %endmacro  
27    
28  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
29  ;  ;
# Line 88  Line 70 
70  ; Read only data  ; Read only data
71  ;=============================================================================  ;=============================================================================
72    
73  %ifdef FORMAT_COFF  DATA
 SECTION .rodata  
 %else  
 SECTION .rodata align=16  
 %endif  
74    
75  ALIGN 16  ALIGN SECTION_ALIGN
76  tan1:    times 8 dw 0x32ec    ; tan( pi/16)  tan1:    times 8 dw 0x32ec    ; tan( pi/16)
77  tan2:    times 8 dw 0x6a0a    ; tan(2pi/16)  (=sqrt(2)-1)  tan2:    times 8 dw 0x6a0a    ; tan(2pi/16)  (=sqrt(2)-1)
78  tan3:    times 8 dw 0xab0e    ; tan(3pi/16)-1  tan3:    times 8 dw 0xab0e    ; tan(3pi/16)-1
# Line 104  Line 82 
82  ; Inverse DCT tables  ; Inverse DCT tables
83  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
84    
85  ALIGN 16  ALIGN SECTION_ALIGN
86  iTab1:  iTab1:
87    dw 0x4000, 0x539f, 0x4000, 0x22a3    dw 0x4000, 0x539f, 0x4000, 0x22a3
88    dw 0x4000, 0xdd5d, 0x4000, 0xac61    dw 0x4000, 0xdd5d, 0x4000, 0xac61
# Line 145  Line 123 
123    dw 0x3b21, 0x14c3, 0x979e, 0xc4df    dw 0x3b21, 0x14c3, 0x979e, 0xc4df
124    dw 0x14c3, 0x587e, 0x587e, 0x979e    dw 0x14c3, 0x587e, 0x587e, 0x979e
125    
126  ALIGN 16  ALIGN SECTION_ALIGN
127  Walken_Idct_Rounders:  Walken_Idct_Rounders:
128    dd  65536, 65536, 65536, 65536    dd  65536, 65536, 65536, 65536
129    dd   3597,  3597,  3597,  3597    dd   3597,  3597,  3597,  3597
# Line 165  Line 143 
143  ; Forward DCT tables  ; Forward DCT tables
144  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
145    
146  ALIGN 16  ALIGN SECTION_ALIGN
147  fTab1:  fTab1:
148    dw 0x4000, 0x4000, 0x58c5, 0x4b42,    dw 0x4000, 0x4000, 0x58c5, 0x4b42,
149    dw 0xdd5d, 0xac61, 0xa73b, 0xcdb7,    dw 0xdd5d, 0xac61, 0xa73b, 0xcdb7,
# Line 207  Line 185 
185    dw 0x28ba, 0x9dac, 0x14c3, 0xc4df    dw 0x28ba, 0x9dac, 0x14c3, 0xc4df
186    
187    
188  ALIGN 16  ALIGN SECTION_ALIGN
189  Fdct_Rnd0: dw  6,8,8,8, 6,8,8,8  Fdct_Rnd0: dw  6,8,8,8, 6,8,8,8
190  Fdct_Rnd1: dw  8,8,8,8, 8,8,8,8  Fdct_Rnd1: dw  8,8,8,8, 8,8,8,8
191  Fdct_Rnd2: dw 10,8,8,8, 8,8,8,8  Fdct_Rnd2: dw 10,8,8,8, 8,8,8,8
# Line 217  Line 195 
195  ; Code  ; Code
196  ;=============================================================================  ;=============================================================================
197    
198  SECTION .text  TEXT
199    
200  cglobal idct_sse2_skal  cglobal idct_sse2_skal
201  cglobal fdct_sse2_skal  cglobal fdct_sse2_skal
# Line 228  Line 206 
206    
207  %macro iMTX_MULT 4   ; %1=src, %2 = Table to use, %3=rounder, %4=Shift  %macro iMTX_MULT 4   ; %1=src, %2 = Table to use, %3=rounder, %4=Shift
208    
209    movdqa  xmm0, [ecx+%1*16]     ; xmm0 = [01234567]    movdqa  xmm0, [_ECX+%1*16]     ; xmm0 = [01234567]
210    
211    pshuflw xmm0, xmm0, 11011000b ; [02134567]  ; these two shufflings could be    pshuflw xmm0, xmm0, 11011000b ; [02134567]  ; these two shufflings could be
212    pshufhw xmm0, xmm0, 11011000b ; [02134657]  ; integrated in zig-zag orders    pshufhw xmm0, xmm0, 11011000b ; [02134657]  ; integrated in zig-zag orders
# Line 257  Line 235 
235    
236    pshufhw xmm6, xmm6, 00011011b ; [01234567]    pshufhw xmm6, xmm6, 00011011b ; [01234567]
237    
238    movdqa  [ecx+%1*16], xmm6    movdqa  [_ECX+%1*16], xmm6
239    
240  %endmacro  %endmacro
241    
# Line 387  Line 365 
365  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
366    
367  %macro TEST_ROW 2     ; %1:src,  %2:label x8  %macro TEST_ROW 2     ; %1:src,  %2:label x8
368    mov eax, [%1   ]    mov _EAX, [%1   ]
369    mov edx, [%1+ 8]    mov _EDX, [%1+ 8]
370    or  eax, [%1+ 4]    or  _EAX, [%1+ 4]
371    or  edx, [%1+12]    or  _EDX, [%1+12]
372    or  eax, edx    or  _EAX, _EDX
373    jz near %2    jz near %2
374  %endmacro  %endmacro
375    
# Line 400  Line 378 
378  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
379  ; IEEE1180 and Walken compatible version  ; IEEE1180 and Walken compatible version
380    
381  ALIGN 16  ALIGN SECTION_ALIGN
382  idct_sse2_skal:  idct_sse2_skal:
383    
384    mov ecx, [esp+ 4]  ; Src    PUSH_XMM6_XMM7
385    
386    TEST_ROW ecx, .Row0_Round    mov _ECX, prm1  ; Src
387    
388      TEST_ROW _ECX, .Row0_Round
389    iMTX_MULT  0, iTab1, Walken_Idct_Rounders + 16*0, 11    iMTX_MULT  0, iTab1, Walken_Idct_Rounders + 16*0, 11
390    jmp .Row1    jmp .Row1
391  .Row0_Round  .Row0_Round:
392    movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 8*0]    movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 8*0]
393    movdqa [ecx  ], xmm0    movdqa [_ECX  ], xmm0
394    
395  .Row1  .Row1:
396    TEST_ROW ecx+16, .Row1_Round    TEST_ROW _ECX+16, .Row1_Round
397    iMTX_MULT  1, iTab2, Walken_Idct_Rounders + 16*1, 11    iMTX_MULT  1, iTab2, Walken_Idct_Rounders + 16*1, 11
398    jmp .Row2    jmp .Row2
399  .Row1_Round  .Row1_Round:
400    movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 16*1]    movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 16*1]
401    movdqa [ecx+16  ], xmm0    movdqa [_ECX+16  ], xmm0
402    
403  .Row2  .Row2:
404    TEST_ROW ecx+32, .Row2_Round    TEST_ROW _ECX+32, .Row2_Round
405    iMTX_MULT  2, iTab3, Walken_Idct_Rounders + 16*2, 11    iMTX_MULT  2, iTab3, Walken_Idct_Rounders + 16*2, 11
406    jmp .Row3    jmp .Row3
407  .Row2_Round  .Row2_Round:
408    movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 16*2]    movdqa xmm0, [Walken_Idct_Rounders + 16*8 + 16*2]
409    movdqa [ecx+32  ], xmm0    movdqa [_ECX+32  ], xmm0
410    
411  .Row3  .Row3:
412    TEST_ROW ecx+48, .Row4    TEST_ROW _ECX+48, .Row4
413    iMTX_MULT  3, iTab4, Walken_Idct_Rounders + 16*3, 11    iMTX_MULT  3, iTab4, Walken_Idct_Rounders + 16*3, 11
414    
415  .Row4  .Row4:
416    TEST_ROW ecx+64, .Row5    TEST_ROW _ECX+64, .Row5
417    iMTX_MULT  4, iTab1, Walken_Idct_Rounders + 16*4, 11    iMTX_MULT  4, iTab1, Walken_Idct_Rounders + 16*4, 11
418    
419  .Row5  .Row5:
420    TEST_ROW ecx+80, .Row6    TEST_ROW _ECX+80, .Row6
421    iMTX_MULT  5, iTab4, Walken_Idct_Rounders + 16*5, 11    iMTX_MULT  5, iTab4, Walken_Idct_Rounders + 16*5, 11
422    
423  .Row6  .Row6:
424    TEST_ROW ecx+96, .Row7    TEST_ROW _ECX+96, .Row7
425    iMTX_MULT  6, iTab3, Walken_Idct_Rounders + 16*6, 11    iMTX_MULT  6, iTab3, Walken_Idct_Rounders + 16*6, 11
426    
427  .Row7  .Row7:
428    TEST_ROW ecx+112, .End    TEST_ROW _ECX+112, .End
429    iMTX_MULT  7, iTab2, Walken_Idct_Rounders + 16*7, 11    iMTX_MULT  7, iTab2, Walken_Idct_Rounders + 16*7, 11
430  .End  .End:
431    
432    iLLM_PASS ecx    iLLM_PASS _ECX
433    
434      POP_XMM6_XMM7
435    ret    ret
436  .endfunc  ENDFUNC
437    
438  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
439  ; Helper macro fLLM_PASS  ; Helper macro fLLM_PASS
# Line 564  Line 545 
545    
546  %macro fMTX_MULT 3   ; %1=src, %2 = Coeffs, %3=rounders  %macro fMTX_MULT 3   ; %1=src, %2 = Coeffs, %3=rounders
547    
548    movdqa   xmm0, [ecx+%1*16+0]   ; xmm0 = [0123][4567]    movdqa   xmm0, [_ECX+%1*16+0]   ; xmm0 = [0123][4567]
549    pshufhw  xmm1, xmm0, 00011011b ; xmm1 = [----][7654]    pshufhw  xmm1, xmm0, 00011011b ; xmm1 = [----][7654]
550    pshufd   xmm0, xmm0, 01000100b    pshufd   xmm0, xmm0, 01000100b
551    pshufd   xmm1, xmm1, 11101110b    pshufd   xmm1, xmm1, 11101110b
# Line 598  Line 579 
579    
580    psraw    xmm0, 4               ; => [-2048, 2047]    psraw    xmm0, 4               ; => [-2048, 2047]
581    
582    movdqa  [ecx+%1*16+0], xmm0    movdqa  [_ECX+%1*16+0], xmm0
583  %endmacro  %endmacro
584    
585  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
586  ; Function Forward DCT  ; Function Forward DCT
587  ;-----------------------------------------------------------------------------  ;-----------------------------------------------------------------------------
588    
589  ALIGN 16  ALIGN SECTION_ALIGN
590  fdct_sse2_skal:  fdct_sse2_skal:
591    mov ecx, [esp+4]    PUSH_XMM6_XMM7
592    fLLM_PASS ecx+0, 3    mov _ECX, prm1
593      fLLM_PASS _ECX+0, 3
594    fMTX_MULT  0, fTab1, Fdct_Rnd0    fMTX_MULT  0, fTab1, Fdct_Rnd0
595    fMTX_MULT  1, fTab2, Fdct_Rnd2    fMTX_MULT  1, fTab2, Fdct_Rnd2
596    fMTX_MULT  2, fTab3, Fdct_Rnd1    fMTX_MULT  2, fTab3, Fdct_Rnd1
# Line 617  Line 599 
599    fMTX_MULT  5, fTab4, Fdct_Rnd1    fMTX_MULT  5, fTab4, Fdct_Rnd1
600    fMTX_MULT  6, fTab3, Fdct_Rnd1    fMTX_MULT  6, fTab3, Fdct_Rnd1
601    fMTX_MULT  7, fTab2, Fdct_Rnd1    fMTX_MULT  7, fTab2, Fdct_Rnd1
602    
603      POP_XMM6_XMM7
604    ret    ret
605  .endfunc  ENDFUNC
606    
607    ; Mac-specific workaround for misaligned DCT tables
608    ALIGN SECTION_ALIGN
609      times 8 dw 0
610    
611    %ifidn __OUTPUT_FORMAT__,elf
612    section ".note.GNU-stack" noalloc noexec nowrite progbits
613    %endif
614    

Legend:
Removed from v.1627
Changed lines
Added in v.1848

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4