; Source: trunk/xvidcore/src/bitstream/x86_asm/cbp_3dne.asm (repository revision 851)
; (Repository annotation: last changed by edgomez, revision 851.)
;/**************************************************************************
; *
; * XVID MPEG-4 VIDEO CODEC
; * mmx cbp calc
; *
; * This program is an implementation of a part of one or more MPEG-4
; * Video tools as specified in ISO/IEC 14496-2 standard. Those intending
; * to use this software module in hardware or software products are
; * advised that its use may infringe existing patents or copyrights, and
; * any such use would be at such party's own risk. The original
; * developer of this software module and his/her company, and subsequent
; * editors and their companies, will have no liability for use of this
; * software or modifications or derivatives thereof.
; *
; * This program is free software; you can redistribute it and/or modify
; * it under the terms of the GNU General Public License as published by
; * the Free Software Foundation; either version 2 of the License, or
; * (at your option) any later version.
; *
; * This program is distributed in the hope that it will be useful,
; * but WITHOUT ANY WARRANTY; without even the implied warranty of
; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
; * GNU General Public License for more details.
; *
; * You should have received a copy of the GNU General Public License
; * along with this program; if not, write to the Free Software
; * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
; *
; *************************************************************************/


; These 3dne functions are compatible with iSSE, but are optimized specifically
; for K7 pipelines.
;
;------------------------------------------------------------------------------
; 09.12.2002 Athlon optimizations contributed by Jaan Kalda
;------------------------------------------------------------------------------

; Syntax: NASM (Intel operand order).  Target: 32-bit x86.
bits 32

; The original file opens a .data section here even though it defines no data.
section .data

;-----------------------------------------------------------------------------
; cglobal <symbol>
;
; Export <symbol> as a global label.
;
; If PREFIX is defined when the file is assembled, the exported name is
; <symbol> with a leading underscore, and <symbol> is redefined to that
; underscored spelling so that the later label definition and all references
; in this file resolve to the exported name.
; Otherwise <symbol> is exported unchanged.
;-----------------------------------------------------------------------------
%macro cglobal 1
    %ifdef PREFIX
        global _%1
        %define %1 _%1
    %else
        global %1
    %endif
%endmacro

section .text

cglobal calc_cbp_3dne

;===========================================================================
;
; uint32_t calc_cbp_3dne(const int16_t coeff[6][64]);
;
;===========================================================================

;-----------------------------------------------------------------------------
; calc_cbp <slot>
;
; Code fragment that tests one 128-byte block (64 words of 16 bits) for
; non-zero content and records the result in a stack slot.
;
; In:    eax           = address of the block to test
;        esp           = base of a scratch area with at least <slot>+1 dwords
; Out:   [esp+slot*4]  = non-zero if any of words 1..63 of the block is
;                        non-zero, zero otherwise; word 0 is left out of the
;                        test by the PSHUFW selector
;                        (NOTE(review): word 0 is presumably the DC
;                        coefficient - confirm against the caller)
;        eax           = eax + 128 when <slot> is non-zero (next block),
;                        0 when <slot> is 0
;        edx           = 0 when <slot> is 0, otherwise untouched
; Clobb: mm0-mm7, flags
;
; The instruction order is hand-scheduled (see the K7 note in the file
; header); keep it as is unless the change is measured.
;-----------------------------------------------------------------------------
%macro calc_cbp 1
    pshufw      mm0, [eax], 229         ; 229 = 11100101b: result words = source words 1,1,2,3
    movq        mm1, [eax+8]
    por         mm0, [eax+64]           ; second half of the block is ORed in at +64
    por         mm1, [eax+72]
    movq        mm2, [eax+16]
    movq        mm3, [eax+24]
    por         mm2, [eax+80]
    por         mm3, [eax+88]
    movq        mm4, [eax+32]
    movq        mm5, [eax+40]
    por         mm4, [eax+96]
    por         mm5, [eax+104]
    movq        mm6, [eax+48]
    movq        mm7, [eax+56]
    por         mm6, [eax+112]
    por         mm7, [eax+120]
    por         mm1, mm0
  %if %1
    sub         eax, byte -128          ; eax += 128; original note: needed 3 bytes for alignment
  %else
    xor         eax, eax                ; last block: zero eax and edx for the code
    xor         edx, edx                ; that follows the final expansion
  %endif
    por         mm3, mm2                ; reduce the eight partial ORs into mm7
    por         mm5, mm4
    por         mm7, mm6
    por         mm3, mm1
    por         mm7, mm5
    por         mm7, mm3
    packsswb    mm7, mm7                ; signed saturation keeps every non-zero word non-zero as a byte
    movd        [esp+%1*4], mm7         ; low dword = the four packed words
%endmacro



;-----------------------------------------------------------------------------
; uint32_t calc_cbp_3dne(const int16_t coeff[6][64]);
;
; In:    [esp+4] = coeff, six consecutive blocks of 64 16-bit words
; Out:   eax     = 6-bit mask; bit 5 belongs to coeff[0], bit 4 to coeff[1],
;                  ... bit 0 to coeff[5].  A bit is set when the calc_cbp
;                  fragment stored a non-zero dword for that block.
; Clobb: edx, mm0-mm7, flags
; Stack: 24 bytes of scratch (six dwords) below the return address
;
; NOTE(review): no EMMS is executed before returning; presumably the caller
; is responsible for clearing the MMX state - confirm.
;-----------------------------------------------------------------------------
align 16                                ; AMD K7, in cache: ca 80 clk
calc_cbp_3dne:
    mov         eax, [esp+ 4]           ; coeff
    lea         esp, [esp-24]           ; reserve six dword result slots
    calc_cbp    5                       ; bit 5
    calc_cbp    4                       ; b4
    calc_cbp    3                       ; b3
    calc_cbp    2                       ; b2
    calc_cbp    1                       ; b1
    calc_cbp    0                       ; b0; leaves eax = edx = 0

    ; For each slot: cmp 0, slot sets CF exactly when the slot is non-zero,
    ; and adc eax, eax shifts eax left by one while inserting CF as bit 0.
    cmp         eax, [esp+5*4]          ; eax is still 0 here
    adc         eax, eax
    cmp         edx, [esp+4*4]
    adc         eax, eax
    cmp         edx, [esp+3*4]
    adc         eax, eax
    cmp         edx, [esp+2*4]
    adc         eax, eax
    cmp         edx, [esp+1*4]
    adc         eax, eax
    cmp         edx, [esp+0*4]
    adc         eax, eax
    add         esp, byte 24            ; release the scratch slots
    ret
; (Listing extracted from a ViewVC 1.0.4 repository browser page.)