Parent Directory | Revision Log
Revision 1793 -
(view)
(download)
Original Path: trunk/xvidcore/src/image/x86_asm/gmc_mmx.asm
1 : | Skal | 1709 | ;/***************************************************************************** |
2 : | ; * | ||
3 : | ; * XVID MPEG-4 VIDEO CODEC | ||
4 : | ; * - GMC core functions - | ||
5 : | ; * Copyright(C) 2006 Pascal Massimino <skal@planet-d.net> | ||
6 : | ; * | ||
7 : | ; * This file is part of XviD, a free MPEG-4 video encoder/decoder | ||
8 : | ; * | ||
9 : | ; * XviD is free software; you can redistribute it and/or modify it | ||
10 : | ; * under the terms of the GNU General Public License as published by | ||
11 : | ; * the Free Software Foundation; either version 2 of the License, or | ||
12 : | ; * (at your option) any later version. | ||
13 : | ; * | ||
14 : | ; * This program is distributed in the hope that it will be useful, | ||
15 : | ; * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 : | ; * GNU General Public License for more details. | ||
18 : | ; * | ||
19 : | ; * You should have received a copy of the GNU General Public License | ||
20 : | ; * along with this program; if not, write to the Free Software | ||
21 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 : | ; * | ||
23 : | Isibaar | 1793 | ; * $Id: gmc_mmx.asm,v 1.4 2008-11-11 20:46:24 Isibaar Exp $ |
24 : | Skal | 1709 | ; * |
25 : | ; *************************************************************************/ | ||
26 : | |||
27 : | ;/************************************************************************** | ||
28 : | ; * | ||
29 : | ; * History: | ||
30 : | ; * | ||
31 : | ; * Jun 14 2006: initial version (during Germany/Poland match;) | ||
32 : | ; * | ||
33 : | ; *************************************************************************/ | ||
34 : | |||
; Everything below is 32-bit code: the functions in this file fetch their
; arguments from the stack through [esp + N].
BITS 32
36 : | |||
;-----------------------------------------------------------------------
; cglobal <name>
;
; Export <name> as a global symbol and (re)define ENDFUNC so that it can
; be placed right after the function's final instruction.
;
;   PREFIX      defined -> the exported symbol is "_<name>", and <name>
;                          is aliased to "_<name>" so that the label
;                          definition "<name>:" later in the file creates
;                          the prefixed symbol.
;   MARK_FUNCS  defined -> the symbol is exported with the "function"
;                          type and a size of (<sym>.endfunc - <sym>);
;                          ENDFUNC expands to the local label ".endfunc".
;               otherwise  ENDFUNC expands to nothing.
;
; When both PREFIX and MARK_FUNCS are set, the alias must be the bare
; prefixed name (a label definition cannot carry the ":function ..."
; export qualifiers), and the size expression must be built from the
; prefixed symbols, since those are the only ones that end up defined.
;-----------------------------------------------------------------------
%macro cglobal 1
  %ifdef PREFIX
    %ifdef MARK_FUNCS
      global _%1:function _%1.endfunc-_%1
      %define %1 _%1
      %define ENDFUNC .endfunc
    %else
      global _%1
      %define %1 _%1
      %define ENDFUNC
    %endif
  %else
    %ifdef MARK_FUNCS
      global %1:function %1.endfunc-%1
      %define ENDFUNC .endfunc
    %else
      global %1
      %define ENDFUNC
    %endif
  %endif
%endmacro
58 : | |||
59 : | ;////////////////////////////////////////////////////////////////////// | ||
60 : | |||
; Public entry points implemented in this file (one MMX, one SSE2 variant
; of the same 8-pixel routine).
cglobal xvid_GMC_Core_Lin_8_mmx
cglobal xvid_GMC_Core_Lin_8_sse2
63 : | |||
64 : | ;////////////////////////////////////////////////////////////////////// | ||
65 : | |||
; Read-only constants.  The section-level "align=16" qualifier is left out
; when FORMAT_COFF is defined (presumably that output format does not
; accept it -- confirm against the build configuration).
%ifndef FORMAT_COFF
SECTION .rodata align=16
%else
SECTION .rodata
%endif

ALIGN 16
Cst16:                                  ; eight 16-bit words, each = 16
  dw 16, 16, 16, 16, 16, 16, 16, 16

; Code follows.
SECTION .text
77 : | |||
78 : | ;////////////////////////////////////////////////////////////////////// | ||
79 : | ;// mmx version | ||
80 : | |||
;-----------------------------------------------------------------------
; GMC_4_SSE <i>, <out>
;
; Bilinear blend of 4 adjacent pixels using MMX registers.
;
;   %1  byte offset of the first of the 4 pixels within the row (the
;       callers in this file pass 0 and 4)
;   %2  destination MMX register; must not be mm0-mm4 or mm7
;
; In:    eax = Offsets: 16-bit words; u[k] at byte offset 2*k, v[k] at
;              byte offset 2*k + 32.  Only the low 4 bits of each word
;              are used.
;        ecx = top source row, ecx+edx = bottom source row
;        mm7 = 0 (used to widen bytes to words)
; Out:   %2  = four 16-bit sums, for k = %1 .. %1+3:
;                top[k]     *(16-u)*(16-v) + top[k+1]   *u*(16-v)
;              + bottom[k]  *(16-u)*v      + bottom[k+1]*u*v
;              (not yet rounded or shifted)
; Clobb: mm0-mm4
;-----------------------------------------------------------------------
%macro GMC_4_SSE 2

    ; per-word mask 0x000f, then the masked fractions
  pcmpeqw   mm0, mm0
  psrlw     mm0, 12
  movq      mm1, [eax + 2*(%1)]         ; u words
  movq      mm2, [eax + 2*(%1) + 2*16]  ; v words
  pand      mm1, mm0                    ; mm1 = u
  pand      mm2, mm0                    ; mm2 = v

    ; the four bilinear weights
  movq      mm0, [Cst16]
  movq      mm4, mm0
  movq      mm3, mm1
  psubw     mm0, mm1                    ; mm0 = 16-u
  psubw     mm4, mm2                    ; mm4 = 16-v
  pmullw    mm3, mm2                    ; mm3 = u*v
  pmullw    mm2, mm0                    ; mm2 = (16-u)*v
  pmullw    mm0, mm4                    ; mm0 = (16-u)*(16-v)
  pmullw    mm1, mm4                    ; mm1 = u*(16-v)

    ; bottom row: left and right neighbours
  movd      mm4, [ecx + edx + (%1)]
  punpcklbw mm4, mm7
  pmullw    mm2, mm4                    ; bottom[k]   * (16-u)*v
  movd      %2,  [ecx + edx + (%1) + 1]
  punpcklbw %2,  mm7
  pmullw    mm3, %2                     ; bottom[k+1] * u*v

    ; top row: left and right neighbours
  movd      mm4, [ecx + (%1)]
  punpcklbw mm4, mm7
  pmullw    mm4, mm0                    ; top[k]   * (16-u)*(16-v)
  movd      %2,  [ecx + (%1) + 1]
  punpcklbw %2,  mm7
  pmullw    %2,  mm1                    ; top[k+1] * u*(16-v)

    ; accumulate the four products into the output register
  paddw     mm2, mm3
  paddw     %2,  mm4
  paddw     %2,  mm2
%endmacro
120 : | |||
;-----------------------------------------------------------------------
; xvid_GMC_Core_Lin_8_mmx(Dst, Offsets, Src0, BpS, Rounder)
;
; Produces 8 output bytes, each a bilinear blend of a 2x2 source
; neighbourhood weighted by per-pixel 4-bit fractions.
;
; Stack arguments (32-bit code, plain `ret`, caller cleans up):
;   [esp+ 4] Dst      destination, 8 bytes are written
;   [esp+ 8] Offsets  16-bit fraction words (u at +0, v at +32 bytes)
;   [esp+12] Src0     top source row; bytes 0..8 are read
;   [esp+16] BpS      added to Src0 to reach the second source row
;   [esp+20] Rounder  32-bit value; only bits 16..31 are used
;
; Clobb: eax, ecx, edx, mm0-mm7.  No `emms` is executed here --
;        NOTE(review): presumably the caller is responsible; confirm.
;-----------------------------------------------------------------------
align 16
xvid_GMC_Core_Lin_8_mmx:
  pxor      mm7, mm7                    ; zero, for byte -> word widening
  mov       ecx, [esp + 12]             ; Src0
  mov       edx, [esp + 16]             ; BpS
  mov       eax, [esp +  8]             ; Offsets

  GMC_4_SSE 0, mm5                      ; pixels 0..3 -> mm5
  GMC_4_SSE 4, mm6                      ; pixels 4..7 -> mm6

    ; Replicate the upper 16 bits of Rounder into all four words.
    ; (Same result as `pshufw mm4, [esp+20], 01010101b`, using only
    ; plain MMX instructions.)
  movd      mm4, [esp + 20]
  punpcklwd mm4, mm4                    ; lo, lo, hi, hi
  punpckhdq mm4, mm4                    ; hi, hi, hi, hi
  mov       eax, [esp +  4]             ; Dst (Offsets no longer needed)

  paddw     mm5, mm4
  paddw     mm6, mm4
  psrlw     mm5, 8                      ; weights sum to 256 -> divide by 256
  psrlw     mm6, 8
  packuswb  mm5, mm6                    ; 8 words -> 8 bytes
  movq      [eax], mm5

  ret
ENDFUNC
147 : | Skal | 1709 | |
148 : | ;////////////////////////////////////////////////////////////////////// | ||
149 : | ;// SSE2 version | ||
150 : | |||
;-----------------------------------------------------------------------
; GMC_8_SSE2
;
; Bilinear blend of 8 adjacent pixels using SSE2 registers.
;
; In:    eax  = Offsets: eight 16-bit u words at +0 and eight v words
;               at +32 bytes.  Loaded with movdqa, so the address must
;               be 16-byte aligned.  Only the low 4 bits of each word
;               are used.
;        ecx  = top source row, ecx+edx = bottom source row
;        xmm7 = 0 (used to widen bytes to words)
; Out:   xmm5 = eight 16-bit sums, for k = 0 .. 7:
;                 top[k]     *(16-u)*(16-v) + top[k+1]   *u*(16-v)
;               + bottom[k]  *(16-u)*v      + bottom[k+1]*u*v
;               (not yet rounded or shifted)
; Clobb: xmm0-xmm4
;-----------------------------------------------------------------------
%macro GMC_8_SSE2 0

    ; per-word mask 0x000f, then the masked fractions
  pcmpeqw   xmm0, xmm0
  psrlw     xmm0, 12
  movdqa    xmm1, [eax]                 ; u words
  movdqa    xmm2, [eax + 2*16]          ; v words
  pand      xmm1, xmm0                  ; xmm1 = u
  pand      xmm2, xmm0                  ; xmm2 = v

    ; the four bilinear weights
  movdqa    xmm0, [Cst16]
  movdqa    xmm4, xmm0
  movdqa    xmm3, xmm1
  psubw     xmm0, xmm1                  ; xmm0 = 16-u
  psubw     xmm4, xmm2                  ; xmm4 = 16-v
  pmullw    xmm3, xmm2                  ; xmm3 = u*v
  pmullw    xmm2, xmm0                  ; xmm2 = (16-u)*v
  pmullw    xmm0, xmm4                  ; xmm0 = (16-u)*(16-v)
  pmullw    xmm1, xmm4                  ; xmm1 = u*(16-v)

    ; bottom row: left and right neighbours
  movq      xmm4, [ecx + edx]
  punpcklbw xmm4, xmm7
  pmullw    xmm2, xmm4                  ; bottom[k]   * (16-u)*v
  movq      xmm5, [ecx + edx + 1]
  punpcklbw xmm5, xmm7
  pmullw    xmm3, xmm5                  ; bottom[k+1] * u*v

    ; top row: left and right neighbours
  movq      xmm4, [ecx]
  punpcklbw xmm4, xmm7
  pmullw    xmm4, xmm0                  ; top[k]   * (16-u)*(16-v)
  movq      xmm5, [ecx + 1]
  punpcklbw xmm5, xmm7
  pmullw    xmm5, xmm1                  ; top[k+1] * u*(16-v)

    ; accumulate the four products into xmm5
  paddw     xmm2, xmm3
  paddw     xmm5, xmm4
  paddw     xmm5, xmm2
%endmacro
189 : | |||
;-----------------------------------------------------------------------
; xvid_GMC_Core_Lin_8_sse2(Dst, Offsets, Src0, BpS, Rounder)
;
; SSE2 variant: produces 8 output bytes, each a bilinear blend of a 2x2
; source neighbourhood weighted by per-pixel 4-bit fractions.
;
; Stack arguments (32-bit code, plain `ret`, caller cleans up):
;   [esp+ 4] Dst      destination, 8 bytes are written
;   [esp+ 8] Offsets  16-bit fraction words (u at +0, v at +32 bytes);
;                     read with movdqa, so must be 16-byte aligned
;   [esp+12] Src0     top source row; bytes 0..8 are read
;   [esp+16] BpS      added to Src0 to reach the second source row
;   [esp+20] Rounder  32-bit value; only bits 16..31 are used
;
; Clobb: eax, ecx, edx, xmm0-xmm5, xmm7
;-----------------------------------------------------------------------
align 16
xvid_GMC_Core_Lin_8_sse2:
  pxor      xmm7, xmm7                  ; zero, for byte -> word widening
  mov       ecx, [esp + 12]             ; Src0
  mov       edx, [esp + 16]             ; BpS
  mov       eax, [esp +  8]             ; Offsets

  GMC_8_SSE2                            ; pixels 0..7 -> xmm5

    ; Replicate the upper 16 bits of Rounder into all eight words.
  movd      xmm4, [esp + 20]
  pshuflw   xmm4, xmm4, 01010101b       ; low four words  = Rounder[16..31]
  punpckldq xmm4, xmm4                  ; spread to the high four words too
  mov       eax, [esp +  4]             ; Dst (Offsets no longer needed)

  paddw     xmm5, xmm4
  psrlw     xmm5, 8                     ; weights sum to 256 -> divide by 256
  packuswb  xmm5, xmm5                  ; 8 words -> 8 bytes (low qword)
  movq      [eax], xmm5

  ret
ENDFUNC
212 : | Skal | 1709 | |
213 : | ;////////////////////////////////////////////////////////////////////// | ||
214 : | Isibaar | 1790 | |
; Emit an empty .note.GNU-stack section on ELF targets so the object is
; marked as not needing an executable stack.  The output format name is
; compared literally, so both "elf" and "elf32" are matched (this file is
; 32-bit only, hence no elf64 case).
%ifidn __OUTPUT_FORMAT__,elf
section ".note.GNU-stack" noalloc noexec nowrite progbits
%elifidn __OUTPUT_FORMAT__,elf32
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |