Parent Directory | Revision Log
Revision 1793 - (view) (download)
1 : | edgomez | 1586 | ;/**************************************************************************** |
2 : | ; * | ||
3 : | ; * XVID MPEG-4 VIDEO CODEC | ||
4 : | ; * - K7 optimized SAD operators - | ||
5 : | ; * | ||
6 : | ; * Copyright(C) 2001 Peter Ross <pross@xvid.org> | ||
7 : | ; * 2002 Pascal Massimino <skal@planet-d.net> | ||
8 : | ; * 2004 Andre Werthmann <wertmann@aei.mpg.de> | ||
9 : | ; * | ||
10 : | ; * This program is free software; you can redistribute it and/or modify it | ||
11 : | ; * under the terms of the GNU General Public License as published by | ||
12 : | ; * the Free Software Foundation; either version 2 of the License, or | ||
13 : | ; * (at your option) any later version. | ||
14 : | ; * | ||
15 : | ; * This program is distributed in the hope that it will be useful, | ||
16 : | ; * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
17 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 : | ; * GNU General Public License for more details. | ||
19 : | ; * | ||
20 : | ; * You should have received a copy of the GNU General Public License | ||
21 : | ; * along with this program; if not, write to the Free Software | ||
22 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 : | ; * | ||
24 : | Isibaar | 1793 | ; * $Id: sad_mmx.asm,v 1.3 2008-11-11 20:46:24 Isibaar Exp $ |
25 : | edgomez | 1586 | ; * |
26 : | ; ***************************************************************************/ | ||
27 : | |||
; Assemble everything that follows as 64-bit code; the routines in this file
; address memory through the 64-bit registers rdi, rsi and rdx.
28 : | BITS 64 | ||
29 : | |||
;-----------------------------------------------------------------------------
; cglobal <name>
;
; Exports <name> as a global symbol and (re)defines ENDFUNC, which is meant
; to be placed right after the function body.
;
;   PREFIX     - when defined, the exported symbol gets a leading underscore
;                and <name> is aliased to the prefixed symbol, so the label
;                that later defines the function resolves to the exported
;                name.
;   MARK_FUNCS - when defined, the symbol is exported with the ELF
;                ":function" type and a size of <end label> - <start label>;
;                ENDFUNC then expands to the local label ".endfunc" marking
;                the end of the function.  Otherwise ENDFUNC expands to
;                nothing.
;-----------------------------------------------------------------------------
%macro cglobal 1
	%ifdef PREFIX
		%ifdef MARK_FUNCS
			; The size expression has to reference the prefixed symbol
			; (the unprefixed one is never defined once the alias below
			; is active), and the alias itself must be a bare symbol
			; name so that "<name>:" still forms a valid label.
			global _%1:function _%1.endfunc-_%1
			%define %1 _%1
			%define ENDFUNC .endfunc
		%else
			global _%1
			%define %1 _%1
			%define ENDFUNC
		%endif
	%else
		%ifdef MARK_FUNCS
			global %1:function %1.endfunc-%1
			%define ENDFUNC .endfunc
		%else
			global %1
			%define ENDFUNC
		%endif
	%endif
%endmacro
51 : | |||
52 : | ;============================================================================= | ||
53 : | ; Read only data | ||
54 : | ;============================================================================= | ||
55 : | |||
; Read-only data section.  COFF output gets the plain section name; every
; other output format additionally requests 16-byte alignment.
%ifndef FORMAT_COFF
SECTION .rodata align=16
%else
SECTION .rodata
%endif
61 : | |||
62 : | ;============================================================================= | ||
63 : | ; Code | ||
64 : | ;============================================================================= | ||
65 : | |||
; Code section, aligned to 16 bytes.
SECTION .text align=16

; Entry points exported from this file (symbol decoration is handled by the
; cglobal macro).
cglobal sse8_8bit_x86_64
cglobal sse8_16bit_x86_64
70 : | |||
71 : | ;----------------------------------------------------------------------------- | ||
72 : | ; | ||
73 : | ; uint32_t sse8_16bit_x86_64(const int16_t *b1, | ||
74 : | ; const int16_t *b2, | ||
75 : | ; const uint32_t stride); | ||
76 : | ; | ||
77 : | ;----------------------------------------------------------------------------- | ||
78 : | |||
;-----------------------------------------------------------------------------
; ROW_SSE_16Bit_MMX <ptrA>, <ptrB>
;
; Processes one row of eight 16-bit words (16 bytes read at each pointer):
; the word-wise differences are squared and summed pairwise by pmaddwd, and
; the resulting dwords are added to the two dword lanes of mm2.
; Clobbers mm0 and mm1; the pointers themselves are not modified.
;-----------------------------------------------------------------------------
%macro ROW_SSE_16Bit_MMX 2
	movq	mm1, [%1+8]		; words 4..7 of the row at <ptrA>
	movq	mm0, [%1]		; words 0..3 of the row at <ptrA>
	psubw	mm1, [%2+8]		; difference against <ptrB>, upper half
	psubw	mm0, [%2]		; difference against <ptrB>, lower half
	pmaddwd	mm1, mm1		; square each word, add adjacent pairs
	pmaddwd	mm0, mm0
	paddd	mm2, mm1		; fold both halves into the accumulator
	paddd	mm2, mm0
%endmacro
89 : | |||
;-----------------------------------------------------------------------------
; sse8_16bit_x86_64
;
; Sum of squared differences between two blocks of 8 rows x 8 16-bit words.
;
; In:   rdi = b1, rsi = b2, edx = stride (added unscaled to both pointers
;       after every row, i.e. a distance in bytes)
; Out:  eax = sum of the squared differences over all 64 words
;
; Clobbers rdx, rsi, rdi and mm0-mm3.  No emms is issued here, so the MMX
; state is still active when the function returns.
;-----------------------------------------------------------------------------
sse8_16bit_x86_64:

	;; stride is a 32-bit argument: the System V AMD64 ABI leaves the upper
	;; half of rdx unspecified, so clear it before using rdx in addresses.
	mov	edx, edx

	;; Reset the sse accumulator
	pxor	mm2, mm2

	;; Eight rows; both pointers advance by one stride after each row.
	;; (The operand order only flips the sign of the differences, which
	;; the squaring removes.)
%rep 8
	ROW_SSE_16Bit_MMX rsi, rdi
	lea	rsi, [rsi+rdx]
	lea	rdi, [rdi+rdx]
%endrep

	;; Add the high dword of the accumulator to the low dword
	movq	mm3, mm2
	psrlq	mm2, 32
	paddd	mm2, mm3
	movd	eax, mm2

	;; All done
	ret
ENDFUNC
115 : | edgomez | 1586 | |
116 : | ;----------------------------------------------------------------------------- | ||
117 : | ; | ||
118 : | ; uint32_t sse8_8bit_x86_64(const uint8_t *b1, | ||
119 : | ; const uint8_t *b2, | ||
120 : | ; const uint32_t stride); | ||
121 : | ; | ||
122 : | ;----------------------------------------------------------------------------- | ||
123 : | |||
;-----------------------------------------------------------------------------
; ROW_SSE_8bit_MMX <ptrA>, <ptrB>
;
; Processes one row of eight bytes read at each pointer: the bytes are
; widened to 16-bit words by interleaving with mm7 (zero-extension when mm7
; is zero, which the caller in this file sets up), subtracted, squared and
; summed pairwise by pmaddwd, and added to the two dword lanes of mm6.
; Clobbers mm0-mm3; the pointers themselves are not modified.
;-----------------------------------------------------------------------------
%macro ROW_SSE_8bit_MMX 2
	movq		mm2, [%2]	; row at <ptrB>
	movq		mm0, [%1]	; row at <ptrA>

	movq		mm3, mm2	; second copy of each row for the high half
	movq		mm1, mm0

	punpcklbw	mm2, mm7	; <ptrB> bytes 0..3 -> words
	punpckhbw	mm3, mm7	; <ptrB> bytes 4..7 -> words

	punpcklbw	mm0, mm7	; <ptrA> bytes 0..3 -> words
	punpckhbw	mm1, mm7	; <ptrA> bytes 4..7 -> words

	psubw		mm1, mm3	; high-half differences
	psubw		mm0, mm2	; low-half differences

	pmaddwd		mm1, mm1	; square each word, add adjacent pairs
	pmaddwd		mm0, mm0

	paddd		mm6, mm1	; fold both halves into the accumulator
	paddd		mm6, mm0
%endmacro
146 : | |||
;-----------------------------------------------------------------------------
; sse8_8bit_x86_64
;
; Sum of squared differences between two blocks of 8 rows x 8 bytes.  The
; bytes are zero-extended before the subtraction, i.e. treated as unsigned.
;
; In:   rdi, rsi = the two block pointers, edx = stride (added unscaled to
;       both pointers after every row, i.e. a distance in bytes)
; Out:  eax = sum of the squared differences over all 64 bytes
;
; Clobbers rdx, rsi, rdi, mm0-mm3, mm6 and mm7.  No emms is issued here, so
; the MMX state is still active when the function returns.
;-----------------------------------------------------------------------------
sse8_8bit_x86_64:
	;; stride is a 32-bit argument: the System V AMD64 ABI leaves the upper
	;; half of rdx unspecified, so clear it before using rdx in addresses.
	mov	edx, edx

	;; Reset the sse accumulator
	pxor	mm6, mm6

	;; Used to interleave 8bit data with 0x00 values
	pxor	mm7, mm7

	;; Eight rows; both pointers advance by one stride after each row.
%rep 8
	ROW_SSE_8bit_MMX rsi, rdi
	lea	rsi, [rsi+rdx]
	lea	rdi, [rdi+rdx]
%endrep

	;; Add the high dword of the accumulator to the low dword
	;; (mm7 is free again once the row loop is finished)
	movq	mm7, mm6
	psrlq	mm6, 32
	paddd	mm6, mm7
	movd	eax, mm6

	;; All done
	ret
ENDFUNC
170 : | Isibaar | 1790 | |
; Emit an empty .note.GNU-stack section on ELF output so the linker does not
; have to assume this object needs an executable stack.  The output format
; is reported as "elf", "elf32" or "elf64" depending on the assembler and
; the -f option used, so each spelling is checked; the three cases are
; mutually exclusive.
%ifidn __OUTPUT_FORMAT__,elf
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
%ifidn __OUTPUT_FORMAT__,elf32
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
%ifidn __OUTPUT_FORMAT__,elf64
section ".note.GNU-stack" noalloc noexec nowrite progbits
%endif
174 : |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |