Parent Directory | Revision Log
Revision 1839 - (view) (download)
1 : | edgomez | 1382 | ;/**************************************************************************** |
2 : | Isibaar | 262 | ; * |
3 : | edgomez | 1382 | ; * XVID MPEG-4 VIDEO CODEC |
4 : | ; * - SSE2 optimized SAD operators - | ||
5 : | Isibaar | 262 | ; * |
6 : | edgomez | 1382 | ; * Copyright(C) 2003 Pascal Massimino <skal@planet-d.net> |
7 : | Isibaar | 262 | ; * |
8 : | ; * | ||
9 : | edgomez | 1382 | ; * This program is free software; you can redistribute it and/or modify it |
10 : | ; * under the terms of the GNU General Public License as published by | ||
11 : | ; * the Free Software Foundation; either version 2 of the License, or | ||
12 : | ; * (at your option) any later version. | ||
13 : | Isibaar | 262 | ; * |
14 : | edgomez | 1382 | ; * This program is distributed in the hope that it will be useful, |
15 : | ; * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
16 : | ; * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
17 : | ; * GNU General Public License for more details. | ||
18 : | Isibaar | 262 | ; * |
19 : | edgomez | 1382 | ; * You should have received a copy of the GNU General Public License |
20 : | ; * along with this program; if not, write to the Free Software | ||
21 : | ; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
22 : | Isibaar | 262 | ; * |
23 : | Isibaar | 1839 | ; * $Id: sad_sse2.asm,v 1.18 2008-12-02 13:44:55 Isibaar Exp $ |
24 : | edgomez | 652 | ; * |
25 : | edgomez | 1382 | ; ***************************************************************************/ |
26 : | Isibaar | 262 | |
27 : | Isibaar | 1795 | %include "nasm.inc" |
28 : | Isibaar | 262 | |
29 : | edgomez | 1382 | ;============================================================================= |
30 : | ; Read only data | ||
31 : | ;============================================================================= | ||
32 : | Isibaar | 262 | |
33 : | Isibaar | 1795 | DATA |
34 : | Isibaar | 262 | |
35 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
36 : | Isibaar | 262 | zero times 4 dd 0 |
37 : | |||
38 : | edgomez | 1382 | ;============================================================================= |
39 : | ; Code | ||
40 : | ;============================================================================= | ||
41 : | Isibaar | 262 | |
42 : | Isibaar | 1795 | SECTION .rotext align=SECTION_ALIGN |
43 : | edgomez | 1382 | |
44 : | Isibaar | 262 | cglobal sad16_sse2 |
45 : | cglobal dev16_sse2 | ||
46 : | |||
47 : | Isibaar | 1764 | cglobal sad16_sse3 |
48 : | cglobal dev16_sse3 | ||
49 : | |||
50 : | edgomez | 1382 | ;----------------------------------------------------------------------------- |
51 : | ; uint32_t sad16_sse2 (const uint8_t * const cur, <- assumed aligned! | ||
52 : | ; const uint8_t * const ref, | ||
53 : | ; const uint32_t stride, | ||
54 : | ; const uint32_t /*ignored*/); | ||
55 : | ;----------------------------------------------------------------------------- | ||
56 : | Isibaar | 262 | |
57 : | |||
58 : | Isibaar | 1764 | %macro SAD_16x16_SSE2 1 |
59 : | Isibaar | 1795 | %1 xmm0, [TMP1] |
60 : | %1 xmm1, [TMP1+TMP0] | ||
61 : | lea TMP1,[TMP1+2*TMP0] | ||
62 : | movdqa xmm2, [_EAX] | ||
63 : | movdqa xmm3, [_EAX+TMP0] | ||
64 : | lea _EAX,[_EAX+2*TMP0] | ||
65 : | edgomez | 1382 | psadbw xmm0, xmm2 |
66 : | Isibaar | 1839 | paddusw xmm4,xmm0 |
67 : | edgomez | 1382 | psadbw xmm1, xmm3 |
68 : | Isibaar | 1839 | paddusw xmm4,xmm1 |
69 : | Isibaar | 262 | %endmacro |
70 : | |||
71 : | Isibaar | 1764 | %macro SAD16_SSE2_SSE3 1 |
72 : | Isibaar | 1795 | mov _EAX, prm1 ; cur (assumed aligned) |
73 : | mov TMP1, prm2 ; ref | ||
74 : | mov TMP0, prm3 ; stride | ||
75 : | Isibaar | 262 | |
76 : | Isibaar | 1839 | pxor xmm4, xmm4 ; accum |
77 : | Isibaar | 262 | |
78 : | Isibaar | 1764 | SAD_16x16_SSE2 %1 |
79 : | SAD_16x16_SSE2 %1 | ||
80 : | SAD_16x16_SSE2 %1 | ||
81 : | SAD_16x16_SSE2 %1 | ||
82 : | SAD_16x16_SSE2 %1 | ||
83 : | SAD_16x16_SSE2 %1 | ||
84 : | SAD_16x16_SSE2 %1 | ||
85 : | SAD_16x16_SSE2 %1 | ||
86 : | Isibaar | 262 | |
87 : | Isibaar | 1839 | pshufd xmm5, xmm4, 00000010b |
88 : | paddusw xmm4, xmm5 | ||
89 : | pextrw eax, xmm4, 0 | ||
90 : | Isibaar | 1833 | |
91 : | edgomez | 1382 | ret |
92 : | Isibaar | 1764 | %endmacro |
93 : | |||
94 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
95 : | Isibaar | 1764 | sad16_sse2: |
96 : | SAD16_SSE2_SSE3 movdqu | ||
97 : | Isibaar | 1793 | ENDFUNC |
98 : | Isibaar | 262 | |
99 : | |||
100 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
101 : | Isibaar | 1764 | sad16_sse3: |
102 : | SAD16_SSE2_SSE3 lddqu | ||
103 : | Isibaar | 1793 | ENDFUNC |
104 : | Isibaar | 1764 | |
105 : | |||
106 : | edgomez | 1382 | ;----------------------------------------------------------------------------- |
107 : | ; uint32_t dev16_sse2(const uint8_t * const cur, const uint32_t stride); | ||
108 : | ;----------------------------------------------------------------------------- | ||
109 : | Isibaar | 262 | |
110 : | Isibaar | 1795 | %macro MEAN_16x16_SSE2 1 ; _EAX: src, TMP0: stride, xmm5: zero or mean => xmm4: accumulated result |
111 : | %1 xmm0, [_EAX] | ||
112 : | %1 xmm1, [_EAX+TMP0] | ||
113 : | lea _EAX, [_EAX+2*TMP0] ; + 2*stride | ||
114 : | Isibaar | 1839 | psadbw xmm0, xmm5 |
115 : | paddusw xmm4, xmm0 | ||
116 : | psadbw xmm1, xmm5 | ||
117 : | paddusw xmm4, xmm1 | ||
118 : | Isibaar | 262 | %endmacro |
119 : | |||
120 : | |||
121 : | Isibaar | 1764 | %macro MEAN16_SSE2_SSE3 1 |
122 : | Isibaar | 1795 | mov _EAX, prm1 ; src |
123 : | mov TMP0, prm2 ; stride | ||
124 : | Isibaar | 262 | |
125 : | Isibaar | 1839 | pxor xmm4, xmm4 ; accum |
126 : | pxor xmm5, xmm5 ; zero | ||
127 : | Isibaar | 262 | |
128 : | Isibaar | 1764 | MEAN_16x16_SSE2 %1 |
129 : | MEAN_16x16_SSE2 %1 | ||
130 : | MEAN_16x16_SSE2 %1 | ||
131 : | MEAN_16x16_SSE2 %1 | ||
132 : | Isibaar | 262 | |
133 : | Isibaar | 1764 | MEAN_16x16_SSE2 %1 |
134 : | MEAN_16x16_SSE2 %1 | ||
135 : | MEAN_16x16_SSE2 %1 | ||
136 : | MEAN_16x16_SSE2 %1 | ||
137 : | Isibaar | 262 | |
138 : | Isibaar | 1795 | mov _EAX, prm1 ; src again |
139 : | Isibaar | 262 | |
140 : | Isibaar | 1839 | pshufd xmm5, xmm4, 10b |
141 : | paddusw xmm5, xmm4 | ||
142 : | pxor xmm4, xmm4 ; zero accum | ||
143 : | psrlw xmm5, 8 ; sum of 256 pixels >> 8 => Mean | ||
144 : | pshuflw xmm5, xmm5, 0 ; replicate Mean | ||
145 : | packuswb xmm5, xmm5 | ||
146 : | pshufd xmm5, xmm5, 00000000b | ||
147 : | Isibaar | 262 | |
148 : | Isibaar | 1764 | MEAN_16x16_SSE2 %1 |
149 : | MEAN_16x16_SSE2 %1 | ||
150 : | MEAN_16x16_SSE2 %1 | ||
151 : | MEAN_16x16_SSE2 %1 | ||
152 : | Isibaar | 262 | |
153 : | Isibaar | 1764 | MEAN_16x16_SSE2 %1 |
154 : | MEAN_16x16_SSE2 %1 | ||
155 : | MEAN_16x16_SSE2 %1 | ||
156 : | MEAN_16x16_SSE2 %1 | ||
157 : | Isibaar | 262 | |
158 : | Isibaar | 1839 | pshufd xmm5, xmm4, 10b |
159 : | paddusw xmm5, xmm4 | ||
160 : | pextrw eax, xmm5, 0 | ||
161 : | Isibaar | 1833 | |
162 : | edgomez | 1382 | ret |
163 : | Isibaar | 1764 | %endmacro |
164 : | |||
165 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
166 : | Isibaar | 1764 | dev16_sse2: |
167 : | MEAN16_SSE2_SSE3 movdqu | ||
168 : | Isibaar | 1793 | ENDFUNC |
169 : | edgomez | 1540 | |
170 : | Isibaar | 1795 | ALIGN SECTION_ALIGN |
171 : | Isibaar | 1764 | dev16_sse3: |
172 : | MEAN16_SSE2_SSE3 lddqu | ||
173 : | Isibaar | 1793 | ENDFUNC |
174 : | Isibaar | 1790 | |
175 : | |||
176 : | %ifidn __OUTPUT_FORMAT__,elf | ||
177 : | section ".note.GNU-stack" noalloc noexec nowrite progbits | ||
178 : | %endif | ||
179 : |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |