Parent Directory | Revision Log
Revision 1557 - (view) (download)
/*****************************************************************************
 *
 *  XVID MPEG-4 VIDEO CODEC
 *  - QPel interpolation with altivec optimization -
 *
 *  Copyright(C) 2004 Christoph Nägeli <chn@kbw.ch>
 *
 *  This program is free software ; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation ; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY ; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program ; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
 *
 * $Id: qpel_altivec.c,v 1.1 2004-10-17 10:20:15 edgomez Exp $
 *
 ****************************************************************************/
25 : | |||
26 : | |||
27 : | |||
28 : | #ifdef HAVE_ALTIVEC_H | ||
29 : | #include <altivec.h> | ||
30 : | #endif | ||
31 : | |||
32 : | #include "../../portab.h" | ||
33 : | |||
34 : | #undef DEBUG | ||
35 : | #include <stdio.h> | ||
36 : | |||
37 : | static const vector signed char FIR_Tab_16[17] = { | ||
38 : | (vector signed char)AVV( 14, -3, 2, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ), | ||
39 : | (vector signed char)AVV( 23, 19, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ), | ||
40 : | (vector signed char)AVV( -7, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ), | ||
41 : | (vector signed char)AVV( 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0 ), | ||
42 : | (vector signed char)AVV( -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0, 0 ), | ||
43 : | (vector signed char)AVV( 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0, 0 ), | ||
44 : | (vector signed char)AVV( 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0, 0 ), | ||
45 : | (vector signed char)AVV( 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0, 0 ), | ||
46 : | (vector signed char)AVV( 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0, 0 ), | ||
47 : | (vector signed char)AVV( 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0, 0 ), | ||
48 : | (vector signed char)AVV( 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0, 0 ), | ||
49 : | (vector signed char)AVV( 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1, 0 ), | ||
50 : | (vector signed char)AVV( 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3, -1 ), | ||
51 : | (vector signed char)AVV( 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -6, 3 ), | ||
52 : | (vector signed char)AVV( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 20, 20, -7 ), | ||
53 : | (vector signed char)AVV( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 3, -6, 19, 23 ), | ||
54 : | (vector signed char)AVV( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 2, -3, 14 ) | ||
55 : | }; | ||
56 : | |||
57 : | static const vector signed short FIR_Tab_8[9] = { | ||
58 : | (vector signed short)AVV( 14, -3, 2, -1, 0, 0, 0, 0 ), | ||
59 : | (vector signed short)AVV( 23, 19, -6, 3, -1, 0, 0, 0 ), | ||
60 : | (vector signed short)AVV( -7, 20, 20, -6, 3, -1, 0, 0 ), | ||
61 : | (vector signed short)AVV( 3, -6, 20, 20, -6, 3, -1, 0 ), | ||
62 : | (vector signed short)AVV( -1, 3, -6, 20, 20, -6, 3, -1 ), | ||
63 : | (vector signed short)AVV( 0, -1, 3, -6, 20, 20, -6, 3 ), | ||
64 : | (vector signed short)AVV( 0, 0, -1, 3, -6, 20, 20, -7 ), | ||
65 : | (vector signed short)AVV( 0, 0, 0, -1, 3, -6, 19, 23 ), | ||
66 : | (vector signed short)AVV( 0, 0, 0, 0, -1, 2, -3, 14 ) | ||
67 : | }; | ||
68 : | |||
/* Processing with FIR_Tab */

/*
 * PROCESS_FIR_16(x): add the contribution of source sample x to all 16
 * running sums.
 *
 * Byte x of vec_src is broadcast into tmp, paired with zero bytes from ox00
 * so it can be used as a 16-bit operand, multiplied by row x of FIR_Tab_16
 * (widened to shorts) and accumulated: sums1 receives output pixels 0..7,
 * sums2 output pixels 8..15.
 *
 * Expects firs, tmp, vec_src, ox00, sums1 and sums2 in the calling scope.
 * Wrapped in do/while(0) so it acts as exactly one statement.
 */
#define PROCESS_FIR_16(x) \
	do {\
		firs = FIR_Tab_16[(x)];\
		\
		tmp = vec_splat(vec_src, (x));\
		sums1 = vec_mladd( (vector signed short)vec_mergeh(ox00, tmp), vec_unpackh(firs), sums1 );\
		sums2 = vec_mladd( (vector signed short)vec_mergel(ox00, tmp), vec_unpackl(firs), sums2 );\
	} while (0)
76 : | |||
/*
 * PROCESS_FIR_8(x): add the contribution of source sample x to the 8
 * running sums.
 *
 * Byte x of vec_src is broadcast, paired with zero bytes from ox00 so it can
 * be used as a 16-bit operand, multiplied by row x of FIR_Tab_8 and
 * accumulated into sums.
 *
 * Expects firs, tmp, vec_src, ox00 and sums in the calling scope.
 * Wrapped in do/while(0) so it acts as exactly one statement.
 */
#define PROCESS_FIR_8(x) \
	do {\
		firs = FIR_Tab_8[(x)];\
		tmp = (vector signed short)vec_mergeh( ox00, vec_splat(vec_src, (x)) );\
		sums = vec_mladd(firs, tmp, sums);\
	} while (0)
81 : | |||
/* Post-processing hook for the plain passes: expands to no code at all. */
#define NOTHING()
84 : | |||
#pragma mark -

/* "Postprocessing" macros */

/*
 * AVRG_16(): average the 16 filtered bytes in tmp with the 16 bytes in
 * vec_src.
 *
 * Per pixel the result is (tmp + vec_src + 1 - vec_rnd) >> 1, computed in
 * 16-bit lanes (sums1: pixels 0..7, sums2: pixels 8..15) and packed back
 * into tmp with unsigned saturation. tmp is temporarily reused to hold the
 * constant 1, which serves both as the "+1" term and as the shift count.
 *
 * Wrapped in do/while(0) so it acts as exactly one statement.
 */
#define AVRG_16() \
	do {\
		sums1 = (vector signed short)vec_mergeh(ox00,tmp);\
		sums2 = (vector signed short)vec_mergel(ox00,tmp);\
		sums1 = vec_add(sums1, (vector signed short)vec_mergeh(ox00,vec_src) );\
		sums2 = vec_add(sums2, (vector signed short)vec_mergel(ox00,vec_src) );\
		tmp = (vector unsigned char)vec_splat_u16(1);\
		sums1 = vec_add(sums1, (vector signed short)tmp);\
		sums2 = vec_add(sums2, (vector signed short)tmp);\
		sums1 = vec_sub(sums1, vec_rnd);\
		sums2 = vec_sub(sums2, vec_rnd);\
		sums1 = vec_sra(sums1, (vector unsigned short)tmp);\
		sums2 = vec_sra(sums2, (vector unsigned short)tmp);\
		tmp = vec_packsu(sums1,sums2);\
	} while (0)
102 : | |||
/*
 * AVRG_UP_16_H(): like AVRG_16(), but averages against the source shifted
 * by one pixel: vec_src is reloaded with the 16 bytes starting at Src + 1
 * (unaligned load built from two vec_ld and a vec_lvsl permute).
 */
#define AVRG_UP_16_H() \
	do {\
		vec_src = vec_perm(vec_ld(1,Src),vec_ld(17,Src),vec_lvsl(1,Src));\
		AVRG_16();\
	} while (0)

/*
 * AVRG_UP_16_V(): like AVRG_16(), but averages against the source shifted
 * by one row. Byte 0 of vec_src is overwritten with Src[16 * BpS] and the
 * vector is then rotated left by one byte, so bytes 0..14 hold the former
 * bytes 1..15 and byte 15 holds the newly fetched sample.
 */
#define AVRG_UP_16_V() \
	do {\
		((unsigned char*)&vec_src)[0] = Src[16 * BpS];\
		vec_src = vec_perm(vec_src,vec_src,vec_lvsl(1,(unsigned char*)0));\
		AVRG_16();\
	} while (0)
111 : | |||
112 : | |||
/*
 * AVRG_8(): average the 8 filtered bytes in st with the first 8 bytes of
 * vec_src.
 *
 * Per pixel the result is (st + vec_src + 1 - vec_rnd) >> 1, computed in
 * 16-bit lanes in sums. st is temporarily reused to hold the constant 1
 * (the "+1" term and the shift count). vec_packsu(sums,sums) leaves the
 * same 8 result bytes in both halves of st.
 *
 * Wrapped in do/while(0) so it acts as exactly one statement.
 */
#define AVRG_8() \
	do {\
		sums = (vector signed short)vec_mergeh(ox00, st);\
		sums = vec_add(sums, (vector signed short)vec_mergeh(ox00, vec_src));\
		st = (vector unsigned char)vec_splat_u16(1);\
		sums = vec_add(sums, (vector signed short)st);\
		sums = vec_sub(sums, vec_rnd);\
		sums = vec_sra(sums, (vector unsigned short)st);\
		st = vec_packsu(sums,sums);\
	} while (0)


/*
 * AVRG_UP_8(): like AVRG_8(), but vec_src is first rotated left by one
 * byte so that the average is taken against source samples 1..8 instead
 * of 0..7.
 */
#define AVRG_UP_8() \
	do {\
		vec_src = vec_perm(vec_src, vec_src, vec_lvsl(1,(unsigned char*)0));\
		AVRG_8();\
	} while (0)
126 : | |||
#pragma mark -

/*
 * Postprocessing Macros for the Pass_16 Add functions
 *
 * The "Add" variants blend the freshly computed pixels in tmp with what is
 * already stored at Dst, using vec_avg. Every macro is wrapped in
 * do/while(0) so it acts as exactly one statement.
 */

/* Average tmp with the 16 bytes currently at Dst (unaligned row load). */
#define ADD_16_H()\
	do {\
		tmp = vec_avg(tmp, vec_perm(vec_ld(0,Dst),vec_ld(16,Dst),vec_lvsl(0,Dst)));\
	} while (0)

/* AVRG_16() followed by ADD_16_H(). */
#define AVRG_ADD_16_H()\
	do {\
		AVRG_16();\
		ADD_16_H();\
	} while (0)

/* AVRG_UP_16_H() followed by ADD_16_H(). */
#define AVRG_UP_ADD_16_H()\
	do {\
		AVRG_UP_16_H();\
		ADD_16_H();\
	} while (0)

/*
 * Average tmp with the 16 bytes of the Dst column (Dst[0], Dst[BpS], ...).
 * The column is gathered byte by byte into vec_src, which is therefore
 * clobbered. Needs the loop counter j from VARS_V().
 */
#define ADD_16_V() \
	do {\
		for(j = 0; j < 16; j++) {\
			((unsigned char*)&vec_src)[j] = Dst[j * BpS];\
		}\
		tmp = vec_avg(tmp, vec_src);\
	} while (0)

/* AVRG_16() followed by ADD_16_V(). */
#define AVRG_ADD_16_V()\
	do {\
		AVRG_16();\
		ADD_16_V();\
	} while (0)

/* AVRG_UP_16_V() followed by ADD_16_V(). */
#define AVRG_UP_ADD_16_V()\
	do {\
		AVRG_UP_16_V();\
		ADD_16_V();\
	} while (0)
153 : | |||
#pragma mark -

/*
 * Postprocessing Macros for the Pass_8 Add functions
 *
 * The "Add" variants blend the freshly computed pixels in st with what is
 * already stored at Dst, using vec_avg on 16-bit lanes. Every macro is
 * wrapped in do/while(0) so it acts as exactly one statement.
 */

/*
 * Average the first 8 bytes of st with the 8 bytes currently at Dst
 * (unaligned row load). Uses sums and tmp as scratch; the 8 result bytes
 * end up in both halves of st.
 */
#define ADD_8_H()\
	do {\
		sums = (vector signed short)vec_mergeh(ox00, st);\
		tmp = (vector signed short)vec_mergeh(ox00,vec_perm(vec_ld(0,Dst),vec_ld(16,Dst),vec_lvsl(0,Dst)));\
		sums = vec_avg(sums,tmp);\
		st = vec_packsu(sums,sums);\
	} while (0)

/* AVRG_8() followed by ADD_8_H(). */
#define AVRG_ADD_8_H()\
	do {\
		AVRG_8();\
		ADD_8_H();\
	} while (0)

/* AVRG_UP_8() followed by ADD_8_H(). */
#define AVRG_UP_ADD_8_H()\
	do {\
		AVRG_UP_8();\
		ADD_8_H();\
	} while (0)


/*
 * Average the first 8 bytes of st with the 8 bytes of the Dst column
 * (Dst[0], Dst[BpS], ...). The column is gathered element by element into
 * the 16-bit lanes of tmp. Needs the loop counter j from VARS_V().
 */
#define ADD_8_V()\
	do {\
		for(j = 0; j < 8; j++) {\
			((short*)&tmp)[j] = (short)Dst[j * BpS];\
		}\
		sums = (vector signed short)vec_mergeh(ox00,st);\
		sums = vec_avg(sums,tmp);\
		st = vec_packsu(sums,sums);\
	} while (0)

/* AVRG_8() followed by ADD_8_V(). */
#define AVRG_ADD_8_V()\
	do {\
		AVRG_8();\
		ADD_8_V();\
	} while (0)

/* AVRG_UP_8() followed by ADD_8_V(). */
#define AVRG_UP_ADD_8_V()\
	do {\
		AVRG_UP_8();\
		ADD_8_V();\
	} while (0)
186 : | |||
#pragma mark -

/*
 * Load/Store Macros
 *
 * The load macros fill vec_src with the source samples of the current line.
 * The vertical variants gather one byte per row and need the loop counter j
 * from VARS_V(). Every macro is wrapped in do/while(0) so it acts as exactly
 * one statement.
 */

/* Horizontal: unaligned load of the 16 bytes starting at Src. */
#define LOAD_H() \
	do {\
		vec_src = vec_perm(vec_ld(0,Src),vec_ld(16,Src),vec_lvsl(0,Src));\
	} while (0)

/* Vertical, 16 pixels: gather Src[0], Src[BpS], ..., Src[15 * BpS]. */
#define LOAD_V_16() \
	do {\
		for(j = 0; j < 16; j++) {\
			((unsigned char*)&vec_src)[j] = Src[j * BpS];\
		}\
	} while (0)

/*
 * Vertical, 8 pixels: gather the 9 samples Src[0] .. Src[8 * BpS] that
 * PROCESS_FIR_8(0..8) consumes.
 */
#define LOAD_V_8() \
	do {\
		for(j = 0; j <= 8; j++) {\
			((unsigned char*)&vec_src)[j] = Src[j * BpS];\
		}\
	} while (0)

/*
 * Vertical, 8 pixels, "Avrg_Up" variant.
 *
 * AVRG_UP_8() rotates vec_src by one byte and then uses its first 8 bytes,
 * i.e. the former bytes 1..8. Together with PROCESS_FIR_8(0..8) that means
 * only samples 0..8 are ever consumed, so the same 9 samples as LOAD_V_8()
 * are loaded. (Loading a tenth sample would read Src[9 * BpS], one row past
 * the data this pass needs.)
 */
#define LOAD_UP_V_8() \
	do {\
		for(j = 0; j <= 8; j++) {\
			((unsigned char*)&vec_src)[j] = Src[j * BpS];\
		}\
	} while (0)
204 : | |||
/*
 * Store macros: write the result bytes of the current line to Dst.
 * Every macro is wrapped in do/while(0) so it acts as exactly one statement.
 */

/*
 * Horizontal, 16 pixels, Dst possibly unaligned.
 *
 * tmp is rotated according to the alignment of Dst (vec_lvsr) and a byte
 * select mask is derived from oxFF/ox00 with the same permute. The two
 * vectors loaded from Dst + 0 and Dst + 16 are then merged with the rotated
 * result under that mask and written back, so bytes around the 16 result
 * bytes keep the values that were read from memory.
 * Needs mask from VARS_H_16() and oxFF/ox00 from the pass function.
 */
#define STORE_H_16() \
	do {\
		mask = vec_lvsr(0,Dst);\
		tmp = vec_perm(tmp,tmp,mask);\
		mask = vec_perm(oxFF, ox00, mask);\
		vec_st( vec_sel(tmp, vec_ld(0,Dst), mask), 0, Dst);\
		vec_st( vec_sel(vec_ld(16,Dst), tmp, mask), 16, Dst);\
	} while (0)

/* Vertical, 16 pixels: scatter the bytes of tmp to Dst[0], Dst[BpS], ... */
#define STORE_V_16() \
	do {\
		for(j = 0; j < 16; j++) {\
			Dst[j * BpS] = ((unsigned char*)&tmp)[j];\
		}\
	} while (0)


/*
 * Horizontal, 8 pixels.
 *
 * Builds a byte select mask from mask_00ff and the alignment of Dst, merges
 * st with the vector loaded from Dst under that mask and stores the merged
 * vector back. Needs mask and mask_00ff from VARS_H_8().
 * NOTE(review): this presumably relies on Dst being 8-byte aligned, as the
 * comment on the H_Pass_*_8 instantiations states - confirm.
 */
#define STORE_H_8() \
	do {\
		mask = vec_perm(mask_00ff, mask_00ff, vec_lvsr(0,Dst) );\
		st = vec_sel(st, vec_ld(0,Dst), mask);\
		vec_st(st, 0, Dst);\
	} while (0)

/* Vertical, 8 pixels: scatter the first 8 bytes of st to Dst[0], Dst[BpS], ... */
#define STORE_V_8() \
	do {\
		for(j = 0; j < 8; j++) {\
			Dst[j * BpS] = ((unsigned char*)&st)[j];\
		}\
	} while (0)
225 : | |||
226 : | |||
#pragma mark -

/*
 * Additional variable declaration/initialization macros
 *
 * Each pass function invokes exactly one of these right after its own
 * declarations, to bring the extra locals needed by its load/store and
 * post-processing macros into scope. They expand to declarations, so they
 * must not be wrapped in a block.
 */

/* Horizontal 16-pixel passes: permute/select mask used by STORE_H_16(). */
#define VARS_H_16() \
	register vector unsigned char mask


/* Vertical passes: loop counter used by the byte-wise gather/scatter loops. */
#define VARS_V() \
	register unsigned j


/*
 * Horizontal 8-pixel passes: select mask for STORE_H_8() plus the constant
 * mask_00ff it is derived from (vec_pack of an all-zero and an all-ones
 * halfword vector).
 */
#define VARS_H_8() \
	register vector unsigned char mask;\
	register vector unsigned char mask_00ff = \
		vec_pack(vec_splat_u16(0), vec_splat_u16(-1))
243 : | |||
#pragma mark -

/* Function macros */

/*
 * MAKE_PASS_16 defines one 16-pixel filter pass function called NAME with
 * the signature
 *
 *   void NAME(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
 *
 * Macro parameters:
 *   POSTPROC         statement macro applied to the filtered bytes in tmp
 *                    before they are stored
 *   ADDITIONAL_VARS  declaration macro providing the extra locals used by
 *                    the load/store/post-processing macros
 *   LOAD_SOURCE      statement macro filling vec_src with source samples 0..15
 *   STORE_DEST       statement macro writing the 16 bytes of tmp to Dst
 *   NEXT_PIXEL       multiplier used to reach the 17th source sample,
 *                    Src[16 * NEXT_PIXEL]
 *   NEXT_LINE        amount added to Src and Dst after each iteration
 *
 * The generated function runs H iterations. Each iteration starts both sums
 * at (16 - Rnd), accumulates the FIR_Tab_16 weighted contributions of 17
 * source samples, shifts right by 5, packs to bytes with unsigned saturation,
 * runs POSTPROC and stores the result.
 */
#define MAKE_PASS_16(NAME, POSTPROC, ADDITIONAL_VARS, LOAD_SOURCE, STORE_DEST, NEXT_PIXEL, NEXT_LINE) \
void \
NAME(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd) \
{\
	register vector signed short sums1,sums2;\
	register vector unsigned char ox00;\
	register vector unsigned char oxFF;\
	register vector signed char firs;\
	vector signed short vec_rnd;\
	vector signed short s16Rnd;\
	vector unsigned char vec_src;\
	vector unsigned char tmp;\
	\
	register unsigned c;\
	\
	ADDITIONAL_VARS();\
	\
	ox00 = vec_splat_u8(0);\
	oxFF = vec_splat_u8(-1);\
	\
	/* broadcast Rnd into every 16-bit lane */\
	*((short*)&vec_rnd) = (short)Rnd;\
	vec_rnd = vec_splat(vec_rnd,0);\
	/* s16Rnd = 16 - Rnd, the start value of every sum */\
	s16Rnd = vec_add(vec_splat_s16(8),vec_splat_s16(8));\
	s16Rnd = vec_sub(s16Rnd, vec_rnd);\
	\
	/* Data stream control word for vec_dst: block size 1, 16 blocks, */\
	/* stride BpS. The stride field is only 16 bits wide, so BpS is   */\
	/* masked to keep a negative or oversized stride from overwriting */\
	/* the size and count fields.                                     */\
	c = ((1u << 24) | (16u << 16) | ((unsigned)BpS & 0xFFFFu));\
	\
	while(H-- > 0) {\
		\
		/* prefetch hint on stream 2, released by vec_dss(2) below */\
		vec_dst(Src, c, 2);\
		\
		sums1 = s16Rnd;\
		sums2 = s16Rnd;\
		\
		LOAD_SOURCE();\
		\
		PROCESS_FIR_16(0);\
		PROCESS_FIR_16(1);\
		PROCESS_FIR_16(2);\
		PROCESS_FIR_16(3);\
		\
		PROCESS_FIR_16(4);\
		PROCESS_FIR_16(5);\
		PROCESS_FIR_16(6);\
		PROCESS_FIR_16(7);\
		\
		PROCESS_FIR_16(8);\
		PROCESS_FIR_16(9);\
		PROCESS_FIR_16(10);\
		PROCESS_FIR_16(11);\
		\
		PROCESS_FIR_16(12);\
		PROCESS_FIR_16(13);\
		PROCESS_FIR_16(14);\
		PROCESS_FIR_16(15);\
		\
		/* 17th sample: it is not part of vec_src, so fetch it directly */\
		firs = FIR_Tab_16[16];\
		*((uint8_t*)&tmp) = Src[16*(NEXT_PIXEL)];\
		tmp = vec_splat(tmp,0);\
		\
		sums1 = vec_mladd( (vector signed short)vec_mergeh(ox00,tmp),vec_unpackh(firs),sums1 );\
		sums2 = vec_mladd( (vector signed short)vec_mergel(ox00,tmp),vec_unpackl(firs),sums2 );\
		\
		/* >> 5, then pack to bytes with unsigned saturation */\
		tmp = (vector unsigned char)vec_splat_u16(5);\
		sums1 = vec_sra(sums1,(vector unsigned short)tmp);\
		sums2 = vec_sra(sums2,(vector unsigned short)tmp);\
		tmp = vec_packsu(sums1,sums2);\
		\
		POSTPROC();\
		\
		STORE_DEST();\
		\
		Src += (NEXT_LINE);\
		Dst += (NEXT_LINE);\
	}\
	vec_dss(2);\
}
325 : | |||
/*
 * MAKE_PASS_8 defines one 8-pixel filter pass function called NAME with
 * the signature
 *
 *   void NAME(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)
 *
 * Macro parameters:
 *   POSTPROC         statement macro applied to the filtered bytes in st
 *                    before they are stored
 *   ADDITIONAL_VARS  declaration macro providing the extra locals used by
 *                    the load/store/post-processing macros
 *   LOAD_SOURCE      statement macro filling vec_src with the source samples
 *   STORE_DEST       statement macro writing the result bytes of st to Dst
 *   INC              amount added to Src and Dst after each iteration
 *
 * The generated function runs H iterations. Each iteration starts the sums
 * at (16 - Rnd), accumulates the FIR_Tab_8 weighted contributions of source
 * samples 0..8, shifts right by 5, packs to bytes with unsigned saturation,
 * runs POSTPROC and stores the result.
 */
#define MAKE_PASS_8(NAME,POSTPROC,ADDITIONAL_VARS, LOAD_SOURCE, STORE_DEST, INC) \
void \
NAME(uint8_t *Dst, const uint8_t *Src, int32_t H, int32_t BpS, int32_t Rnd)\
{\
	register vector signed short sums;\
	register vector signed short firs;\
	register vector unsigned char ox00;\
	vector signed short tmp;\
	vector signed short vec_rnd;\
	vector signed short fir_bias;\
	vector unsigned char vec_src;\
	vector unsigned char st;\
	\
	ADDITIONAL_VARS();\
	\
	ox00 = vec_splat_u8(0);\
	\
	/* broadcast Rnd into every 16-bit lane */\
	*((short*)&vec_rnd) = (short)Rnd;\
	vec_rnd = vec_splat(vec_rnd,0);\
	\
	/* fir_bias = 16 - Rnd, the start value of every sum */\
	fir_bias = vec_add(vec_splat_s16(8), vec_splat_s16(8));\
	fir_bias = vec_sub(fir_bias, vec_rnd);\
	\
	for( ; H > 0; H--) {\
		\
		sums = fir_bias;\
		LOAD_SOURCE();\
		\
		PROCESS_FIR_8(0);\
		PROCESS_FIR_8(1);\
		PROCESS_FIR_8(2);\
		PROCESS_FIR_8(3);\
		\
		PROCESS_FIR_8(4);\
		PROCESS_FIR_8(5);\
		PROCESS_FIR_8(6);\
		PROCESS_FIR_8(7);\
		\
		PROCESS_FIR_8(8);\
		\
		/* >> 5, then pack to bytes with unsigned saturation */\
		sums = vec_sra(sums, vec_splat_u16(5));\
		st = vec_packsu(sums,sums);\
		\
		POSTPROC();\
		\
		STORE_DEST();\
		\
		Src += INC;\
		Dst += INC;\
	}\
}
375 : | |||
376 : | |||
377 : | /* Create the actual Functions | ||
378 : | ***************************************/ | ||
379 : | |||
380 : | /* These functions assume no alignment */ | ||
381 : | MAKE_PASS_16(H_Pass_16_Altivec_C, NOTHING, VARS_H_16, LOAD_H, STORE_H_16, 1, BpS) | ||
382 : | MAKE_PASS_16(H_Pass_Avrg_16_Altivec_C, AVRG_16, VARS_H_16, LOAD_H, STORE_H_16, 1, BpS) | ||
383 : | MAKE_PASS_16(H_Pass_Avrg_Up_16_Altivec_C, AVRG_UP_16_H, VARS_H_16, LOAD_H, STORE_H_16, 1, BpS) | ||
384 : | |||
385 : | MAKE_PASS_16(V_Pass_16_Altivec_C, NOTHING, VARS_V, LOAD_V_16, STORE_V_16, BpS, 1) | ||
386 : | MAKE_PASS_16(V_Pass_Avrg_16_Altivec_C, AVRG_16, VARS_V, LOAD_V_16, STORE_V_16, BpS, 1) | ||
387 : | MAKE_PASS_16(V_Pass_Avrg_Up_16_Altivec_C, AVRG_UP_16_V, VARS_V, LOAD_V_16, STORE_V_16, BpS, 1) | ||
388 : | |||
389 : | |||
390 : | /* These functions assume: | ||
391 : | * Dst: 8 Byte aligned | ||
392 : | * BpS: Multiple of 8 | ||
393 : | */ | ||
394 : | MAKE_PASS_8(H_Pass_8_Altivec_C, NOTHING, VARS_H_8, LOAD_H, STORE_H_8, BpS) | ||
395 : | MAKE_PASS_8(H_Pass_Avrg_8_Altivec_C, AVRG_8, VARS_H_8, LOAD_H, STORE_H_8, BpS) | ||
396 : | MAKE_PASS_8(H_Pass_Avrg_Up_8_Altivec_C, AVRG_UP_8, VARS_H_8, LOAD_H, STORE_H_8, BpS) | ||
397 : | |||
398 : | /* These functions assume no alignment */ | ||
399 : | MAKE_PASS_8(V_Pass_8_Altivec_C, NOTHING, VARS_V, LOAD_V_8, STORE_V_8, 1) | ||
400 : | MAKE_PASS_8(V_Pass_Avrg_8_Altivec_C, AVRG_8, VARS_V, LOAD_V_8, STORE_V_8, 1) | ||
401 : | MAKE_PASS_8(V_Pass_Avrg_Up_8_Altivec_C, AVRG_UP_8, VARS_V, LOAD_UP_V_8, STORE_V_8, 1) | ||
402 : | |||
403 : | |||
404 : | /* These functions assume no alignment */ | ||
405 : | MAKE_PASS_16(H_Pass_16_Add_Altivec_C, ADD_16_H, VARS_H_16, LOAD_H, STORE_H_16, 1, BpS) | ||
406 : | MAKE_PASS_16(H_Pass_Avrg_16_Add_Altivec_C, AVRG_ADD_16_H, VARS_H_16, LOAD_H, STORE_H_16, 1, BpS) | ||
407 : | MAKE_PASS_16(H_Pass_Avrg_Up_16_Add_Altivec_C, AVRG_UP_ADD_16_H, VARS_H_16, LOAD_H, STORE_H_16, 1, BpS) | ||
408 : | |||
409 : | MAKE_PASS_16(V_Pass_16_Add_Altivec_C, ADD_16_V, VARS_V, LOAD_V_16, STORE_V_16, BpS, 1) | ||
410 : | MAKE_PASS_16(V_Pass_Avrg_16_Add_Altivec_C, AVRG_ADD_16_V, VARS_V, LOAD_V_16, STORE_V_16, BpS, 1) | ||
411 : | MAKE_PASS_16(V_Pass_Avrg_Up_16_Add_Altivec_C, AVRG_UP_ADD_16_V, VARS_V, LOAD_V_16, STORE_V_16, BpS, 1) | ||
412 : | |||
413 : | |||
414 : | /* These functions assume: | ||
415 : | * Dst: 8 Byte aligned | ||
416 : | * BpS: Multiple of 8 | ||
417 : | */ | ||
418 : | MAKE_PASS_8(H_Pass_8_Add_Altivec_C, ADD_8_H, VARS_H_8, LOAD_H, STORE_H_8, BpS) | ||
419 : | MAKE_PASS_8(H_Pass_Avrg_8_Add_Altivec_C, AVRG_ADD_8_H, VARS_H_8, LOAD_H, STORE_H_8, BpS) | ||
420 : | MAKE_PASS_8(H_Pass_Avrg_Up_8_Add_Altivec_C, AVRG_UP_ADD_8_H, VARS_H_8, LOAD_H, STORE_H_8, BpS) | ||
421 : | |||
422 : | |||
423 : | /* These functions assume no alignment */ | ||
424 : | MAKE_PASS_8(V_Pass_8_Add_Altivec_C, ADD_8_V, VARS_V, LOAD_V_8, STORE_V_8, 1) | ||
425 : | MAKE_PASS_8(V_Pass_Avrg_8_Add_Altivec_C, AVRG_ADD_8_V, VARS_V, LOAD_V_8, STORE_V_8, 1) | ||
426 : | MAKE_PASS_8(V_Pass_Avrg_Up_8_Add_Altivec_C, AVRG_UP_ADD_8_V, VARS_V, LOAD_UP_V_8, STORE_V_8, 1) |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |