Parent Directory | Revision Log
Revision 1413 - (view) (download)
1 : | edgomez | 1413 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - Altivec 8bit<->16bit transfer - | ||
5 : | * | ||
6 : | * Copyright(C) 2004 Christoph Naegeli <chn@kbw.ch> | ||
7 : | * | ||
8 : | * This program is free software ; you can redistribute it and/or modify | ||
9 : | * it under the terms of the GNU General Public License as published by | ||
10 : | * the Free Software Foundation ; either version 2 of the License, or | ||
11 : | * (at your option) any later version. | ||
12 : | * | ||
13 : | * This program is distributed in the hope that it will be useful, | ||
14 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
15 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
16 : | * GNU General Public License for more details. | ||
17 : | * | ||
18 : | * You should have received a copy of the GNU General Public License | ||
19 : | * along with this program ; if not, write to the Free Software | ||
20 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
21 : | * | ||
22 : | * $Id: mem_transfer_altivec.c,v 1.1 2004-04-05 20:36:37 edgomez Exp $ | ||
23 : | * | ||
24 : | ****************************************************************************/ | ||
25 : | |||
26 : | #ifdef HAVE_ALTIVEC_H | ||
27 : | #include <altivec.h> | ||
28 : | #endif | ||
29 : | |||
30 : | #include "../../portab.h" | ||
31 : | |||
32 : | |||
33 : | /* Turn this on if you like debugging the alignment */ | ||
34 : | #undef DEBUG | ||
35 : | |||
36 : | #include <stdio.h> | ||
37 : | |||
38 : | /* | ||
39 : | * This Function assumes dst is 16 byte aligned src is unaligned and stride is | ||
40 : | * a multiple of 16. | ||
41 : | */ | ||
42 : | |||
43 : | #define COPY8TO16() \ | ||
44 : | s = vec_perm(vec_ld(0, src), vec_ld(16, src), perm); /* load the next 8 bytes */ \ | ||
45 : | *dst++ = (vector signed short)vec_mergeh(zerovec, s); /* convert and save */ \ | ||
46 : | src += stride | ||
47 : | |||
48 : | void | ||
49 : | transfer_8to16copy_altivec_c(vector signed short *dst, | ||
50 : | uint8_t * src, | ||
51 : | uint32_t stride) | ||
52 : | { | ||
53 : | register vector unsigned char perm; | ||
54 : | register vector unsigned char s; | ||
55 : | register vector unsigned char zerovec; | ||
56 : | |||
57 : | #ifdef DEBUG | ||
58 : | /* check the alignment */ | ||
59 : | if(((unsigned long)dst) & 0xf) | ||
60 : | fprintf(stderr, "transfer_8to16copy_altivec:incorrect align, dst: %x\n", dst); | ||
61 : | if(stride & 0xf) | ||
62 : | fprintf(stderr, "transfer_8to16copy_altivec:incorrect align, stride: %u\n", stride); | ||
63 : | #endif | ||
64 : | |||
65 : | /* initialisation */ | ||
66 : | perm = vec_lvsl(0, src); | ||
67 : | zerovec = vec_splat_u8(0); | ||
68 : | |||
69 : | /* to the actual copy */ | ||
70 : | COPY8TO16(); | ||
71 : | COPY8TO16(); | ||
72 : | COPY8TO16(); | ||
73 : | COPY8TO16(); | ||
74 : | |||
75 : | COPY8TO16(); | ||
76 : | COPY8TO16(); | ||
77 : | COPY8TO16(); | ||
78 : | COPY8TO16(); | ||
79 : | } | ||
80 : | |||
81 : | |||
/*
 * This function assumes dst is 8 byte aligned and stride is a multiple of 8
 * src may be unaligned
 */

/*
 * Store one row of 8 coefficients as bytes: saturate-pack the 8 shorts to
 * unsigned bytes, rotate them to dst's position inside its 16-byte line,
 * then merge with the neighbouring destination bytes via vec_sel so the
 * full 16-byte vec_st only modifies the intended 8 bytes.
 */
#define COPY16TO8() \
	s = vec_perm(src[0], src[1], load_src_perm); /* load 8 shorts (src may be unaligned) */ \
	packed = vec_packsu(s, vec_splat_s16(0)); /* saturate to unsigned bytes, data in low 8 bytes */ \
	mask = vec_perm(mask_stencil, mask_stencil, vec_lvsl(0, dst)); /* rotate keep/replace stencil to dst's offset */ \
	packed = vec_perm(packed, packed, vec_lvsl(0, dst)); /* rotate data to dst's offset */ \
	packed = vec_sel(packed, vec_ld(0, dst), mask); /* preserve the other 8 bytes already at dst */ \
	vec_st(packed, 0, dst); \
	src++; /* advance one vector = 8 shorts */ \
	dst += stride

void transfer_16to8copy_altivec_c(uint8_t *dst,
						vector signed short *src,
						uint32_t stride)
{
	register vector signed short s;       /* current row of 8 coefficients */
	register vector unsigned char packed; /* row packed down to bytes */
	register vector unsigned char mask_stencil; /* 8 zero bytes then 8 0xFF bytes */
	register vector unsigned char mask;   /* stencil rotated to dst's alignment */
	register vector unsigned char load_src_perm; /* unaligned-load permute for src */

#ifdef DEBUG
	/* if this is on, print alignment errors */
	if(((unsigned long) dst) & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %x\n", dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", stride);
#endif
	/* Initialisation stuff */
	/* src misalignment is fixed (we advance by whole vectors), compute once */
	load_src_perm = vec_lvsl(0, (unsigned char*)src);
	/* stencil: 0x00 in bytes 0-7 (take packed data), 0xFF in bytes 8-15 (keep dst) */
	mask_stencil = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* 8 rows, fully unrolled */
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();

	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
	COPY16TO8();
}
128 : | |||
129 : | |||
130 : | |||
131 : | /* | ||
132 : | * This function assumes dst is 8 byte aligned and src is unaligned. Stride has | ||
133 : | * to be a multiple of 8 | ||
134 : | */ | ||
135 : | |||
136 : | #define COPY8TO8() \ | ||
137 : | tmp = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \ | ||
138 : | tmp = vec_sel(vec_perm(tmp, tmp, vec_lvsl(0, dst)), vec_ld(0, dst), vec_perm(mask, mask, vec_lvsl(0, dst))); \ | ||
139 : | vec_st(tmp, 0, dst); \ | ||
140 : | dst += stride; \ | ||
141 : | src += stride | ||
142 : | |||
143 : | void | ||
144 : | transfer8x8_copy_altivec_c( uint8_t * dst, | ||
145 : | uint8_t * src, | ||
146 : | uint32_t stride) | ||
147 : | { | ||
148 : | register vector unsigned char tmp; | ||
149 : | register vector unsigned char mask; | ||
150 : | |||
151 : | #ifdef DEBUG | ||
152 : | if(((unsigned long)dst) & 0x7) | ||
153 : | fprintf("transfer8x8_copy_altivec:incorrect align, dst: %x\n", dst); | ||
154 : | if(stride & 0x7) | ||
155 : | fprintf("transfer8x8_copy_altivec:incorrect stride, stride: %u\n", stride); | ||
156 : | #endif | ||
157 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); | ||
158 : | |||
159 : | COPY8TO8(); | ||
160 : | COPY8TO8(); | ||
161 : | COPY8TO8(); | ||
162 : | COPY8TO8(); | ||
163 : | |||
164 : | COPY8TO8(); | ||
165 : | COPY8TO8(); | ||
166 : | COPY8TO8(); | ||
167 : | COPY8TO8(); | ||
168 : | } | ||
169 : | |||
170 : | |||
/*
 * This function assumes dct is 16 bytes aligned, cur and ref are 8 bytes
 * aligned and stride is a multiple of 8
 */

/*
 * One row: compute the 16-bit difference cur - ref into *dct, and also
 * copy the ref row back into cur (the vec_sel/vec_st pair below writes
 * ref's bytes over cur's 8 bytes while preserving the neighbouring 8).
 */
#define SUB8TO16() \
	c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); /* load cur row */ \
	r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); /* load ref row */ \
	t = vec_sel(vec_perm(r, r, vec_lvsl(0, cur)), vec_ld(0, cur), vec_perm(mask, mask, vec_lvsl(0, cur))); /* merge ref into cur's line */ \
	vec_st(t, 0, cur); /* write ref row back into cur */ \
	t = vec_splat_u8(0); /* reuse t as a zero vector */ \
	cs = (vector signed short)vec_mergeh(t, c); /* zero-extend cur to 16 bit */ \
	rs = (vector signed short)vec_mergeh(t, r); /* zero-extend ref to 16 bit */ \
	*dct++ = vec_sub(cs, rs); \
	cur += stride; \
	ref += stride

void
transfer_8to16sub_altivec_c(vector signed short *dct,
						uint8_t *cur,
						uint8_t *ref,
						uint32_t stride)
{
	vector unsigned char c;    /* current row, bytes */
	vector unsigned char r;    /* reference row, bytes */
	vector unsigned char t;    /* scratch: merged store value, then zero */
	vector unsigned char mask; /* keep/replace stencil for the cur store */
	vector signed short cs;    /* current row widened to 16 bit */
	vector signed short rs;    /* reference row widened to 16 bit */

#ifdef DEBUG
	if(((unsigned long)dct) & 0xf)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, dct: %x\n", dct);
	if(((unsigned long)cur) & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, cur: %x\n", cur);
	if(((unsigned long)ref) & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, ref: %x\n", ref);
	if(stride & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec:incorrect align, stride: %u\n", stride);
#endif

	/* Initialisation */
	/* stencil: 0x00 in bytes 0-7 (take ref data), 0xFF in bytes 8-15 (keep cur) */
	mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* 8 rows, fully unrolled */
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();

	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
	SUB8TO16();
}
225 : | |||
226 : | /* | ||
227 : | * This function assumes that dct is 16 bytes aligned, cur and ref is 8 bytes aligned | ||
228 : | * and stride is a multiple of 8 | ||
229 : | */ | ||
230 : | |||
231 : | #define SUBRO8TO16() \ | ||
232 : | c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \ | ||
233 : | r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \ | ||
234 : | cs = (vector signed short)vec_mergeh(z, c); \ | ||
235 : | rs = (vector signed short)vec_mergeh(z, r); \ | ||
236 : | *dct++ = vec_sub(cs, rs); \ | ||
237 : | cur += stride; \ | ||
238 : | ref += stride | ||
239 : | |||
240 : | void | ||
241 : | transfer_8to16subro_altivec_c(vector signed short *dct, | ||
242 : | uint8_t *cur, | ||
243 : | uint8_t *ref, | ||
244 : | uint32_t stride) | ||
245 : | { | ||
246 : | register vector unsigned char c; | ||
247 : | register vector unsigned char r; | ||
248 : | register vector unsigned char z; | ||
249 : | register vector signed short cs; | ||
250 : | register vector signed short rs; | ||
251 : | |||
252 : | #ifdef DEBUG | ||
253 : | /* if this is on, print alignment errors */ | ||
254 : | if(((unsigned long)dct) & 0xf) | ||
255 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %x\n", dct); | ||
256 : | if(((unsigned long)cur) & 0x7) | ||
257 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, cur: %x\n", cur); | ||
258 : | if(((unsigned long)ref) & 0x7) | ||
259 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, ref: %x\n", ref); | ||
260 : | if(stride & 0x7) | ||
261 : | fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, stride: %u\n", stride); | ||
262 : | #endif | ||
263 : | |||
264 : | z = vec_splat_u8(0); | ||
265 : | |||
266 : | SUBRO8TO16(); | ||
267 : | SUBRO8TO16(); | ||
268 : | SUBRO8TO16(); | ||
269 : | SUBRO8TO16(); | ||
270 : | |||
271 : | SUBRO8TO16(); | ||
272 : | SUBRO8TO16(); | ||
273 : | SUBRO8TO16(); | ||
274 : | SUBRO8TO16(); | ||
275 : | } | ||
276 : | |||
277 : | |||
278 : | /* | ||
279 : | * This function assumes: | ||
280 : | * dct: 16 bytes alignment | ||
281 : | * cur: 8 bytes alignment | ||
282 : | * ref1: unaligned | ||
283 : | * ref2: unaligned | ||
284 : | * stride: multiple of 8 | ||
285 : | */ | ||
286 : | |||
287 : | #define SUB28TO16() \ | ||
288 : | r1 = vec_perm(vec_ld(0, ref1), vec_ld(16, ref1), vec_lvsl(0, ref1)); \ | ||
289 : | r2 = vec_perm(vec_ld(0, ref2), vec_ld(16, ref2), vec_lvsl(0, ref2)); \ | ||
290 : | c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \ | ||
291 : | r = vec_avg(r1, r2); \ | ||
292 : | vec_st(vec_sel(r, vec_ld(0, cur), vec_perm(mask, mask, vec_lvsl(0, cur))), 0, cur); \ | ||
293 : | cs = (vector signed short)vec_mergeh(vec_splat_u8(0), c); \ | ||
294 : | rs = (vector signed short)vec_mergeh(vec_splat_u8(0), r); \ | ||
295 : | *dct++ = vec_sub(cs, rs); \ | ||
296 : | cur += stride; \ | ||
297 : | ref1 += stride; \ | ||
298 : | ref2 += stride | ||
299 : | |||
300 : | void | ||
301 : | transfer_8to16sub2_altivec_c(vector signed short *dct, | ||
302 : | uint8_t *cur, | ||
303 : | uint8_t *ref1, | ||
304 : | uint8_t *ref2, | ||
305 : | const uint32_t stride) | ||
306 : | { | ||
307 : | vector unsigned char r1; | ||
308 : | vector unsigned char r2; | ||
309 : | vector unsigned char r; | ||
310 : | vector unsigned char c; | ||
311 : | vector unsigned char mask; | ||
312 : | vector signed short cs; | ||
313 : | vector signed short rs; | ||
314 : | |||
315 : | #ifdef DEBUG | ||
316 : | /* Dump alignment erros if DEBUG is set */ | ||
317 : | if(((unsigned long)dct) & 0xf) | ||
318 : | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %x\n", dct); | ||
319 : | if(((unsigned long)cur) & 0x7) | ||
320 : | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %x\n", cur); | ||
321 : | if(stride & 0x7) | ||
322 : | fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %u\n", stride); | ||
323 : | #endif | ||
324 : | |||
325 : | /* Initialisation */ | ||
326 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); | ||
327 : | |||
328 : | SUB28TO16(); | ||
329 : | SUB28TO16(); | ||
330 : | SUB28TO16(); | ||
331 : | SUB28TO16(); | ||
332 : | |||
333 : | SUB28TO16(); | ||
334 : | SUB28TO16(); | ||
335 : | SUB28TO16(); | ||
336 : | SUB28TO16(); | ||
337 : | } | ||
338 : | |||
339 : | |||
340 : | |||
341 : | /* | ||
342 : | * This function assumes: | ||
343 : | * dst: 8 byte aligned | ||
344 : | * src: unaligned | ||
345 : | * stride: multiple of 8 | ||
346 : | */ | ||
347 : | |||
348 : | #define ADD16TO8() \ | ||
349 : | s = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \ | ||
350 : | d = vec_perm(vec_ld(0, dst), vec_ld(16, dst), vec_lvsl(0, dst)); \ | ||
351 : | ds = (vector signed short)vec_mergeh(vec_splat_u8(0), d); \ | ||
352 : | ds = vec_add(ds, s); \ | ||
353 : | packed = vec_packsu(ds, vec_splat_s16(0)); \ | ||
354 : | mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1)); \ | ||
355 : | mask = vec_perm(mask, mask, vec_lvsl(0, dst)); \ | ||
356 : | packed = vec_perm(packed, packed, vec_lvsl(0, dst)); \ | ||
357 : | packed = vec_sel(packed, vec_ld(0, dst), mask); \ | ||
358 : | vec_st(packed, 0, dst); \ | ||
359 : | src += 8; \ | ||
360 : | dst += stride | ||
361 : | |||
362 : | void | ||
363 : | transfer_16to8add_altivec_c(uint8_t *dst, | ||
364 : | int16_t *src, | ||
365 : | uint32_t stride) | ||
366 : | { | ||
367 : | vector signed short s; | ||
368 : | vector signed short ds; | ||
369 : | vector unsigned char d; | ||
370 : | vector unsigned char packed; | ||
371 : | vector unsigned char mask; | ||
372 : | |||
373 : | #ifdef DEBUG | ||
374 : | /* if this is set, dump alignment errors */ | ||
375 : | if(((unsigned long)dst) & 0x7) | ||
376 : | fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %x\n", dst); | ||
377 : | if(stride & 0x7) | ||
378 : | fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %u\n", stride); | ||
379 : | #endif | ||
380 : | |||
381 : | ADD16TO8(); | ||
382 : | ADD16TO8(); | ||
383 : | ADD16TO8(); | ||
384 : | ADD16TO8(); | ||
385 : | |||
386 : | ADD16TO8(); | ||
387 : | ADD16TO8(); | ||
388 : | ADD16TO8(); | ||
389 : | ADD16TO8(); | ||
390 : | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |