 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * $Id: mem_transfer_altivec.c,v 1.2 2004-12-09 23:02:54 edgomez Exp $
 *
 ****************************************************************************/

/* ... */

#include <stdio.h>

/* This function assumes:
 * dst: 16 byte aligned
 * src: unaligned
 * stride: multiple of 16
 */

#define COPY8TO16() \
	s = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); /* load the next 8 bytes */ \
	vec_st((vector signed short)vec_mergeh(zerovec, s), 0, dst);     /* convert and save */ \
	src += stride; \
	dst += 8
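
/* In scalar terms each COPY8TO16 step behaves roughly like this
 * sketch (one row of 8 source bytes, zero-extended to 16 bit):
 *
 *   for (i = 0; i < 8; i++)
 *       dst[i] = (int16_t)src[i];
 *
 * vec_lvsl/vec_perm realign the unaligned src load, and vec_mergeh
 * against the zero vector widens the bytes before the aligned store.
 */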

void
transfer_8to16copy_altivec_c(int16_t *dst,
                             uint8_t * src,
                             uint32_t stride)
{
	register vector unsigned char s;
	register vector unsigned char zerovec;

#ifdef DEBUG
	/* Check the alignment */
	if((long)dst & 0xf)
		fprintf(stderr, "transfer_8to16copy_altivec_c:incorrect align, dst: %lx\n", (long)dst);
	if(stride & 0xf)
		fprintf(stderr, "transfer_8to16copy_altivec_c:incorrect align, stride: %u\n", stride);
#endif

	/* initialization */
	zerovec = vec_splat_u8(0);

	/* to the actual copy */
	COPY8TO16();
	COPY8TO16();
	COPY8TO16();

/* ... */

#ifdef DEBUG
	/* if this is on, print alignment errors */
	if(((unsigned long) dst) & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, dst %lx\n", (long)dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer_16to8copy_altivec:incorrect align, stride %u\n", stride);
#endif

/* ... */

#define COPY8TO8() \
	tmp = vec_perm(vec_ld(0, src), vec_ld(16, src), vec_lvsl(0, src)); \
	t0 = vec_perm(tmp, tmp, vec_lvsl(0, dst)); \
	t1 = vec_perm(mask, mask, vec_lvsl(0, dst)); \
	tmp = vec_sel(t0, vec_ld(0, dst), t1); \
	vec_st(tmp, 0, dst); \
	dst += stride; \
	src += stride
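
/* COPY8TO8 copies one 8-byte row between 8-byte aligned buffers.
 * The mask (0x00 in its first 8 bytes, 0xff in its last 8) is rotated
 * to the destination's offset so vec_sel only replaces the half of the
 * 16-byte block that belongs to dst; roughly:
 *
 *   for (i = 0; i < 8; i++)
 *       dst[i] = src[i];
 */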

void
transfer8x8_copy_altivec_c(uint8_t * dst,
                           uint8_t * src,
                           uint32_t stride)
{
	register vector unsigned char tmp;
	register vector unsigned char mask;
	register vector unsigned char t0, t1;

#ifdef DEBUG
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "transfer8x8_copy_altivec:incorrect align, dst: %lx\n", (long)dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer8x8_copy_altivec:incorrect stride, stride: %u\n", stride);
#endif
	mask = vec_pack(vec_splat_u16(0), vec_splat_u16(-1));

	/* ... */
}

#define SUB8TO16() \
	c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
	r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \
	cs = (vector signed short)vec_mergeh(ox00, c); \
	rs = (vector signed short)vec_mergeh(ox00, r); \
	\
	c = vec_lvsr(0, cur); \
	mask = vec_perm(mask_00ff, mask_00ff, c); \
	r = vec_perm(r, r, c); \
	r = vec_sel(r, vec_ld(0, cur), mask); \
	vec_st(r, 0, cur); \
	vec_st(vec_sub(cs, rs), 0, dct); \
	\
	dct += 8; \
	cur += stride; \
	ref += stride
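
/* Per row, SUB8TO16 stores the 16-bit difference cur - ref into dct
 * and writes the reference bytes back into cur (vec_lvsr/mask_00ff
 * route the 8 ref bytes into cur's half of its 16-byte block without
 * touching the other half). A rough scalar sketch:
 *
 *   for (i = 0; i < 8; i++) {
 *       dct[i] = (int16_t)cur[i] - (int16_t)ref[i];
 *       cur[i] = ref[i];
 *   }
 */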

/* This function assumes:
 * dct: 16 Byte aligned
 * cur: 8 Byte aligned
 * stride: multiple of 8
 */

void
transfer_8to16sub_altivec_c(int16_t * dct,
                            uint8_t *cur,
                            uint8_t *ref,
                            const uint32_t stride)
{
	register vector unsigned char c, r;
	register vector unsigned char ox00;
	register vector unsigned char mask_00ff;
	register vector unsigned char mask;
	register vector signed short cs, rs;

#ifdef DEBUG
	if((long)dct & 0xf)
		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, dct: %lx\n", (long)dct);
	if((long)cur & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect align, cur: %lx\n", (long)cur);
	if(stride & 0x7)
		fprintf(stderr, "transfer_8to16sub_altivec_c:incorrect stride, stride: %lu\n", (unsigned long)stride);
#endif

	/* initialize */
	ox00 = vec_splat_u8(0);
	mask_00ff = vec_pack((vector unsigned short)ox00, vec_splat_u16(-1));
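	/* mask_00ff selects halves of a 16-byte block: 0x00 in bytes 0..7,
	 * 0xff in bytes 8..15; SUB8TO16 rotates it to cur's alignment to
	 * decide which half of cur's block to overwrite */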

	SUB8TO16();
	SUB8TO16();
	/* ... */
	SUB8TO16();
}

#define SUBRO8TO16() \
	c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
	r = vec_perm(vec_ld(0, ref), vec_ld(16, ref), vec_lvsl(0, ref)); \
	cs = (vector signed short)vec_mergeh(z, c); \
	rs = (vector signed short)vec_mergeh(z, r); \
	vec_st(vec_sub(cs, rs), 0, dct); \
	dct += 8; \
	cur += stride; \
	ref += stride
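
/* The "ro" (read-only) variant: like SUB8TO16 it stores the 16-bit
 * difference cur - ref into dct, but it leaves cur untouched; roughly:
 *
 *   for (i = 0; i < 8; i++)
 *       dct[i] = (int16_t)cur[i] - (int16_t)ref[i];
 */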

/* This function assumes:
 * dct: 16 Byte aligned
 */

void
transfer_8to16subro_altivec_c(int16_t * dct,
                              const uint8_t * cur,
                              const uint8_t * ref,
                              const uint32_t stride)
{
	register vector unsigned char c;
	register vector unsigned char r;
	register vector unsigned char z;
	register vector signed short cs;
	register vector signed short rs;

#ifdef DEBUG
	/* Check the alignment assumptions if this is on */
	if((long)dct & 0xf)
		fprintf(stderr, "transfer_8to16subro_altivec_c:incorrect align, dct: %lx\n", (long)dct);
#endif

	/* initialize */
	z = vec_splat_u8(0);

	SUBRO8TO16();
	/* ... */
	SUBRO8TO16();
}

/*
 * This function assumes:
 * dct: 16 bytes alignment
 * ...
 */
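
/* transfer_8to16sub2 subtracts the average of two reference rows:
 * vec_avg(r1, r2) computes (r1 + r2 + 1) >> 1 per byte, the averaged
 * prediction is written back into cur, and the 16-bit difference goes
 * to dct. A rough scalar sketch (ignoring the masked half-block store):
 *
 *   for (i = 0; i < 8; i++) {
 *       uint8_t p = (ref1[i] + ref2[i] + 1) >> 1;
 *       dct[i] = (int16_t)cur[i] - p;
 *       cur[i] = p;
 *   }
 */

/* ... */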
	r2 = vec_perm(vec_ld(0, ref2), vec_ld(16, ref2), vec_lvsl(0, ref2)); \
	c = vec_perm(vec_ld(0, cur), vec_ld(16, cur), vec_lvsl(0, cur)); \
	r = vec_avg(r1, r2); \
	cs = (vector signed short)vec_mergeh(vec_splat_u8(0), c); \
	rs = (vector signed short)vec_mergeh(vec_splat_u8(0), r); \
	c = vec_perm(mask, mask, vec_lvsl(0, cur)); \
	r = vec_sel(r, vec_ld(0, cur), c); \
	vec_st(r, 0, cur); \
	*dct++ = vec_sub(cs, rs); \
	cur += stride; \
	ref1 += stride; \

#ifdef DEBUG
	/* Dump alignment errors if DEBUG is set */
	if(((unsigned long)dct) & 0xf)
		fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, dct: %lx\n", (long)dct);
	if(((unsigned long)cur) & 0x7)
		fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, cur: %lx\n", (long)cur);
	if(stride & 0x7)
		fprintf(stderr, "transfer_8to16sub2_altivec_c:incorrect align, stride: %u\n", stride);
#endif

/* ... */

#ifdef DEBUG
	/* if this is set, dump alignment errors */
	if(((unsigned long)dst) & 0x7)
		fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, dst: %lx\n", (long)dst);
	if(stride & 0x7)
		fprintf(stderr, "transfer_16to8add_altivec_c:incorrect align, stride: %u\n", stride);
#endif