17 |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 |
|
|
19 |
|
|
20 |
$Id: sad_altivec.c,v 1.10 2004-04-12 14:05:08 edgomez Exp $ |
$Id: sad_altivec.c,v 1.11 2004-12-09 23:02:54 edgomez Exp $ |
21 |
*/ |
*/ |
22 |
|
|
23 |
#ifdef HAVE_ALTIVEC_H |
#ifdef HAVE_ALTIVEC_H |
46 |
/* |
/* |
47 |
* This function assumes cur and stride are 16 bytes aligned and ref is unaligned |
* This function assumes cur and stride are 16 bytes aligned and ref is unaligned |
48 |
*/ |
*/ |
49 |
unsigned long |
|
50 |
sad16_altivec_c(const vector unsigned char *cur, |
uint32_t |
51 |
const vector unsigned char *ref, |
sad16_altivec_c(vector unsigned char *cur, |
52 |
unsigned long stride, |
vector unsigned char *ref, |
53 |
const unsigned long best_sad) |
uint32_t stride, |
54 |
|
const uint32_t best_sad) |
55 |
{ |
{ |
56 |
vector unsigned char perm; |
vector unsigned char perm; |
57 |
vector unsigned char t1, t2; |
vector unsigned char t1, t2; |
58 |
vector unsigned int sad; |
vector unsigned int sad; |
59 |
vector unsigned int sumdiffs; |
vector unsigned int sumdiffs; |
60 |
vector unsigned int best_vec; |
vector unsigned int best_vec; |
61 |
unsigned long result; |
uint32_t result; |
62 |
|
|
63 |
|
|
64 |
#ifdef DEBUG |
#ifdef DEBUG |
65 |
/* print alignment errors if DEBUG is on */ |
/* print alignment errors if DEBUG is on */ |
66 |
if (((unsigned long) cur) & 0xf) |
if (((unsigned long) cur) & 0xf) |
67 |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %lx\n", (long)cur); |
68 |
if (stride & 0xf) |
if (stride & 0xf) |
69 |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %lu\n", stride); |
70 |
#endif |
#endif |
71 |
/* initialization */ |
/* initialization */ |
72 |
sad = vec_splat_u32(0); |
sad = vec_splat_u32(0); |
73 |
sumdiffs = sad; |
sumdiffs = sad; |
74 |
stride >>= 4; |
stride >>= 4; |
75 |
perm = vec_lvsl(0, (unsigned char *) ref); |
perm = vec_lvsl(0, (unsigned char *) ref); |
76 |
*((unsigned long *) &best_vec) = best_sad; |
*((uint32_t*)&best_vec) = best_sad; |
77 |
best_vec = vec_splat(best_vec, 0); |
best_vec = vec_splat(best_vec, 0); |
78 |
|
|
79 |
/* perform sum of differences between current and previous */ |
/* perform sum of differences between current and previous */ |
100 |
bail: |
bail: |
101 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
102 |
sumdiffs = vec_splat(sumdiffs, 3); |
sumdiffs = vec_splat(sumdiffs, 3); |
103 |
vec_ste(sumdiffs, 0, (unsigned long *) &result); |
vec_ste(sumdiffs, 0, (uint32_t*) &result); |
104 |
return result; |
return result; |
105 |
} |
} |
106 |
|
|
107 |
|
|
108 |
#define SAD8() \ |
#define SAD8() \ |
109 |
t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \ |
c = vec_perm(vec_ld(0,cur),vec_ld(16,cur),vec_lvsl(0,cur));\ |
110 |
t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \ |
r = vec_perm(vec_ld(0,ref),vec_ld(16,ref),vec_lvsl(0,ref));\ |
111 |
tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \ |
c = vec_sub(vec_max(c,r),vec_min(c,r));\ |
112 |
t2 = vec_perm(t2,tp,perm_ref2); \ |
sad = vec_sum4s(c,sad);\ |
113 |
tp = vec_max(t1, t2); /* find largest of two */ \ |
cur += stride;\ |
114 |
t1 = vec_min(t1, t2); /* find smaller of two */ \ |
ref += stride |
|
tp = vec_sub(tp, t1); /* find absolute difference */ \ |
|
|
sad = vec_sum4s(tp, sad); /* accumulate sum of differences */ \ |
|
|
cur += stride<<1; ref += stride<<1; |
|
115 |
|
|
116 |
/* |
/* |
117 |
* This function assumes cur is 8 bytes aligned, stride is 16 bytes |
* This function assumes nothing |
|
* aligned and ref is unaligned |
|
118 |
*/ |
*/ |
|
unsigned long |
|
|
sad8_altivec_c(const vector unsigned char *cur, |
|
|
const vector unsigned char *ref, |
|
|
unsigned long stride) |
|
|
{ |
|
|
vector unsigned char t1, t2, tp; |
|
|
vector unsigned int sad; |
|
|
vector unsigned int sumdiffs; |
|
|
vector unsigned char perm_cur; |
|
|
vector unsigned char perm_ref1, perm_ref2; |
|
|
unsigned long result; |
|
119 |
|
|
120 |
#ifdef DEBUG |
uint32_t |
121 |
/* print alignment errors if DEBUG is on */ |
sad8_altivec_c(const uint8_t * cur, |
122 |
if (((unsigned long) cur) & 0x7) |
const uint8_t *ref, |
123 |
fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); |
const uint32_t stride) |
124 |
if (stride & 0xf) |
{ |
125 |
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
uint32_t result = 0; |
|
#endif |
|
126 |
|
|
127 |
/* check if cur is 8 or 16 bytes aligned and create the perm_cur vector */ |
register vector unsigned int sad; |
128 |
perm_ref1 = vec_lvsl(0, (unsigned char*)ref); |
register vector unsigned char c; |
129 |
perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8))); |
register vector unsigned char r; |
|
perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0)); |
|
130 |
|
|
131 |
/* initialization */ |
/* initialize */ |
132 |
sad = vec_splat_u32(0); |
sad = vec_splat_u32(0); |
|
stride >>= 4; |
|
133 |
|
|
134 |
/* perform sum of differences between current and previous */ |
/* Perform sad operations */ |
135 |
SAD8(); |
SAD8(); |
136 |
SAD8(); |
SAD8(); |
137 |
SAD8(); |
SAD8(); |
138 |
SAD8(); |
SAD8(); |
139 |
|
|
140 |
/* sum all parts of difference into one 32 bit quantity */ |
SAD8(); |
141 |
sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0)); |
SAD8(); |
142 |
|
SAD8(); |
143 |
|
SAD8(); |
144 |
|
|
145 |
|
/* finish addition, add the first 2 together */ |
146 |
|
sad = vec_and(sad, (vector unsigned int)vec_pack(vec_splat_u16(-1),vec_splat_u16(0))); |
147 |
|
sad = (vector unsigned int)vec_sums((vector signed int)sad, vec_splat_s32(0)); |
148 |
|
sad = vec_splat(sad,3); |
149 |
|
vec_ste(sad, 0, &result); |
150 |
|
|
|
/* copy vector sum into unaligned result */ |
|
|
sumdiffs = vec_splat(sumdiffs, 3); |
|
|
vec_ste(sumdiffs, 0, (unsigned int *) &result); |
|
151 |
return result; |
return result; |
152 |
} |
} |
153 |
|
|
154 |
|
|
155 |
|
|
156 |
|
|
157 |
#define MEAN16() \ |
#define MEAN16() \ |
158 |
mean = vec_sum4s(*ptr,mean);\ |
mean = vec_sum4s(*ptr,mean);\ |
159 |
ptr += stride |
ptr += stride |
169 |
* This function assumes cur is 16 bytes aligned and stride is 16 bytes |
* This function assumes cur is 16 bytes aligned and stride is 16 bytes |
170 |
* aligned |
* aligned |
171 |
*/ |
*/ |
172 |
unsigned long |
|
173 |
dev16_altivec_c(const vector unsigned char *cur, |
uint32_t |
174 |
unsigned long stride) |
dev16_altivec_c(vector unsigned char *cur, |
175 |
|
uint32_t stride) |
176 |
{ |
{ |
177 |
vector unsigned char t2, t3, mn; |
vector unsigned char t2, t3, mn; |
178 |
vector unsigned int mean, dev; |
vector unsigned int mean, dev; |
179 |
vector unsigned int sumdiffs; |
vector unsigned int sumdiffs; |
180 |
const vector unsigned char *ptr; |
vector unsigned char *ptr; |
181 |
unsigned long result; |
uint32_t result; |
182 |
|
|
183 |
#ifdef DEBUG |
#ifdef DEBUG |
184 |
/* print alignment errors if DEBUG is on */ |
/* print alignment errors if DEBUG is on */ |
185 |
if(((unsigned long)cur) & 0x7) |
if(((unsigned long)cur) & 0x7) |
186 |
fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "dev16_altivec:incorrect align, cur: %lx\n", (long)cur); |
187 |
if(stride & 0xf) |
if(stride & 0xf) |
188 |
fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride); |
fprintf(stderr, "dev16_altivec:incorrect align, stride: %lu\n", stride); |
189 |
#endif |
#endif |
190 |
|
|
191 |
dev = mean = vec_splat_u32(0); |
dev = mean = vec_splat_u32(0); |
241 |
|
|
242 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
243 |
sumdiffs = vec_splat(sumdiffs, 3); |
sumdiffs = vec_splat(sumdiffs, 3); |
244 |
vec_ste(sumdiffs, 0, (unsigned int *) &result); |
vec_ste(sumdiffs, 0, (uint32_t*) &result); |
245 |
return result; |
return result; |
246 |
} |
} |
247 |
|
|
261 |
* This function assumes cur is 16 bytes aligned, stride is 16 bytes |
* This function assumes cur is 16 bytes aligned, stride is 16 bytes |
262 |
* aligned and ref1 and ref2 are unaligned |
* aligned and ref1 and ref2 are unaligned |
263 |
*/ |
*/ |
264 |
unsigned long |
|
265 |
|
uint32_t |
266 |
sad16bi_altivec_c(vector unsigned char *cur, |
sad16bi_altivec_c(vector unsigned char *cur, |
267 |
vector unsigned char *ref1, |
vector unsigned char *ref1, |
268 |
vector unsigned char *ref2, |
vector unsigned char *ref2, |
269 |
unsigned long stride) |
uint32_t stride) |
270 |
{ |
{ |
271 |
vector unsigned char t1, t2; |
vector unsigned char t1, t2; |
272 |
vector unsigned char mask1, mask2; |
vector unsigned char mask1, mask2; |
273 |
vector unsigned char sad; |
vector unsigned char sad; |
274 |
vector unsigned int sum; |
vector unsigned int sum; |
275 |
unsigned long result; |
uint32_t result; |
276 |
|
|
277 |
#ifdef DEBUG |
#ifdef DEBUG |
278 |
/* print alignment errors if DEBUG is on */ |
/* print alignment errors if DEBUG is on */ |
279 |
if(cur & 0xf) |
if((long)cur & 0xf) |
280 |
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lx\n", (long)cur); |
281 |
if(stride & 0xf) |
if(stride & 0xf) |
282 |
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride); |
fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %lu\n", stride); |
283 |
#endif |
#endif |
284 |
|
|
285 |
/* Initialisation stuff */ |
/* Initialisation stuff */ |
311 |
|
|
312 |
sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0)); |
sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0)); |
313 |
sum = vec_splat(sum, 3); |
sum = vec_splat(sum, 3); |
314 |
vec_ste(sum, 0, (unsigned int*)&result); |
vec_ste(sum, 0, (uint32_t*)&result); |
315 |
|
|
316 |
return result; |
return result; |
317 |
} |
} |