1 |
/* |
/***************************************************************************** |
2 |
|
* |
3 |
|
* XVID MPEG-4 VIDEO CODEC |
4 |
|
* - altivec sum of absolute difference (C version) |
5 |
|
* |
6 |
|
* Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org> |
7 |
|
* |
8 |
|
* This file is part of XviD, a free MPEG-4 video encoder/decoder |
9 |
|
* |
10 |
|
* XviD is free software; you can redistribute it and/or modify it |
11 |
|
* under the terms of the GNU General Public License as published by |
12 |
|
* the Free Software Foundation; either version 2 of the License, or |
13 |
|
* (at your option) any later version. |
14 |
|
* |
15 |
|
* This program is distributed in the hope that it will be useful, |
16 |
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of |
17 |
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
18 |
|
* GNU General Public License for more details. |
19 |
|
* |
20 |
|
* You should have received a copy of the GNU General Public License |
21 |
|
* along with this program; if not, write to the Free Software |
22 |
|
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
|
* |
24 |
|
* Under section 8 of the GNU General Public License, the copyright |
25 |
|
* holders of XVID explicitly forbid distribution in the following |
26 |
|
* countries: |
27 |
|
* |
28 |
|
* - Japan |
29 |
|
* - United States of America |
30 |
|
* |
31 |
|
* Linking XviD statically or dynamically with other modules is making a |
32 |
|
* combined work based on XviD. Thus, the terms and conditions of the |
33 |
|
* GNU General Public License cover the whole combination. |
34 |
|
* |
35 |
|
* As a special exception, the copyright holders of XviD give you |
36 |
|
* permission to link XviD with independent modules that communicate with |
37 |
|
* XviD solely through the VFW1.1 and DShow interfaces, regardless of the |
38 |
|
* license terms of these independent modules, and to copy and distribute |
39 |
|
* the resulting combined work under terms of your choice, provided that |
40 |
|
* every copy of the combined work is accompanied by a complete copy of |
41 |
|
* the source code of XviD (the version of XviD used to produce the |
42 |
|
* combined work), being distributed under the terms of the GNU General |
43 |
|
* Public License plus this exception. An independent module is a module |
44 |
|
* which is not derived from or based on XviD. |
45 |
|
* |
46 |
|
* Note that people who make modified versions of XviD are not obligated |
47 |
|
* to grant this special exception for their modified versions; it is |
48 |
|
* their choice whether to do so. The GNU General Public License gives |
49 |
|
* permission to release a modified version without this exception; this |
50 |
|
* exception also makes it possible to release a modified version which |
51 |
|
* carries forward this exception. |
52 |
|
* |
53 |
|
* $Id: sad_altivec.c,v 1.5 2002-11-17 00:32:06 edgomez Exp $ |
54 |
|
* |
55 |
|
****************************************************************************/ |
56 |
|
|
57 |
|
#define G_REG |
58 |
|
|
59 |
|
#ifdef G_REG |
60 |
|
register vector unsigned char perm0 asm("%v29"); |
61 |
|
register vector unsigned char perm1 asm("%v30"); |
62 |
|
register vector unsigned int zerovec asm("%v31"); |
63 |
|
#endif |
64 |
|
|
65 |
Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org> |
#include <stdio.h> |
66 |
|
|
67 |
This program is free software; you can redistribute it and/or modify |
#undef DEBUG |
|
it under the terms of the GNU General Public License as published by |
|
|
the Free Software Foundation; either version 2 of the License, or |
|
|
(at your option) any later version. |
|
|
|
|
|
This program is distributed in the hope that it will be useful, |
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
GNU General Public License for more details. |
|
|
|
|
|
You should have received a copy of the GNU General Public License |
|
|
along with this program; if not, write to the Free Software |
|
|
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
|
|
|
|
|
|
|
$Id: sad_altivec.c,v 1.1 2002-04-03 14:17:05 canard Exp $ |
|
|
$Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $ |
|
|
$Date: 2002-04-03 14:17:05 $ |
|
|
$Author: canard $ |
|
68 |
|
|
69 |
*/ |
static const vector unsigned char perms[2] = { |
70 |
|
(vector unsigned char) ( /* Used when cur is aligned */ |
71 |
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
72 |
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17), |
73 |
|
(vector unsigned char) ( /* Used when cur is unaligned */ |
74 |
|
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
75 |
|
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f), |
76 |
|
}; |
77 |
|
|
78 |
#include <stdio.h> |
#ifdef G_REG |
79 |
|
void |
80 |
|
sadInit_altivec(void) |
81 |
|
{ |
82 |
|
perm0 = perms[0]; |
83 |
|
perm1 = perms[1]; |
84 |
|
zerovec = (vector unsigned int) (0); |
85 |
|
} |
86 |
|
static inline const vector unsigned char |
87 |
|
get_perm(unsigned long i) |
88 |
|
{ |
89 |
|
return i ? perm1 : perm0; |
90 |
|
} |
91 |
|
|
92 |
|
#define ZERODEF |
93 |
|
#define ZEROVEC zerovec |
94 |
|
#else |
95 |
|
void |
96 |
|
sadInit_altivec(void) |
97 |
|
{ |
98 |
|
} |
99 |
|
static inline const vector unsigned char |
100 |
|
get_perm(unsigned long i) |
101 |
|
{ |
102 |
|
return perms[i]; |
103 |
|
} |
104 |
|
|
105 |
|
#define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0) |
106 |
|
#define ZEROVEC zerovec |
107 |
|
#endif |
108 |
|
|
|
#undef DEBUG |
|
109 |
|
|
110 |
#define SAD16() \ |
#define SAD16() \ |
111 |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ |
126 |
{ |
{ |
127 |
vector unsigned char perm; |
vector unsigned char perm; |
128 |
vector unsigned char t1, t2, t3, t4 ; |
vector unsigned char t1, t2, t3, t4 ; |
129 |
vector unsigned int sad, zero; |
vector unsigned int sad; |
130 |
vector signed int sumdiffs, best_vec; |
vector signed int sumdiffs, best_vec; |
131 |
unsigned long result; |
unsigned long result; |
132 |
|
|
133 |
|
ZERODEF; |
134 |
|
|
135 |
#ifdef DEBUG |
#ifdef DEBUG |
136 |
if (((unsigned long)cur) & 0xf) |
if (((unsigned long)cur) & 0xf) |
137 |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); |
141 |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); |
142 |
#endif |
#endif |
143 |
/* initialization */ |
/* initialization */ |
144 |
zero = (vector unsigned int)(0); |
sad = (vector unsigned int) (ZEROVEC); |
|
sad = (vector unsigned int)(0); |
|
145 |
stride >>= 4; |
stride >>= 4; |
146 |
perm = vec_lvsl(0, (unsigned char *)ref); |
perm = vec_lvsl(0, (unsigned char *)ref); |
147 |
*((unsigned long *)&best_vec) = best_sad; |
*((unsigned long *)&best_vec) = best_sad; |
153 |
SAD16(); |
SAD16(); |
154 |
SAD16(); |
SAD16(); |
155 |
/* Temp sum for exit */ |
/* Temp sum for exit */ |
156 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
157 |
if (vec_all_ge(sumdiffs, best_vec)) |
if (vec_all_ge(sumdiffs, best_vec)) |
158 |
goto bail; |
goto bail; |
159 |
SAD16(); |
SAD16(); |
160 |
SAD16(); |
SAD16(); |
161 |
SAD16(); |
SAD16(); |
162 |
SAD16(); |
SAD16(); |
163 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
164 |
if (vec_all_ge(sumdiffs, best_vec)) |
if (vec_all_ge(sumdiffs, best_vec)) |
165 |
goto bail; |
goto bail; |
166 |
SAD16(); |
SAD16(); |
173 |
SAD16(); |
SAD16(); |
174 |
|
|
175 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
176 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
177 |
bail: |
bail: |
178 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
179 |
sumdiffs = vec_splat( sumdiffs, 3 ); |
sumdiffs = vec_splat( sumdiffs, 3 ); |
192 |
sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \ |
sad = vec_sum4s(t5, sad); /* accumulate sum of differences */ \ |
193 |
cur += stride<<1; ref += stride<<1; |
cur += stride<<1; ref += stride<<1; |
194 |
|
|
|
static const vector unsigned char perms[2] = { |
|
|
(vector unsigned char)( /* Used when cur is aligned */ |
|
|
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, |
|
|
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 |
|
|
), |
|
|
(vector unsigned char)( /* Used when cur is unaligned */ |
|
|
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, |
|
|
0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f |
|
|
), |
|
|
}; |
|
|
|
|
195 |
/* |
/* |
196 |
* This function assumes cur is 8 bytes aligned, stride is 16 bytes |
* This function assumes cur is 8 bytes aligned, stride is 16 bytes |
197 |
* aligned and ref is unaligned |
* aligned and ref is unaligned |
202 |
unsigned long stride) |
unsigned long stride) |
203 |
{ |
{ |
204 |
vector unsigned char t1, t2, t3, t4, t5, tp ; |
vector unsigned char t1, t2, t3, t4, t5, tp ; |
205 |
vector unsigned int sad, zero; |
vector unsigned int sad; |
206 |
vector signed int sumdiffs; |
vector signed int sumdiffs; |
207 |
vector unsigned char perm_cur; |
vector unsigned char perm_cur; |
208 |
vector unsigned char perm_ref1, perm_ref2; |
vector unsigned char perm_ref1, perm_ref2; |
209 |
unsigned long result; |
unsigned long result; |
210 |
|
|
211 |
|
ZERODEF; |
212 |
|
|
213 |
#ifdef DEBUG |
#ifdef DEBUG |
214 |
if (((unsigned long)cur) & 0x7) |
if (((unsigned long)cur) & 0x7) |
215 |
fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); |
fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); |
219 |
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); |
220 |
#endif |
#endif |
221 |
|
|
222 |
perm_cur = perms[(((unsigned long)cur)>>3) & 0x01]; |
perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01); |
223 |
perm_ref1 = vec_lvsl(0, (unsigned char *)ref); |
perm_ref1 = vec_lvsl(0, (unsigned char *)ref); |
224 |
perm_ref2 = perms[0]; |
perm_ref2 = get_perm(0); |
225 |
|
|
226 |
/* initialization */ |
/* initialization */ |
227 |
zero = (vector unsigned int)(0); |
sad = (vector unsigned int) (ZEROVEC); |
|
sad = (vector unsigned int)(0); |
|
228 |
stride >>= 4; |
stride >>= 4; |
229 |
|
|
230 |
/* perform sum of differences between current and previous */ |
/* perform sum of differences between current and previous */ |
234 |
SAD8(); |
SAD8(); |
235 |
|
|
236 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
237 |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); |
238 |
|
|
239 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
240 |
sumdiffs = vec_splat( sumdiffs, 3 ); |
sumdiffs = vec_splat( sumdiffs, 3 ); |
258 |
unsigned long stride) |
unsigned long stride) |
259 |
{ |
{ |
260 |
vector unsigned char t2,t3,t4, mn; |
vector unsigned char t2,t3,t4, mn; |
261 |
vector unsigned int mean, dev, zero; |
vector unsigned int mean, dev; |
262 |
vector signed int sumdiffs; |
vector signed int sumdiffs; |
263 |
vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15; |
vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, |
264 |
|
c13, c14, c15; |
265 |
unsigned long result; |
unsigned long result; |
266 |
|
|
267 |
zero = (vector unsigned int)(0); |
ZERODEF; |
268 |
mean = (vector unsigned int)(0); |
|
269 |
dev = (vector unsigned int)(0); |
mean = (vector unsigned int) (ZEROVEC); |
270 |
|
dev = (vector unsigned int) (ZEROVEC); |
271 |
stride >>= 4; |
stride >>= 4; |
272 |
|
|
273 |
MEAN16(0); |
MEAN16(0); |
287 |
MEAN16(14); |
MEAN16(14); |
288 |
MEAN16(15); |
MEAN16(15); |
289 |
|
|
290 |
sumdiffs = vec_sums((vector signed int) mean, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); |
291 |
mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, |
mn = vec_perm((vector unsigned char) sumdiffs, |
292 |
(vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14)); |
(vector unsigned char) sumdiffs, (vector unsigned char) (14, |
293 |
|
14, |
294 |
|
14, |
295 |
|
14, |
296 |
|
14, |
297 |
|
14, |
298 |
|
14, |
299 |
|
14, |
300 |
|
14, |
301 |
|
14, |
302 |
|
14, |
303 |
|
14, |
304 |
|
14, |
305 |
|
14, |
306 |
|
14, |
307 |
|
14)); |
308 |
DEV16(0); |
DEV16(0); |
309 |
DEV16(1); |
DEV16(1); |
310 |
DEV16(2); |
DEV16(2); |
323 |
DEV16(15); |
DEV16(15); |
324 |
|
|
325 |
/* sum all parts of difference into one 32 bit quantity */ |
/* sum all parts of difference into one 32 bit quantity */ |
326 |
sumdiffs = vec_sums((vector signed int) dev, (vector signed int) zero); |
sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); |
327 |
|
|
328 |
/* copy vector sum into unaligned result */ |
/* copy vector sum into unaligned result */ |
329 |
sumdiffs = vec_splat( sumdiffs, 3 ); |
sumdiffs = vec_splat( sumdiffs, 3 ); |