--- trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c 2002/06/09 23:30:50 194 +++ trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c 2002/06/12 20:38:41 195 @@ -17,19 +17,19 @@ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - $Id: sad_altivec.c,v 1.2 2002-04-11 10:18:40 canard Exp $ + $Id: sad_altivec.c,v 1.3 2002-06-12 20:38:40 edgomez Exp $ $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $ - $Date: 2002-04-11 10:18:40 $ - $Author: canard $ + $Date: 2002-06-12 20:38:40 $ + $Author: edgomez $ */ #define G_REG #ifdef G_REG -register vector unsigned char perm0 asm ("%v29"); -register vector unsigned char perm1 asm ("%v30"); -register vector unsigned int zerovec asm ("%v31"); +register vector unsigned char perm0 asm("%v29"); +register vector unsigned char perm1 asm("%v30"); +register vector unsigned int zerovec asm("%v31"); #endif #include @@ -37,35 +37,41 @@ #undef DEBUG static const vector unsigned char perms[2] = { - (vector unsigned char)( /* Used when cur is aligned */ - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, - 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 - ), - (vector unsigned char)( /* Used when cur is unaligned */ - 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, - 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f - ), + (vector unsigned char) ( /* Used when cur is aligned */ + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17), + (vector unsigned char) ( /* Used when cur is unaligned */ + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f), }; #ifdef G_REG -void sadInit_altivec(void) +void +sadInit_altivec(void) { perm0 = perms[0]; perm1 = perms[1]; - zerovec = (vector unsigned int)(0); + zerovec = (vector unsigned int) (0); } -static inline const vector unsigned char get_perm(unsigned long i) +static inline const vector unsigned char +get_perm(unsigned long i) { return i ? perm1 : perm0; } + #define ZERODEF #define ZEROVEC zerovec #else -void sadInit_altivec(void) { } -static inline const vector unsigned char get_perm(unsigned long i) +void +sadInit_altivec(void) +{ +} +static inline const vector unsigned char +get_perm(unsigned long i) { return perms[i]; } + #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0) #define ZEROVEC zerovec #endif @@ -83,65 +89,66 @@ * This function assumes cur and stride are 16 bytes aligned and ref is unaligned */ unsigned long -sad16_altivec( const vector unsigned char * cur, - const vector unsigned char * ref, - unsigned long stride, - const unsigned long best_sad) +sad16_altivec(const vector unsigned char *cur, + const vector unsigned char *ref, + unsigned long stride, + const unsigned long best_sad) { - vector unsigned char perm; - vector unsigned char t1, t2, t3, t4 ; - vector unsigned int sad; - vector signed int sumdiffs, best_vec; - unsigned long result; - ZERODEF; - + vector unsigned char perm; + vector unsigned char t1, t2, t3, t4; + vector unsigned int sad; + vector signed int sumdiffs, best_vec; + unsigned long result; + + ZERODEF; + #ifdef DEBUG - if (((unsigned long)cur) & 0xf) - fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); + if (((unsigned long) cur) & 0xf) + fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); // if (((unsigned long)ref) & 0xf) -// fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref); - if (stride & 0xf) - fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); -#endif - /* initialization */ - sad = (vector unsigned int)(ZEROVEC); - stride >>= 4; - perm = vec_lvsl(0, (unsigned char *)ref); - *((unsigned long *)&best_vec) = best_sad; - best_vec = vec_splat(best_vec, 0); - - /* perform sum of differences between current and previous */ - SAD16(); - SAD16(); - SAD16(); - SAD16(); - /* Temp sum for exit */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); - if (vec_all_ge(sumdiffs, best_vec)) - goto bail; - SAD16(); - SAD16(); - SAD16(); - SAD16(); - sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); - if (vec_all_ge(sumdiffs, best_vec)) - goto bail; - SAD16(); - SAD16(); - SAD16(); - SAD16(); - SAD16(); - SAD16(); - SAD16(); - SAD16(); - - /* sum all parts of difference into one 32 bit quantity */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); -bail: - /* copy vector sum into unaligned result */ - sumdiffs = vec_splat( sumdiffs, 3 ); - vec_ste( sumdiffs, 0, (int *)&result ); - return( result ); +// fprintf(stderr, "sad16_altivec:incorrect align, ref: %x\n", ref); + if (stride & 0xf) + fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); +#endif + /* initialization */ + sad = (vector unsigned int) (ZEROVEC); + stride >>= 4; + perm = vec_lvsl(0, (unsigned char *) ref); + *((unsigned long *) &best_vec) = best_sad; + best_vec = vec_splat(best_vec, 0); + + /* perform sum of differences between current and previous */ + SAD16(); + SAD16(); + SAD16(); + SAD16(); + /* Temp sum for exit */ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); + if (vec_all_ge(sumdiffs, best_vec)) + goto bail; + SAD16(); + SAD16(); + SAD16(); + SAD16(); + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); + if (vec_all_ge(sumdiffs, best_vec)) + goto bail; + SAD16(); + SAD16(); + SAD16(); + SAD16(); + SAD16(); + SAD16(); + SAD16(); + SAD16(); + + /* sum all parts of difference into one 32 bit quantity */ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); + bail: + /* copy vector sum into unaligned result */ + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, (int *) &result); + return (result); } #define SAD8() \ @@ -160,48 +167,49 @@ * aligned and ref is unaligned */ unsigned long -sad8_altivec( const vector unsigned char * cur, - const vector unsigned char * ref, - unsigned long stride) +sad8_altivec(const vector unsigned char *cur, + const vector unsigned char *ref, + unsigned long stride) { - vector unsigned char t1, t2, t3, t4, t5, tp ; - vector unsigned int sad; - vector signed int sumdiffs; - vector unsigned char perm_cur; - vector unsigned char perm_ref1, perm_ref2; - unsigned long result; - ZERODEF; + vector unsigned char t1, t2, t3, t4, t5, tp; + vector unsigned int sad; + vector signed int sumdiffs; + vector unsigned char perm_cur; + vector unsigned char perm_ref1, perm_ref2; + unsigned long result; + + ZERODEF; #ifdef DEBUG - if (((unsigned long)cur) & 0x7) - fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); + if (((unsigned long) cur) & 0x7) + fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); // if (((unsigned long)ref) & 0x7) -// fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref); - if (stride & 0xf) - fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); -#endif - - perm_cur = get_perm((((unsigned long)cur)>>3) & 0x01); - perm_ref1 = vec_lvsl(0, (unsigned char *)ref); - perm_ref2 = get_perm(0); - - /* initialization */ - sad = (vector unsigned int)(ZEROVEC); - stride >>= 4; - - /* perform sum of differences between current and previous */ - SAD8(); - SAD8(); - SAD8(); - SAD8(); - - /* sum all parts of difference into one 32 bit quantity */ - sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC); - - /* copy vector sum into unaligned result */ - sumdiffs = vec_splat( sumdiffs, 3 ); - vec_ste( sumdiffs, 0, (int *)&result ); - return( result ); +// fprintf(stderr, "sad8_altivec:incorrect align, ref: %x\n", ref); + if (stride & 0xf) + fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); +#endif + + perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01); + perm_ref1 = vec_lvsl(0, (unsigned char *) ref); + perm_ref2 = get_perm(0); + + /* initialization */ + sad = (vector unsigned int) (ZEROVEC); + stride >>= 4; + + /* perform sum of differences between current and previous */ + SAD8(); + SAD8(); + SAD8(); + SAD8(); + + /* sum all parts of difference into one 32 bit quantity */ + sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC); + + /* copy vector sum into unaligned result */ + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, (int *) &result); + return (result); } #define MEAN16(i)\ @@ -216,62 +224,79 @@ dev = vec_sum4s(t4, dev); unsigned long -dev16_altivec( const vector unsigned char * cur, - unsigned long stride) +dev16_altivec(const vector unsigned char *cur, + unsigned long stride) { - vector unsigned char t2,t3,t4, mn; - vector unsigned int mean, dev; - vector signed int sumdiffs; - vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15; - unsigned long result; - ZERODEF; - - mean = (vector unsigned int)(ZEROVEC); - dev = (vector unsigned int)(ZEROVEC); - stride >>= 4; - - MEAN16(0); - MEAN16(1); - MEAN16(2); - MEAN16(3); - MEAN16(4); - MEAN16(5); - MEAN16(6); - MEAN16(7); - MEAN16(8); - MEAN16(9); - MEAN16(10); - MEAN16(11); - MEAN16(12); - MEAN16(13); - MEAN16(14); - MEAN16(15); - - sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); - mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, - (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14)); - DEV16(0); - DEV16(1); - DEV16(2); - DEV16(3); - DEV16(4); - DEV16(5); - DEV16(6); - DEV16(7); - DEV16(8); - DEV16(9); - DEV16(10); - DEV16(11); - DEV16(12); - DEV16(13); - DEV16(14); - DEV16(15); - - /* sum all parts of difference into one 32 bit quantity */ - sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); - - /* copy vector sum into unaligned result */ - sumdiffs = vec_splat( sumdiffs, 3 ); - vec_ste( sumdiffs, 0, (int *)&result ); - return( result ); + vector unsigned char t2, t3, t4, mn; + vector unsigned int mean, dev; + vector signed int sumdiffs; + vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12, + c13, c14, c15; + unsigned long result; + + ZERODEF; + + mean = (vector unsigned int) (ZEROVEC); + dev = (vector unsigned int) (ZEROVEC); + stride >>= 4; + + MEAN16(0); + MEAN16(1); + MEAN16(2); + MEAN16(3); + MEAN16(4); + MEAN16(5); + MEAN16(6); + MEAN16(7); + MEAN16(8); + MEAN16(9); + MEAN16(10); + MEAN16(11); + MEAN16(12); + MEAN16(13); + MEAN16(14); + MEAN16(15); + + sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC); + mn = vec_perm((vector unsigned char) sumdiffs, + (vector unsigned char) sumdiffs, (vector unsigned char) (14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14, + 14)); + DEV16(0); + DEV16(1); + DEV16(2); + DEV16(3); + DEV16(4); + DEV16(5); + DEV16(6); + DEV16(7); + DEV16(8); + DEV16(9); + DEV16(10); + DEV16(11); + DEV16(12); + DEV16(13); + DEV16(14); + DEV16(15); + + /* sum all parts of difference into one 32 bit quantity */ + sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC); + + /* copy vector sum into unaligned result */ + sumdiffs = vec_splat(sumdiffs, 3); + vec_ste(sumdiffs, 0, (int *) &result); + return (result); }