/* Copyright (C) 2002 Benjamin Herrenschmidt This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA $Id: sad_altivec.c,v 1.10 2004-04-12 14:05:08 edgomez Exp $ */ #ifdef HAVE_ALTIVEC_H #include #endif #include "../../portab.h" /* no debugging by default */ #undef DEBUG #include #define SAD16() \ t1 = vec_perm(ref[0], ref[1], perm); /* align current vector */ \ t2 = vec_max(t1, *cur); /* find largest of two */ \ t1 = vec_min(t1, *cur); /* find smaller of two */ \ t1 = vec_sub(t2, t1); /* find absolute difference */ \ sad = vec_sum4s(t1, vec_splat_u32(0)); /* sum of differences */ \ sumdiffs = (vector unsigned int)vec_sums((vector signed int)sad, (vector signed int)sumdiffs); /* accumulate sumdiffs */ \ if(vec_any_ge(sumdiffs, best_vec)) \ goto bail; \ cur += stride; ref += stride; /* * This function assumes cur and stride are 16 bytes aligned and ref is unaligned */ unsigned long sad16_altivec_c(const vector unsigned char *cur, const vector unsigned char *ref, unsigned long stride, const unsigned long best_sad) { vector unsigned char perm; vector unsigned char t1, t2; vector unsigned int sad; vector unsigned int sumdiffs; vector unsigned int best_vec; unsigned long result; #ifdef DEBUG /* print alignment errors if DEBUG is on */ if (((unsigned long) cur) & 0xf) fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur); if (stride & 0xf) fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride); #endif /* initialization */ sad = vec_splat_u32(0); sumdiffs = sad; stride >>= 4; perm = vec_lvsl(0, (unsigned char *) ref); *((unsigned long *) &best_vec) = best_sad; best_vec = vec_splat(best_vec, 0); /* perform sum of differences between current and previous */ SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); SAD16(); bail: /* copy vector sum into unaligned result */ sumdiffs = vec_splat(sumdiffs, 3); vec_ste(sumdiffs, 0, (unsigned long *) &result); return result; } #define SAD8() \ t1 = vec_perm(cur[0], cur[stride], perm_cur); /* align current vector */ \ t2 = vec_perm(ref[0], ref[1], perm_ref1); /* align current vector */ \ tp = vec_perm(ref[stride], ref[stride+1], perm_ref1); /* align current vector */ \ t2 = vec_perm(t2,tp,perm_ref2); \ tp = vec_max(t1, t2); /* find largest of two */ \ t1 = vec_min(t1, t2); /* find smaller of two */ \ tp = vec_sub(tp, t1); /* find absolute difference */ \ sad = vec_sum4s(tp, sad); /* accumulate sum of differences */ \ cur += stride<<1; ref += stride<<1; /* * This function assumes cur is 8 bytes aligned, stride is 16 bytes * aligned and ref is unaligned */ unsigned long sad8_altivec_c(const vector unsigned char *cur, const vector unsigned char *ref, unsigned long stride) { vector unsigned char t1, t2, tp; vector unsigned int sad; vector unsigned int sumdiffs; vector unsigned char perm_cur; vector unsigned char perm_ref1, perm_ref2; unsigned long result; #ifdef DEBUG /* print alignment errors if DEBUG is on */ if (((unsigned long) cur) & 0x7) fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur); if (stride & 0xf) fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride); #endif /* check if cur is 8 or 16 bytes aligned an create the perm_cur vector */ perm_ref1 = vec_lvsl(0, (unsigned char*)ref); perm_ref2 = vec_add(vec_lvsl(0, (unsigned char*)NULL), vec_pack(vec_splat_u16(0), vec_splat_u16(8))); perm_cur = vec_add(perm_ref2, vec_splat(vec_lvsl(0, (unsigned char*)cur), 0)); /* initialization */ sad = vec_splat_u32(0); stride >>= 4; /* perform sum of differences between current and previous */ SAD8(); SAD8(); SAD8(); SAD8(); /* sum all parts of difference into one 32 bit quantity */ sumdiffs = (vector unsigned int)vec_sums((vector signed int) sad, vec_splat_s32(0)); /* copy vector sum into unaligned result */ sumdiffs = vec_splat(sumdiffs, 3); vec_ste(sumdiffs, 0, (unsigned int *) &result); return result; } #define MEAN16() \ mean = vec_sum4s(*ptr,mean);\ ptr += stride #define DEV16() \ t2 = vec_max(*ptr, mn); /* find largest of two */ \ t3 = vec_min(*ptr, mn); /* find smaller of two */ \ t2 = vec_sub(t2, t3); /* find absolute difference */ \ dev = vec_sum4s(t2, dev); \ ptr += stride /* * This function assumes cur is 16 bytes aligned and stride is 16 bytes * aligned */ unsigned long dev16_altivec_c(const vector unsigned char *cur, unsigned long stride) { vector unsigned char t2, t3, mn; vector unsigned int mean, dev; vector unsigned int sumdiffs; const vector unsigned char *ptr; unsigned long result; #ifdef DEBUG /* print alignment errors if DEBUG is on */ if(((unsigned long)cur) & 0x7) fprintf(stderr, "dev16_altivec:incorrect align, cur: %x\n", cur); if(stride & 0xf) fprintf(stderr, "dev16_altivec:incorrect align, stride: %ld\n", stride); #endif dev = mean = vec_splat_u32(0); stride >>= 4; /* set pointer to iterate through cur */ ptr = cur; MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); MEAN16(); /* Add all together in sumdiffs */ sumdiffs = (vector unsigned int)vec_sums((vector signed int) mean, vec_splat_s32(0)); /* teilen durch 16 * 16 */ mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs, vec_splat_u8(14)); /* set pointer to iterate through cur */ ptr = cur; DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); DEV16(); /* sum all parts of difference into one 32 bit quantity */ sumdiffs = (vector unsigned int)vec_sums((vector signed int) dev, vec_splat_s32(0)); /* copy vector sum into unaligned result */ sumdiffs = vec_splat(sumdiffs, 3); vec_ste(sumdiffs, 0, (unsigned int *) &result); return result; } #define SAD16BI() \ t1 = vec_perm(ref1[0], ref1[1], mask1); \ t2 = vec_perm(ref2[0], ref2[1], mask2); \ t1 = vec_avg(t1, t2); \ t2 = vec_max(t1, *cur); \ t1 = vec_min(t1, *cur); \ sad = vec_sub(t2, t1); \ sum = vec_sum4s(sad, sum); \ cur += stride; \ ref1 += stride; \ ref2 += stride /* * This function assumes cur is 16 bytes aligned, stride is 16 bytes * aligned and ref1 and ref2 is unaligned */ unsigned long sad16bi_altivec_c(vector unsigned char *cur, vector unsigned char *ref1, vector unsigned char *ref2, unsigned long stride) { vector unsigned char t1, t2; vector unsigned char mask1, mask2; vector unsigned char sad; vector unsigned int sum; unsigned long result; #ifdef DEBUG /* print alignment errors if this is on */ if(cur & 0xf) fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %x\n", cur); if(stride & 0xf) fprintf(stderr, "sad16bi_altivec:incorrect align, cur: %ld\n", stride); #endif /* Initialisation stuff */ stride >>= 4; mask1 = vec_lvsl(0, (unsigned char*)ref1); mask2 = vec_lvsl(0, (unsigned char*)ref2); sad = vec_splat_u8(0); sum = (vector unsigned int)sad; SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); SAD16BI(); sum = (vector unsigned int)vec_sums((vector signed int)sum, vec_splat_s32(0)); sum = vec_splat(sum, 3); vec_ste(sum, 0, (unsigned int*)&result); return result; } #define SSE8_16BIT() \ b1_vec = vec_perm(vec_ld(0,b1), vec_ld(16,b1), vec_lvsl(0,b1)); \ b2_vec = vec_perm(vec_ld(0,b2), vec_ld(16,b2), vec_lvsl(0,b2)); \ diff = vec_sub(b1_vec,b2_vec); \ sum = vec_msum(diff,diff,sum); \ b1 = (const int16_t*)((int8_t*)b1+stride); \ b2 = (const int16_t*)((int8_t*)b2+stride) uint32_t sse8_16bit_altivec_c(const int16_t * b1, const int16_t * b2, const uint32_t stride) { register vector signed short b1_vec; register vector signed short b2_vec; register vector signed short diff; register vector signed int sum; uint32_t result; /* initialize */ sum = vec_splat_s32(0); SSE8_16BIT(); SSE8_16BIT(); SSE8_16BIT(); SSE8_16BIT(); SSE8_16BIT(); SSE8_16BIT(); SSE8_16BIT(); SSE8_16BIT(); /* sum the vector */ sum = vec_sums(sum, vec_splat_s32(0)); sum = vec_splat(sum,3); vec_ste(sum,0,(int*)&result); /* and return */ return result; }