[svn] / trunk / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /trunk/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory | Revision Log | View Patch

revision 98, Wed Apr 3 14:17:05 2002 UTC | revision 115, Thu Apr 11 10:18:40 2002 UTC
# Line 17  Line 17 
17      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA      Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
18    
19    
20      $Id: sad_altivec.c,v 1.1 2002-04-03 14:17:05 canard Exp $      $Id: sad_altivec.c,v 1.2 2002-04-11 10:18:40 canard Exp $
21      $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $      $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $
22      $Date: 2002-04-03 14:17:05 $      $Date: 2002-04-11 10:18:40 $
23      $Author: canard $      $Author: canard $
24    
25  */  */
26    
27    #define G_REG
28    
29    #ifdef G_REG
30    register vector unsigned char perm0 asm ("%v29");
31    register vector unsigned char perm1 asm ("%v30");
32    register vector unsigned int zerovec asm ("%v31");
33    #endif
34    
35  #include <stdio.h>  #include <stdio.h>
36    
37  #undef DEBUG  #undef DEBUG
38    
39    static const vector unsigned char perms[2] = {
40            (vector unsigned char)( /* Used when cur is aligned */
41                    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
42                    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17
43            ),
44            (vector unsigned char)( /* Used when cur is unaligned */
45                    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
46                    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f
47            ),
48    };
49    
50    #ifdef G_REG
51    void sadInit_altivec(void)
52    {
53            perm0 = perms[0];
54            perm1 = perms[1];
55            zerovec = (vector unsigned int)(0);
56    }
57    static inline const vector unsigned char get_perm(unsigned long i)
58    {
59            return i ? perm1 : perm0;
60    }
61    #define ZERODEF
62    #define ZEROVEC zerovec
63    #else
64    void sadInit_altivec(void) { }
65    static inline const vector unsigned char get_perm(unsigned long i)
66    {
67            return perms[i];
68    }
69    #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
70    #define ZEROVEC zerovec
71    #endif
72    
73    
74  #define SAD16() \  #define SAD16() \
75  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \
76  t2  = vec_max(t1, *cur);         /* find largest of two           */ \  t2  = vec_max(t1, *cur);         /* find largest of two           */ \
# Line 47  Line 90 
90  {  {
91    vector unsigned char perm;    vector unsigned char perm;
92    vector unsigned char t1, t2, t3, t4 ;    vector unsigned char t1, t2, t3, t4 ;
93    vector unsigned int sad, zero;    vector unsigned int sad;
94    vector signed int sumdiffs, best_vec;    vector signed int sumdiffs, best_vec;
95    unsigned long result;    unsigned long result;
96      ZERODEF;
97    
98  #ifdef DEBUG  #ifdef DEBUG
99    if (((unsigned long)cur) & 0xf)    if (((unsigned long)cur) & 0xf)
# Line 60  Line 104 
104          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
105  #endif  #endif
106    /* initialization */    /* initialization */
107    zero = (vector unsigned int)(0);    sad  = (vector unsigned int)(ZEROVEC);
   sad  = (vector unsigned int)(0);  
108    stride >>= 4;    stride >>= 4;
109    perm = vec_lvsl(0, (unsigned char *)ref);    perm = vec_lvsl(0, (unsigned char *)ref);
110    *((unsigned long *)&best_vec) = best_sad;    *((unsigned long *)&best_vec) = best_sad;
# Line 73  Line 116 
116    SAD16();    SAD16();
117    SAD16();    SAD16();
118    /* Temp sum for exit */    /* Temp sum for exit */
119    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
120    if (vec_all_ge(sumdiffs, best_vec))    if (vec_all_ge(sumdiffs, best_vec))
121          goto bail;          goto bail;
122    SAD16();    SAD16();
123    SAD16();    SAD16();
124    SAD16();    SAD16();
125    SAD16();    SAD16();
126    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
127    if (vec_all_ge(sumdiffs, best_vec))    if (vec_all_ge(sumdiffs, best_vec))
128          goto bail;          goto bail;
129    SAD16();    SAD16();
# Line 93  Line 136 
136    SAD16();    SAD16();
137    
138    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
139    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
140  bail:  bail:
141    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
142    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
# Line 112  Line 155 
155  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \
156  cur += stride<<1; ref += stride<<1;  cur += stride<<1; ref += stride<<1;
157    
 static const vector unsigned char perms[2] = {  
         (vector unsigned char)( /* Used when cur is aligned */  
                 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  
                 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17  
         ),  
         (vector unsigned char)( /* Used when cur is unaligned */  
                 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  
                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f  
         ),  
 };  
   
158  /*  /*
159   * This function assumes cur is 8 bytes aligned, stride is 16 bytes   * This function assumes cur is 8 bytes aligned, stride is 16 bytes
160   * aligned and ref is unaligned   * aligned and ref is unaligned
# Line 133  Line 165 
165                  unsigned long stride)                  unsigned long stride)
166  {  {
167    vector unsigned char t1, t2, t3, t4, t5, tp ;    vector unsigned char t1, t2, t3, t4, t5, tp ;
168    vector unsigned int sad, zero;    vector unsigned int sad;
169    vector signed int sumdiffs;    vector signed int sumdiffs;
170    vector unsigned char perm_cur;    vector unsigned char perm_cur;
171    vector unsigned char perm_ref1, perm_ref2;    vector unsigned char perm_ref1, perm_ref2;
172    unsigned long result;    unsigned long result;
173      ZERODEF;
174    
175  #ifdef DEBUG  #ifdef DEBUG
176    if (((unsigned long)cur) & 0x7)    if (((unsigned long)cur) & 0x7)
# Line 148  Line 181 
181          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
182  #endif  #endif
183    
184    perm_cur = perms[(((unsigned long)cur)>>3) & 0x01];    perm_cur = get_perm((((unsigned long)cur)>>3) & 0x01);
185    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);
186    perm_ref2 = perms[0];    perm_ref2 = get_perm(0);
187    
188    /* initialization */    /* initialization */
189    zero = (vector unsigned int)(0);    sad  = (vector unsigned int)(ZEROVEC);
   sad  = (vector unsigned int)(0);  
190    stride >>= 4;    stride >>= 4;
191    
192    /* perform sum of differences between current and previous */    /* perform sum of differences between current and previous */
# Line 164  Line 196 
196    SAD8();    SAD8();
197    
198    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
199    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);    sumdiffs = vec_sums((vector signed int) sad, (vector signed int)ZEROVEC);
200    
201    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
202    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
# Line 188  Line 220 
220                  unsigned long stride)                  unsigned long stride)
221  {  {
222    vector unsigned char t2,t3,t4, mn;    vector unsigned char t2,t3,t4, mn;
223    vector unsigned int mean, dev, zero;    vector unsigned int mean, dev;
224    vector signed int sumdiffs;    vector signed int sumdiffs;
225    vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;    vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;
226    unsigned long result;    unsigned long result;
227      ZERODEF;
228    
229    zero = (vector unsigned int)(0);    mean = (vector unsigned int)(ZEROVEC);
230    mean = (vector unsigned int)(0);    dev = (vector unsigned int)(ZEROVEC);
   dev = (vector unsigned int)(0);  
231    stride >>= 4;    stride >>= 4;
232    
233    MEAN16(0);    MEAN16(0);
# Line 215  Line 247 
247    MEAN16(14);    MEAN16(14);
248    MEAN16(15);    MEAN16(15);
249    
250    sumdiffs = vec_sums((vector signed int) mean, (vector signed int) zero);    sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
251    mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,    mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,
252          (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));          (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));
253    DEV16(0);    DEV16(0);
# Line 236  Line 268 
268    DEV16(15);    DEV16(15);
269    
270    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
271    sumdiffs = vec_sums((vector signed int) dev, (vector signed int) zero);    sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
272    
273    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
274    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );

Legend:
Removed from v.98  
changed lines
  Added in v.115

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4