[svn] / branches / dev-api-4 / xvidcore / src / motion / ppc_asm / sad_altivec.c Repository:
ViewVC logotype

Diff of /branches/dev-api-4/xvidcore/src/motion/ppc_asm/sad_altivec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 98, Wed Apr 3 14:17:05 2002 UTC revision 430, Fri Sep 6 16:59:47 2002 UTC
# Line 1  Line 1 
1  /*  /*****************************************************************************
2     *
3     *  XVID MPEG-4 VIDEO CODEC
4     *  - altivec sum of absolute difference (C version)
5     *
6     *  Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>
7     *
8     *  This program is an implementation of a part of one or more MPEG-4
9     *  Video tools as specified in ISO/IEC 14496-2 standard.  Those intending
10     *  to use this software module in hardware or software products are
11     *  advised that its use may infringe existing patents or copyrights, and
12     *  any such use would be at such party's own risk.  The original
13     *  developer of this software module and his/her company, and subsequent
14     *  editors and their companies, will have no liability for use of this
15     *  software or modifications or derivatives thereof.
16     *
17     *  This program is free software; you can redistribute it and/or modify
18     *  it under the terms of the GNU General Public License as published by
19     *  the Free Software Foundation; either version 2 of the License, or
20     *  (at your option) any later version.
21     *
22     *  This program is distributed in the hope that it will be useful,
23     *  but WITHOUT ANY WARRANTY; without even the implied warranty of
24     *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
25     *  GNU General Public License for more details.
26     *
27     *  You should have received a copy of the GNU General Public License
28     *  along with this program; if not, write to the Free Software
29     *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA
30     *
31     *  $Id: sad_altivec.c,v 1.4 2002-09-06 16:59:47 chl Exp $
32     *
33     ****************************************************************************/
34    
35    #define G_REG
36    
37    #ifdef G_REG
38    register vector unsigned char perm0 asm("%v29");
39    register vector unsigned char perm1 asm("%v30");
40    register vector unsigned int zerovec asm("%v31");
41    #endif
42    
43      Copyright (C) 2002 Benjamin Herrenschmidt <benh@kernel.crashing.org>  #include <stdio.h>
44    
45      This program is free software; you can redistribute it and/or modify  #undef DEBUG
     it under the terms of the GNU General Public License as published by  
     the Free Software Foundation; either version 2 of the License, or  
     (at your option) any later version.  
   
     This program is distributed in the hope that it will be useful,  
     but WITHOUT ANY WARRANTY; without even the implied warranty of  
     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the  
     GNU General Public License for more details.  
   
     You should have received a copy of the GNU General Public License  
     along with this program; if not, write to the Free Software  
     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  
   
   
     $Id: sad_altivec.c,v 1.1 2002-04-03 14:17:05 canard Exp $  
     $Source: /home/xvid/cvs_copy/cvs-server-root/xvid/xvidcore/src/motion/ppc_asm/sad_altivec.c,v $  
     $Date: 2002-04-03 14:17:05 $  
     $Author: canard $  
46    
47  */  static const vector unsigned char perms[2] = {
48            (vector unsigned char) (        /* Used when cur is aligned */
49                                                               0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
50                                                               0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17),
51            (vector unsigned char) (        /* Used when cur is unaligned */
52                                                               0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
53                                                               0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f),
54    };
55    
56  #include <stdio.h>  #ifdef G_REG
57    void
58    sadInit_altivec(void)
59    {
60            perm0 = perms[0];
61            perm1 = perms[1];
62            zerovec = (vector unsigned int) (0);
63    }
64    static inline const vector unsigned char
65    get_perm(unsigned long i)
66    {
67            return i ? perm1 : perm0;
68    }
69    
70    #define ZERODEF
71    #define ZEROVEC zerovec
72    #else
73    void
74    sadInit_altivec(void)
75    {
76    }
77    static inline const vector unsigned char
78    get_perm(unsigned long i)
79    {
80            return perms[i];
81    }
82    
83    #define ZERODEF vector unsigned int zerovec = (vector unsigned int)(0)
84    #define ZEROVEC zerovec
85    #endif
86    
 #undef DEBUG  
87    
88  #define SAD16() \  #define SAD16() \
89  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \  t1  = vec_perm(ref[0], ref[1], perm);  /* align current vector  */ \
# Line 47  Line 104 
104  {  {
105    vector unsigned char perm;    vector unsigned char perm;
106    vector unsigned char t1, t2, t3, t4 ;    vector unsigned char t1, t2, t3, t4 ;
107    vector unsigned int sad, zero;          vector unsigned int sad;
108    vector signed int sumdiffs, best_vec;    vector signed int sumdiffs, best_vec;
109    unsigned long result;    unsigned long result;
110    
111            ZERODEF;
112    
113  #ifdef DEBUG  #ifdef DEBUG
114    if (((unsigned long)cur) & 0xf)    if (((unsigned long)cur) & 0xf)
115          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad16_altivec:incorrect align, cur: %x\n", cur);
# Line 60  Line 119 
119          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad16_altivec:incorrect align, stride: %x\n", stride);
120  #endif  #endif
121    /* initialization */    /* initialization */
122    zero = (vector unsigned int)(0);          sad = (vector unsigned int) (ZEROVEC);
   sad  = (vector unsigned int)(0);  
123    stride >>= 4;    stride >>= 4;
124    perm = vec_lvsl(0, (unsigned char *)ref);    perm = vec_lvsl(0, (unsigned char *)ref);
125    *((unsigned long *)&best_vec) = best_sad;    *((unsigned long *)&best_vec) = best_sad;
# Line 73  Line 131 
131    SAD16();    SAD16();
132    SAD16();    SAD16();
133    /* Temp sum for exit */    /* Temp sum for exit */
134    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
135    if (vec_all_ge(sumdiffs, best_vec))    if (vec_all_ge(sumdiffs, best_vec))
136          goto bail;          goto bail;
137    SAD16();    SAD16();
138    SAD16();    SAD16();
139    SAD16();    SAD16();
140    SAD16();    SAD16();
141    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
142    if (vec_all_ge(sumdiffs, best_vec))    if (vec_all_ge(sumdiffs, best_vec))
143          goto bail;          goto bail;
144    SAD16();    SAD16();
# Line 93  Line 151 
151    SAD16();    SAD16();
152    
153    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
154    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
155  bail:  bail:
156    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
157    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
# Line 112  Line 170 
170  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \  sad = vec_sum4s(t5, sad);                /* accumulate sum of differences */ \
171  cur += stride<<1; ref += stride<<1;  cur += stride<<1; ref += stride<<1;
172    
 static const vector unsigned char perms[2] = {  
         (vector unsigned char)( /* Used when cur is aligned */  
                 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,  
                 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17  
         ),  
         (vector unsigned char)( /* Used when cur is unaligned */  
                 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,  
                 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f  
         ),  
 };  
   
173  /*  /*
174   * This function assumes cur is 8 bytes aligned, stride is 16 bytes   * This function assumes cur is 8 bytes aligned, stride is 16 bytes
175   * aligned and ref is unaligned   * aligned and ref is unaligned
# Line 133  Line 180 
180                  unsigned long stride)                  unsigned long stride)
181  {  {
182    vector unsigned char t1, t2, t3, t4, t5, tp ;    vector unsigned char t1, t2, t3, t4, t5, tp ;
183    vector unsigned int sad, zero;          vector unsigned int sad;
184    vector signed int sumdiffs;    vector signed int sumdiffs;
185    vector unsigned char perm_cur;    vector unsigned char perm_cur;
186    vector unsigned char perm_ref1, perm_ref2;    vector unsigned char perm_ref1, perm_ref2;
187    unsigned long result;    unsigned long result;
188    
189            ZERODEF;
190    
191  #ifdef DEBUG  #ifdef DEBUG
192    if (((unsigned long)cur) & 0x7)    if (((unsigned long)cur) & 0x7)
193          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);          fprintf(stderr, "sad8_altivec:incorrect align, cur: %x\n", cur);
# Line 148  Line 197 
197          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);          fprintf(stderr, "sad8_altivec:incorrect align, stride: %x\n", stride);
198  #endif  #endif
199    
200    perm_cur = perms[(((unsigned long)cur)>>3) & 0x01];          perm_cur = get_perm((((unsigned long) cur) >> 3) & 0x01);
201    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);    perm_ref1 = vec_lvsl(0, (unsigned char *)ref);
202    perm_ref2 = perms[0];          perm_ref2 = get_perm(0);
203    
204    /* initialization */    /* initialization */
205    zero = (vector unsigned int)(0);          sad = (vector unsigned int) (ZEROVEC);
   sad  = (vector unsigned int)(0);  
206    stride >>= 4;    stride >>= 4;
207    
208    /* perform sum of differences between current and previous */    /* perform sum of differences between current and previous */
# Line 164  Line 212 
212    SAD8();    SAD8();
213    
214    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
215    sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) sad, (vector signed int) ZEROVEC);
216    
217    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
218    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );
# Line 188  Line 236 
236                  unsigned long stride)                  unsigned long stride)
237  {  {
238    vector unsigned char t2,t3,t4, mn;    vector unsigned char t2,t3,t4, mn;
239    vector unsigned int mean, dev, zero;          vector unsigned int mean, dev;
240    vector signed int sumdiffs;    vector signed int sumdiffs;
241    vector unsigned char c0,c1,c2,c3,c4,c5,c6,c7,c8,c9,c10,c11,c12,c13,c14,c15;          vector unsigned char c0, c1, c2, c3, c4, c5, c6, c7, c8, c9, c10, c11, c12,
242                    c13, c14, c15;
243    unsigned long result;    unsigned long result;
244    
245    zero = (vector unsigned int)(0);          ZERODEF;
246    mean = (vector unsigned int)(0);  
247    dev = (vector unsigned int)(0);          mean = (vector unsigned int) (ZEROVEC);
248            dev = (vector unsigned int) (ZEROVEC);
249    stride >>= 4;    stride >>= 4;
250    
251    MEAN16(0);    MEAN16(0);
# Line 215  Line 265 
265    MEAN16(14);    MEAN16(14);
266    MEAN16(15);    MEAN16(15);
267    
268    sumdiffs = vec_sums((vector signed int) mean, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) mean, (vector signed int) ZEROVEC);
269    mn = vec_perm((vector unsigned char)sumdiffs, (vector unsigned char)sumdiffs,          mn = vec_perm((vector unsigned char) sumdiffs,
270          (vector unsigned char)(14,14,14,14,14,14,14,14,14,14,14,14,14,14,14,14));                                    (vector unsigned char) sumdiffs, (vector unsigned char) (14,
271                                                                                                                                                       14,
272                                                                                                                                                       14,
273                                                                                                                                                       14,
274                                                                                                                                                       14,
275                                                                                                                                                       14,
276                                                                                                                                                       14,
277                                                                                                                                                       14,
278                                                                                                                                                       14,
279                                                                                                                                                       14,
280                                                                                                                                                       14,
281                                                                                                                                                       14,
282                                                                                                                                                       14,
283                                                                                                                                                       14,
284                                                                                                                                                       14,
285                                                                                                                                                       14));
286    DEV16(0);    DEV16(0);
287    DEV16(1);    DEV16(1);
288    DEV16(2);    DEV16(2);
# Line 236  Line 301 
301    DEV16(15);    DEV16(15);
302    
303    /* sum all parts of difference into one 32 bit quantity */    /* sum all parts of difference into one 32 bit quantity */
304    sumdiffs = vec_sums((vector signed int) dev, (vector signed int) zero);          sumdiffs = vec_sums((vector signed int) dev, (vector signed int) ZEROVEC);
305    
306    /* copy vector sum into unaligned result */    /* copy vector sum into unaligned result */
307    sumdiffs = vec_splat( sumdiffs, 3 );    sumdiffs = vec_splat( sumdiffs, 3 );

Legend:
Removed from v.98  
changed lines
  Added in v.430

No admin address has been configured
ViewVC Help
Powered by ViewVC 1.0.4