1 |
/************************************************************************** |
/***************************************************************************** |
2 |
* |
* |
3 |
* XVID MPEG-4 VIDEO CODEC |
* XVID MPEG-4 VIDEO CODEC |
4 |
* 8x8 block-based halfpel interpolation |
* - 8x8 block-based halfpel interpolation - |
5 |
|
* |
6 |
|
* Copyright(C) 2001-2003 Peter Ross <pross@xvid.org> |
7 |
* |
* |
8 |
* This program is free software; you can redistribute it and/or modify |
* This program is free software; you can redistribute it and/or modify |
9 |
* it under the terms of the GNU General Public License as published by |
* it under the terms of the GNU General Public License as published by |
17 |
* |
* |
18 |
* You should have received a copy of the GNU General Public License |
* You should have received a copy of the GNU General Public License |
19 |
* along with this program; if not, write to the Free Software |
* along with this program; if not, write to the Free Software |
20 |
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
|
* |
|
|
*************************************************************************/ |
|
|
|
|
|
/************************************************************************** |
|
21 |
* |
* |
22 |
* History: |
* $Id: interpolate8x8.c,v 1.12 2004-03-22 22:36:23 edgomez Exp $ |
23 |
* |
* |
24 |
* 05.10.2002 new bilinear and qpel interpolation code - Isibaar |
****************************************************************************/ |
|
* 27.12.2001 modified "compensate_halfpel" |
|
|
* 05.11.2001 initial version; (c)2001 peter ross <pross@cs.rmit.edu.au> |
|
|
* |
|
|
*************************************************************************/ |
|
|
|
|
25 |
|
|
26 |
#include "../portab.h" |
#include "../portab.h" |
27 |
#include "../global.h" |
#include "../global.h" |
28 |
#include "interpolate8x8.h" |
#include "interpolate8x8.h" |
29 |
|
|
30 |
// function pointers |
/* function pointers */ |
31 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_h; |
32 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_v; |
33 |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv; |
INTERPOLATE8X8_PTR interpolate8x8_halfpel_hv; |
93 |
} |
} |
94 |
} |
} |
95 |
|
|
96 |
// dst = interpolate(src) |
/* dst = interpolate(src) */ |
97 |
|
|
98 |
void |
void |
99 |
interpolate8x8_halfpel_h_c(uint8_t * const dst, |
interpolate8x8_halfpel_h_c(uint8_t * const dst, |
101 |
const uint32_t stride, |
const uint32_t stride, |
102 |
const uint32_t rounding) |
const uint32_t rounding) |
103 |
{ |
{ |
104 |
intptr_t j; |
uintptr_t j; |
105 |
|
|
106 |
if (rounding) |
if (rounding) |
107 |
for (j = 7*stride; j >= 0; j-=stride) |
for (j = 0; j < 8*stride; j+=stride) |
108 |
{ |
{ |
109 |
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1); |
dst[j + 0] = (uint8_t)((src[j + 0] + src[j + 1] )>>1); |
110 |
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1); |
dst[j + 1] = (uint8_t)((src[j + 1] + src[j + 2] )>>1); |
137 |
const uint32_t stride, |
const uint32_t stride, |
138 |
const uint32_t rounding) |
const uint32_t rounding) |
139 |
{ |
{ |
140 |
intptr_t j; |
uintptr_t j; |
141 |
// const uint8_t * const src2 = src+stride; /* using a second pointer is _not_ faster here */ |
|
142 |
|
|
143 |
if (rounding) |
if (rounding) |
144 |
for (j = 0; j < 8*stride; j+=stride) /* forward is better. Some automatic prefetch perhaps. */ |
for (j = 0; j < 8*stride; j+=stride) /* forward is better. Some automatic prefetch perhaps. */ |
173 |
const uint32_t stride, |
const uint32_t stride, |
174 |
const uint32_t rounding) |
const uint32_t rounding) |
175 |
{ |
{ |
176 |
intptr_t j; |
uintptr_t j; |
177 |
|
|
178 |
if (rounding) |
if (rounding) |
179 |
for (j = 7*stride; j >= 0; j-=stride) |
for (j = 0; j < 8*stride; j+=stride) |
180 |
{ |
{ |
181 |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2); |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +1)>>2); |
182 |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2); |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +1)>>2); |
188 |
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2); |
dst[j + 7] = (uint8_t)((src[j+7] + src[j+8] + src[j+stride+7] + src[j+stride+8] +1)>>2); |
189 |
} |
} |
190 |
else |
else |
191 |
for (j = 7*stride; j >= 0; j-=stride) |
for (j = 0; j < 8*stride; j+=stride) |
192 |
{ |
{ |
193 |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2); |
dst[j + 0] = (uint8_t)((src[j+0] + src[j+1] + src[j+stride+0] + src[j+stride+1] +2)>>2); |
194 |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2); |
dst[j + 1] = (uint8_t)((src[j+1] + src[j+2] + src[j+stride+1] + src[j+stride+2] +2)>>2); |
201 |
} |
} |
202 |
} |
} |
203 |
|
|
|
|
|
|
|
|
|
|
|
204 |
/************************************************************* |
/************************************************************* |
205 |
* QPEL STUFF STARTS HERE * |
* QPEL STUFF STARTS HERE * |
206 |
*************************************************************/ |
*************************************************************/ |