Parent Directory | Revision Log
Revision 1988 - (view) (download)
1 : | edgomez | 1382 | /***************************************************************************** |
2 : | * | ||
3 : | * XVID MPEG-4 VIDEO CODEC | ||
4 : | * - Motion Estimation for B-VOPs - | ||
5 : | * | ||
6 : | * Copyright(C) 2002 Christoph Lampert <gruel@web.de> | ||
7 : | Isibaar | 1909 | * 2002-2010 Michael Militzer <michael@xvid.org> |
8 : | edgomez | 1382 | * 2002-2003 Radoslaw Czyz <xvid@syskin.cjb.net> |
9 : | * | ||
10 : | * This program is free software ; you can redistribute it and/or modify | ||
11 : | * it under the terms of the GNU General Public License as published by | ||
12 : | * the Free Software Foundation ; either version 2 of the License, or | ||
13 : | * (at your option) any later version. | ||
14 : | * | ||
15 : | * This program is distributed in the hope that it will be useful, | ||
16 : | * but WITHOUT ANY WARRANTY ; without even the implied warranty of | ||
17 : | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
18 : | * GNU General Public License for more details. | ||
19 : | * | ||
20 : | * You should have received a copy of the GNU General Public License | ||
21 : | * along with this program ; if not, write to the Free Software | ||
22 : | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | ||
23 : | * | ||
24 : | Isibaar | 1988 | * $Id$ |
25 : | edgomez | 1382 | * |
26 : | ****************************************************************************/ | ||
27 : | |||
28 : | |||
29 : | #include <assert.h> | ||
30 : | #include <stdio.h> | ||
31 : | #include <stdlib.h> | ||
32 : | #include <string.h> /* memcpy */ | ||
33 : | |||
34 : | #include "../encoder.h" | ||
35 : | #include "../global.h" | ||
36 : | #include "../image/interpolate8x8.h" | ||
37 : | #include "estimation.h" | ||
38 : | #include "motion.h" | ||
39 : | #include "sad.h" | ||
40 : | #include "motion_inlines.h" | ||
41 : | |||
42 : | static int32_t | ||
43 : | ChromaSAD2(const int fx, const int fy, const int bx, const int by, | ||
44 : | SearchData * const data) | ||
45 : | { | ||
46 : | int sad; | ||
47 : | const uint32_t stride = data->iEdgedWidth/2; | ||
48 : | uint8_t *f_refu, *f_refv, *b_refu, *b_refv; | ||
49 : | syskin | 1441 | int offset, filter; |
50 : | edgomez | 1382 | |
51 : | const INTERPOLATE8X8_PTR interpolate8x8_halfpel[] = { | ||
52 : | NULL, | ||
53 : | interpolate8x8_halfpel_v, | ||
54 : | interpolate8x8_halfpel_h, | ||
55 : | interpolate8x8_halfpel_hv | ||
56 : | }; | ||
57 : | syskin | 1441 | |
58 : | syskin | 1443 | if (data->chromaX == fx && data->chromaY == fy && |
59 : | data->b_chromaX == bx && data->b_chromaY == by) | ||
60 : | return data->chromaSAD; | ||
61 : | |||
62 : | syskin | 1441 | offset = (fx>>1) + (fy>>1)*stride; |
63 : | filter = ((fx & 1) << 1) | (fy & 1); | ||
64 : | edgomez | 1382 | |
65 : | if (filter != 0) { | ||
66 : | syskin | 1475 | f_refu = data->RefQ + 64; |
67 : | f_refv = data->RefQ + 64 + 8; | ||
68 : | syskin | 1443 | if (data->chromaX != fx || data->chromaY != fy) { |
69 : | interpolate8x8_halfpel[filter](f_refu, data->RefP[4] + offset, stride, data->rounding); | ||
70 : | interpolate8x8_halfpel[filter](f_refv, data->RefP[5] + offset, stride, data->rounding); | ||
71 : | } | ||
72 : | edgomez | 1382 | } else { |
73 : | f_refu = (uint8_t*)data->RefP[4] + offset; | ||
74 : | f_refv = (uint8_t*)data->RefP[5] + offset; | ||
75 : | } | ||
76 : | syskin | 1443 | data->chromaX = fx; data->chromaY = fy; |
77 : | edgomez | 1382 | |
78 : | offset = (bx>>1) + (by>>1)*stride; | ||
79 : | filter = ((bx & 1) << 1) | (by & 1); | ||
80 : | |||
81 : | if (filter != 0) { | ||
82 : | syskin | 1475 | b_refu = data->RefQ + 64 + 16; |
83 : | b_refv = data->RefQ + 64 + 24; | ||
84 : | syskin | 1443 | if (data->b_chromaX != bx || data->b_chromaY != by) { |
85 : | interpolate8x8_halfpel[filter](b_refu, data->b_RefP[4] + offset, stride, data->rounding); | ||
86 : | interpolate8x8_halfpel[filter](b_refv, data->b_RefP[5] + offset, stride, data->rounding); | ||
87 : | } | ||
88 : | edgomez | 1382 | } else { |
89 : | b_refu = (uint8_t*)data->b_RefP[4] + offset; | ||
90 : | b_refv = (uint8_t*)data->b_RefP[5] + offset; | ||
91 : | } | ||
92 : | syskin | 1443 | data->b_chromaX = bx; data->b_chromaY = by; |
93 : | edgomez | 1382 | |
94 : | sad = sad8bi(data->CurU, b_refu, f_refu, stride); | ||
95 : | sad += sad8bi(data->CurV, b_refv, f_refv, stride); | ||
96 : | |||
97 : | syskin | 1443 | data->chromaSAD = sad; |
98 : | edgomez | 1382 | return sad; |
99 : | } | ||
100 : | |||
101 : | static void | ||
102 : | CheckCandidateInt(const int x, const int y, SearchData * const data, const unsigned int Direction) | ||
103 : | { | ||
104 : | int32_t sad, xf, yf, xb, yb, xcf, ycf, xcb, ycb; | ||
105 : | uint32_t t; | ||
106 : | syskin | 1441 | |
107 : | edgomez | 1382 | const uint8_t *ReferenceF, *ReferenceB; |
108 : | VECTOR *current; | ||
109 : | |||
110 : | if ((x > data->max_dx) || (x < data->min_dx) || | ||
111 : | (y > data->max_dy) || (y < data->min_dy)) | ||
112 : | return; | ||
113 : | |||
114 : | if (Direction == 1) { /* x and y mean forward vector */ | ||
115 : | VECTOR backward = data->qpel_precision ? data->currentQMV[1] : data->currentMV[1]; | ||
116 : | xb = backward.x; | ||
117 : | yb = backward.y; | ||
118 : | xf = x; yf = y; | ||
119 : | } else { /* x and y mean backward vector */ | ||
120 : | VECTOR forward = data->qpel_precision ? data->currentQMV[0] : data->currentMV[0]; | ||
121 : | xf = forward.x; | ||
122 : | yf = forward.y; | ||
123 : | xb = x; yb = y; | ||
124 : | } | ||
125 : | syskin | 1478 | |
126 : | edgomez | 1382 | if (!data->qpel_precision) { |
127 : | ReferenceF = GetReference(xf, yf, data); | ||
128 : | ReferenceB = GetReferenceB(xb, yb, 1, data); | ||
129 : | current = data->currentMV + Direction - 1; | ||
130 : | xcf = xf; ycf = yf; | ||
131 : | xcb = xb; ycb = yb; | ||
132 : | } else { | ||
133 : | ReferenceF = xvid_me_interpolate16x16qpel(xf, yf, 0, data); | ||
134 : | current = data->currentQMV + Direction - 1; | ||
135 : | ReferenceB = xvid_me_interpolate16x16qpel(xb, yb, 1, data); | ||
136 : | xcf = xf/2; ycf = yf/2; | ||
137 : | xcb = xb/2; ycb = yb/2; | ||
138 : | } | ||
139 : | |||
140 : | syskin | 1564 | t = d_mv_bits(xf, yf, data->predMV, data->iFcode, data->qpel^data->qpel_precision) |
141 : | + d_mv_bits(xb, yb, data->bpredMV, data->iFcode, data->qpel^data->qpel_precision); | ||
142 : | edgomez | 1382 | |
143 : | sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); | ||
144 : | Isibaar | 1604 | sad += (data->lambda16 * t); |
145 : | edgomez | 1382 | |
146 : | if (data->chroma && sad < *data->iMinSAD) | ||
147 : | sad += ChromaSAD2((xcf >> 1) + roundtab_79[xcf & 0x3], | ||
148 : | (ycf >> 1) + roundtab_79[ycf & 0x3], | ||
149 : | (xcb >> 1) + roundtab_79[xcb & 0x3], | ||
150 : | (ycb >> 1) + roundtab_79[ycb & 0x3], data); | ||
151 : | |||
152 : | if (sad < *(data->iMinSAD)) { | ||
153 : | *data->iMinSAD = sad; | ||
154 : | current->x = x; current->y = y; | ||
155 : | data->dir = Direction; | ||
156 : | } | ||
157 : | } | ||
158 : | |||
159 : | static void | ||
160 : | CheckCandidateDirect(const int x, const int y, SearchData * const data, const unsigned int Direction) | ||
161 : | { | ||
162 : | int32_t sad = 0, xcf = 0, ycf = 0, xcb = 0, ycb = 0; | ||
163 : | uint32_t k; | ||
164 : | const uint8_t *ReferenceF; | ||
165 : | const uint8_t *ReferenceB; | ||
166 : | VECTOR mvs, b_mvs; | ||
167 : | syskin | 1478 | const int blocks[4] = {0, 8, 8*data->iEdgedWidth, 8*data->iEdgedWidth+8}; |
168 : | edgomez | 1382 | |
169 : | if (( x > 31) || ( x < -32) || ( y > 31) || (y < -32)) return; | ||
170 : | |||
171 : | for (k = 0; k < 4; k++) { | ||
172 : | mvs.x = data->directmvF[k].x + x; | ||
173 : | b_mvs.x = ((x == 0) ? | ||
174 : | data->directmvB[k].x | ||
175 : | : mvs.x - data->referencemv[k].x); | ||
176 : | |||
177 : | mvs.y = data->directmvF[k].y + y; | ||
178 : | b_mvs.y = ((y == 0) ? | ||
179 : | data->directmvB[k].y | ||
180 : | : mvs.y - data->referencemv[k].y); | ||
181 : | |||
182 : | if ((mvs.x > data->max_dx) || (mvs.x < data->min_dx) || | ||
183 : | (mvs.y > data->max_dy) || (mvs.y < data->min_dy) || | ||
184 : | (b_mvs.x > data->max_dx) || (b_mvs.x < data->min_dx) || | ||
185 : | (b_mvs.y > data->max_dy) || (b_mvs.y < data->min_dy) ) | ||
186 : | return; | ||
187 : | |||
188 : | if (data->qpel) { | ||
189 : | xcf += mvs.x/2; ycf += mvs.y/2; | ||
190 : | xcb += b_mvs.x/2; ycb += b_mvs.y/2; | ||
191 : | syskin | 1478 | if (data->qpel_precision) { |
192 : | ReferenceF = xvid_me_interpolate8x8qpel(mvs.x, mvs.y, k, 0, data); | ||
193 : | ReferenceB = xvid_me_interpolate8x8qpel(b_mvs.x, b_mvs.y, k, 1, data); | ||
194 : | goto done; | ||
195 : | } | ||
196 : | mvs.x >>=1; mvs.y >>=1; b_mvs.x >>=1; b_mvs.y >>=1; // qpel->hpel | ||
197 : | edgomez | 1382 | } else { |
198 : | xcf += mvs.x; ycf += mvs.y; | ||
199 : | xcb += b_mvs.x; ycb += b_mvs.y; | ||
200 : | } | ||
201 : | syskin | 1478 | ReferenceF = GetReference(mvs.x, mvs.y, data) + blocks[k]; |
202 : | ReferenceB = GetReferenceB(b_mvs.x, b_mvs.y, 1, data) + blocks[k]; | ||
203 : | done: | ||
204 : | syskin | 1443 | sad += data->iMinSAD[k+1] = |
205 : | syskin | 1478 | sad8bi(data->Cur + blocks[k], |
206 : | syskin | 1443 | ReferenceF, ReferenceB, data->iEdgedWidth); |
207 : | edgomez | 1382 | if (sad > *(data->iMinSAD)) return; |
208 : | } | ||
209 : | |||
210 : | Isibaar | 1604 | sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); |
211 : | edgomez | 1382 | |
212 : | if (data->chroma && sad < *data->iMinSAD) | ||
213 : | sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], | ||
214 : | (ycf >> 3) + roundtab_76[ycf & 0xf], | ||
215 : | (xcb >> 3) + roundtab_76[xcb & 0xf], | ||
216 : | (ycb >> 3) + roundtab_76[ycb & 0xf], data); | ||
217 : | |||
218 : | if (sad < *(data->iMinSAD)) { | ||
219 : | data->iMinSAD[0] = sad; | ||
220 : | data->currentMV->x = x; data->currentMV->y = y; | ||
221 : | data->dir = Direction; | ||
222 : | } | ||
223 : | } | ||
224 : | |||
225 : | static void | ||
226 : | CheckCandidateDirectno4v(const int x, const int y, SearchData * const data, const unsigned int Direction) | ||
227 : | { | ||
228 : | int32_t sad, xcf, ycf, xcb, ycb; | ||
229 : | const uint8_t *ReferenceF; | ||
230 : | const uint8_t *ReferenceB; | ||
231 : | VECTOR mvs, b_mvs; | ||
232 : | |||
233 : | if (( x > 31) || ( x < -32) || ( y > 31) || (y < -32)) return; | ||
234 : | |||
235 : | mvs.x = data->directmvF[0].x + x; | ||
236 : | b_mvs.x = ((x == 0) ? | ||
237 : | data->directmvB[0].x | ||
238 : | : mvs.x - data->referencemv[0].x); | ||
239 : | |||
240 : | mvs.y = data->directmvF[0].y + y; | ||
241 : | b_mvs.y = ((y == 0) ? | ||
242 : | data->directmvB[0].y | ||
243 : | : mvs.y - data->referencemv[0].y); | ||
244 : | |||
245 : | if ( (mvs.x > data->max_dx) || (mvs.x < data->min_dx) | ||
246 : | || (mvs.y > data->max_dy) || (mvs.y < data->min_dy) | ||
247 : | || (b_mvs.x > data->max_dx) || (b_mvs.x < data->min_dx) | ||
248 : | || (b_mvs.y > data->max_dy) || (b_mvs.y < data->min_dy) ) return; | ||
249 : | |||
250 : | if (data->qpel) { | ||
251 : | xcf = 4*(mvs.x/2); ycf = 4*(mvs.y/2); | ||
252 : | xcb = 4*(b_mvs.x/2); ycb = 4*(b_mvs.y/2); | ||
253 : | syskin | 1478 | if (data->qpel_precision) { |
254 : | ReferenceF = xvid_me_interpolate16x16qpel(mvs.x, mvs.y, 0, data); | ||
255 : | ReferenceB = xvid_me_interpolate16x16qpel(b_mvs.x, b_mvs.y, 1, data); | ||
256 : | goto done; | ||
257 : | } | ||
258 : | mvs.x >>=1; mvs.y >>=1; b_mvs.x >>=1; b_mvs.y >>=1; // qpel->hpel | ||
259 : | edgomez | 1382 | } else { |
260 : | xcf = 4*mvs.x; ycf = 4*mvs.y; | ||
261 : | xcb = 4*b_mvs.x; ycb = 4*b_mvs.y; | ||
262 : | } | ||
263 : | syskin | 1478 | ReferenceF = GetReference(mvs.x, mvs.y, data); |
264 : | ReferenceB = GetReferenceB(b_mvs.x, b_mvs.y, 1, data); | ||
265 : | edgomez | 1382 | |
266 : | syskin | 1478 | done: |
267 : | edgomez | 1382 | sad = sad16bi(data->Cur, ReferenceF, ReferenceB, data->iEdgedWidth); |
268 : | Isibaar | 1604 | sad += (data->lambda16 * d_mv_bits(x, y, zeroMV, 1, 0)); |
269 : | edgomez | 1382 | |
270 : | if (data->chroma && sad < *data->iMinSAD) | ||
271 : | sad += ChromaSAD2((xcf >> 3) + roundtab_76[xcf & 0xf], | ||
272 : | (ycf >> 3) + roundtab_76[ycf & 0xf], | ||
273 : | (xcb >> 3) + roundtab_76[xcb & 0xf], | ||
274 : | (ycb >> 3) + roundtab_76[ycb & 0xf], data); | ||
275 : | |||
276 : | if (sad < *(data->iMinSAD)) { | ||
277 : | *(data->iMinSAD) = sad; | ||
278 : | data->currentMV->x = x; data->currentMV->y = y; | ||
279 : | data->dir = Direction; | ||
280 : | } | ||
281 : | } | ||
282 : | |||
283 : | void | ||
284 : | CheckCandidate16no4v(const int x, const int y, SearchData * const data, const unsigned int Direction) | ||
285 : | { | ||
286 : | int32_t sad, xc, yc; | ||
287 : | const uint8_t * Reference; | ||
288 : | uint32_t t; | ||
289 : | VECTOR * current; | ||
290 : | |||
291 : | if ( (x > data->max_dx) || ( x < data->min_dx) | ||
292 : | || (y > data->max_dy) || (y < data->min_dy) ) return; | ||
293 : | |||
294 : | if (data->qpel_precision) { /* x and y are in 1/4 precision */ | ||
295 : | Reference = xvid_me_interpolate16x16qpel(x, y, 0, data); | ||
296 : | current = data->currentQMV; | ||
297 : | xc = x/2; yc = y/2; | ||
298 : | } else { | ||
299 : | Reference = GetReference(x, y, data); | ||
300 : | current = data->currentMV; | ||
301 : | xc = x; yc = y; | ||
302 : | } | ||
303 : | t = d_mv_bits(x, y, data->predMV, data->iFcode, | ||
304 : | syskin | 1564 | data->qpel^data->qpel_precision); |
305 : | edgomez | 1382 | |
306 : | sad = sad16(data->Cur, Reference, data->iEdgedWidth, 256*4096); | ||
307 : | Isibaar | 1604 | sad += (data->lambda16 * t); |
308 : | edgomez | 1382 | |
309 : | if (data->chroma && sad < *data->iMinSAD) | ||
310 : | sad += xvid_me_ChromaSAD((xc >> 1) + roundtab_79[xc & 0x3], | ||
311 : | (yc >> 1) + roundtab_79[yc & 0x3], data); | ||
312 : | |||
313 : | if (sad < *(data->iMinSAD)) { | ||
314 : | *(data->iMinSAD) = sad; | ||
315 : | current->x = x; current->y = y; | ||
316 : | data->dir = Direction; | ||
317 : | } | ||
318 : | } | ||
319 : | |||
320 : | syskin | 1478 | |
321 : | static void | ||
322 : | initialize_searchData(SearchData * Data_d, | ||
323 : | SearchData * Data_f, | ||
324 : | SearchData * Data_b, | ||
325 : | SearchData * Data_i, | ||
326 : | int x, int y, | ||
327 : | const IMAGE * const f_Ref, | ||
328 : | const uint8_t * const f_RefH, | ||
329 : | const uint8_t * const f_RefV, | ||
330 : | const uint8_t * const f_RefHV, | ||
331 : | const IMAGE * const b_Ref, | ||
332 : | const uint8_t * const b_RefH, | ||
333 : | const uint8_t * const b_RefV, | ||
334 : | const uint8_t * const b_RefHV, | ||
335 : | const IMAGE * const pCur, | ||
336 : | const MACROBLOCK * const b_mb) | ||
337 : | { | ||
338 : | |||
339 : | /* per-macroblock SearchData initialization - too many things would be repeated 4 times */ | ||
340 : | const uint8_t * RefP[6], * b_RefP[6], * Cur[3]; | ||
341 : | const uint32_t iEdgedWidth = Data_d->iEdgedWidth; | ||
342 : | unsigned int lambda; | ||
343 : | int i; | ||
344 : | |||
345 : | /* luma */ | ||
346 : | int offset = (x + iEdgedWidth*y) * 16; | ||
347 : | RefP[0] = f_Ref->y + offset; | ||
348 : | RefP[2] = f_RefH + offset; | ||
349 : | RefP[1] = f_RefV + offset; | ||
350 : | RefP[3] = f_RefHV + offset; | ||
351 : | b_RefP[0] = b_Ref->y + offset; | ||
352 : | b_RefP[2] = b_RefH + offset; | ||
353 : | b_RefP[1] = b_RefV + offset; | ||
354 : | b_RefP[3] = b_RefHV + offset; | ||
355 : | Cur[0] = pCur->y + offset; | ||
356 : | |||
357 : | /* chroma */ | ||
358 : | offset = (x + (iEdgedWidth/2)*y) * 8; | ||
359 : | RefP[4] = f_Ref->u + offset; | ||
360 : | RefP[5] = f_Ref->v + offset; | ||
361 : | b_RefP[4] = b_Ref->u + offset; | ||
362 : | b_RefP[5] = b_Ref->v + offset; | ||
363 : | Cur[1] = pCur->u + offset; | ||
364 : | Cur[2] = pCur->v + offset; | ||
365 : | |||
366 : | lambda = xvid_me_lambda_vec16[b_mb->quant]; | ||
367 : | |||
368 : | for (i = 0; i < 6; i++) { | ||
369 : | Data_d->RefP[i] = Data_f->RefP[i] = Data_i->RefP[i] = RefP[i]; | ||
370 : | Data_d->b_RefP[i] = Data_b->RefP[i] = Data_i->b_RefP[i] = b_RefP[i]; | ||
371 : | } | ||
372 : | Data_d->Cur = Data_f->Cur = Data_b->Cur = Data_i->Cur = Cur[0]; | ||
373 : | Data_d->CurU = Data_f->CurU = Data_b->CurU = Data_i->CurU = Cur[1]; | ||
374 : | Data_d->CurV = Data_f->CurV = Data_b->CurV = Data_i->CurV = Cur[2]; | ||
375 : | |||
376 : | syskin | 1515 | Data_d->lambda16 = Data_f->lambda16 = Data_b->lambda16 = Data_i->lambda16 = lambda; |
377 : | syskin | 1478 | |
378 : | /* reset chroma-sad cache */ | ||
379 : | Data_d->b_chromaX = Data_d->b_chromaY = Data_d->chromaX = Data_d->chromaY = Data_d->chromaSAD = 256*4096; | ||
380 : | Data_i->b_chromaX = Data_i->b_chromaY = Data_i->chromaX = Data_i->chromaY = Data_i->chromaSAD = 256*4096; | ||
381 : | Data_f->chromaX = Data_f->chromaY = Data_f->chromaSAD = 256*4096; | ||
382 : | Data_b->chromaX = Data_b->chromaY = Data_b->chromaSAD = 256*4096; | ||
383 : | |||
384 : | *Data_d->iMinSAD = *Data_b->iMinSAD = *Data_f->iMinSAD = *Data_i->iMinSAD = 4096*256; | ||
385 : | } | ||
386 : | |||
387 : | edgomez | 1382 | static __inline VECTOR |
388 : | ChoosePred(const MACROBLOCK * const pMB, const uint32_t mode) | ||
389 : | { | ||
390 : | /* the stupidiest function ever */ | ||
391 : | return (mode == MODE_FORWARD ? pMB->mvs[0] : pMB->b_mvs[0]); | ||
392 : | } | ||
393 : | |||
394 : | static void __inline | ||
395 : | PreparePredictionsBF(VECTOR * const pmv, const int x, const int y, | ||
396 : | const uint32_t iWcount, | ||
397 : | const MACROBLOCK * const pMB, | ||
398 : | syskin | 1478 | const uint32_t mode_curr, |
399 : | Isibaar | 1931 | const VECTOR hint, const int bound) |
400 : | edgomez | 1382 | { |
401 : | Isibaar | 1931 | int lx, ly; /* left */ |
402 : | int tx, ty; /* top */ | ||
403 : | int rtx, rty; /* top-right */ | ||
404 : | int ltx, lty; /* top-left */ | ||
405 : | int lpos, tpos, rtpos, ltpos; | ||
406 : | |||
407 : | lx = x - 1; ly = y; | ||
408 : | tx = x; ty = y - 1; | ||
409 : | rtx = x + 1; rty = y - 1; | ||
410 : | ltx = x - 1; lty = y - 1; | ||
411 : | |||
412 : | lpos = lx + ly * iWcount; | ||
413 : | rtpos = rtx + rty * iWcount; | ||
414 : | tpos = tx + ty * iWcount; | ||
415 : | ltpos = ltx + lty * iWcount; | ||
416 : | |||
417 : | |||
418 : | edgomez | 1382 | /* [0] is prediction */ |
419 : | syskin | 1515 | /* [1] is zero */ |
420 : | pmv[1].x = pmv[1].y = 0; | ||
421 : | edgomez | 1382 | |
422 : | syskin | 1478 | pmv[2].x = hint.x; pmv[2].y = hint.y; |
423 : | edgomez | 1382 | |
424 : | Isibaar | 1931 | if (rtpos >= bound && rtx < (int)iWcount) { /* [3] top-right neighbour */ |
425 : | edgomez | 1382 | pmv[3] = ChoosePred(pMB+1-iWcount, mode_curr); |
426 : | } else pmv[3].x = pmv[3].y = 0; | ||
427 : | |||
428 : | Isibaar | 1931 | if (tpos >= bound) { |
429 : | pmv[4] = ChoosePred(pMB-iWcount, mode_curr); /* [4] top */ | ||
430 : | edgomez | 1382 | } else pmv[4].x = pmv[4].y = 0; |
431 : | |||
432 : | Isibaar | 1931 | if (lpos >= bound && lx >= 0) { |
433 : | pmv[5] = ChoosePred(pMB-1, mode_curr); /* [5] left */ | ||
434 : | edgomez | 1382 | } else pmv[5].x = pmv[5].y = 0; |
435 : | |||
436 : | Isibaar | 1931 | if (ltpos >= bound && ltx >= 0) { |
437 : | pmv[6] = ChoosePred(pMB-1-iWcount, mode_curr); /* [6] top-left */ | ||
438 : | edgomez | 1382 | } else pmv[6].x = pmv[6].y = 0; |
439 : | } | ||
440 : | |||
441 : | /* search backward or forward */ | ||
442 : | static void | ||
443 : | syskin | 1478 | SearchBF_initial(const int x, const int y, |
444 : | edgomez | 1382 | const uint32_t MotionFlags, |
445 : | const uint32_t iFcode, | ||
446 : | const MBParam * const pParam, | ||
447 : | MACROBLOCK * const pMB, | ||
448 : | const VECTOR * const predMV, | ||
449 : | int32_t * const best_sad, | ||
450 : | const int32_t mode_current, | ||
451 : | syskin | 1478 | SearchData * const Data, |
452 : | Isibaar | 1931 | VECTOR hint, const int bound) |
453 : | edgomez | 1382 | { |
454 : | |||
455 : | int i; | ||
456 : | VECTOR pmv[7]; | ||
457 : | *Data->iMinSAD = MV_MAX_ERROR; | ||
458 : | Data->qpel_precision = 0; | ||
459 : | |||
460 : | Data->predMV = *predMV; | ||
461 : | |||
462 : | get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, | ||
463 : | syskin | 1564 | pParam->width, pParam->height, iFcode - Data->qpel, 1); |
464 : | edgomez | 1382 | |
465 : | pmv[0] = Data->predMV; | ||
466 : | syskin | 1478 | if (Data->qpel) { |
467 : | pmv[0].x /= 2; pmv[0].y /= 2; | ||
468 : | hint.x /= 2; hint.y /= 2; | ||
469 : | } | ||
470 : | edgomez | 1382 | |
471 : | Isibaar | 1931 | PreparePredictionsBF(pmv, x, y, pParam->mb_width, pMB, mode_current, hint, bound); |
472 : | edgomez | 1382 | |
473 : | Data->currentMV->x = Data->currentMV->y = 0; | ||
474 : | |||
475 : | /* main loop. checking all predictions */ | ||
476 : | for (i = 0; i < 7; i++) | ||
477 : | if (!vector_repeats(pmv, i) ) | ||
478 : | CheckCandidate16no4v(pmv[i].x, pmv[i].y, Data, i); | ||
479 : | |||
480 : | if (*Data->iMinSAD > 512) { | ||
481 : | unsigned int mask = make_mask(pmv, 7, Data->dir); | ||
482 : | |||
483 : | MainSearchFunc *MainSearchPtr; | ||
484 : | if (MotionFlags & XVID_ME_USESQUARES16) MainSearchPtr = xvid_me_SquareSearch; | ||
485 : | else if (MotionFlags & XVID_ME_ADVANCEDDIAMOND16) MainSearchPtr = xvid_me_AdvDiamondSearch; | ||
486 : | else MainSearchPtr = xvid_me_DiamondSearch; | ||
487 : | |||
488 : | MainSearchPtr(Data->currentMV->x, Data->currentMV->y, Data, mask, CheckCandidate16no4v); | ||
489 : | } | ||
490 : | |||
491 : | syskin | 1478 | if (Data->iMinSAD[0] < *best_sad) *best_sad = Data->iMinSAD[0]; |
492 : | } | ||
493 : | edgomez | 1382 | |
494 : | syskin | 1478 | static void |
495 : | SearchBF_final(const int x, const int y, | ||
496 : | const uint32_t MotionFlags, | ||
497 : | const MBParam * const pParam, | ||
498 : | int32_t * const best_sad, | ||
499 : | SearchData * const Data) | ||
500 : | { | ||
501 : | syskin | 1441 | if(!Data->qpel) { |
502 : | /* halfpel mode */ | ||
503 : | if (MotionFlags & XVID_ME_HALFPELREFINE16) | ||
504 : | syskin | 1478 | xvid_me_SubpelRefine(Data->currentMV[0], Data, CheckCandidate16no4v, 0); |
505 : | syskin | 1441 | } else { |
506 : | /* qpel mode */ | ||
507 : | if(MotionFlags & XVID_ME_FASTREFINE16) { | ||
508 : | /* fast */ | ||
509 : | get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, | ||
510 : | syskin | 1564 | pParam->width, pParam->height, Data->iFcode, 2); |
511 : | syskin | 1478 | FullRefine_Fast(Data, CheckCandidate16no4v, 0); |
512 : | |||
513 : | syskin | 1441 | } else { |
514 : | syskin | 1478 | |
515 : | syskin | 1441 | Data->currentQMV->x = 2*Data->currentMV->x; |
516 : | Data->currentQMV->y = 2*Data->currentMV->y; | ||
517 : | if(MotionFlags & XVID_ME_QUARTERPELREFINE16) { | ||
518 : | /* full */ | ||
519 : | if (MotionFlags & XVID_ME_HALFPELREFINE16) { | ||
520 : | syskin | 1478 | xvid_me_SubpelRefine(Data->currentMV[0], Data, CheckCandidate16no4v, 0); /* hpel part */ |
521 : | syskin | 1441 | Data->currentQMV->x = 2*Data->currentMV->x; |
522 : | Data->currentQMV->y = 2*Data->currentMV->y; | ||
523 : | } | ||
524 : | get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, x, y, 4, | ||
525 : | syskin | 1564 | pParam->width, pParam->height, Data->iFcode, 2); |
526 : | syskin | 1441 | Data->qpel_precision = 1; |
527 : | syskin | 1478 | xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidate16no4v, 0); /* qpel part */ |
528 : | syskin | 1441 | } |
529 : | edgomez | 1382 | } |
530 : | } | ||
531 : | syskin | 1478 | if (Data->iMinSAD[0] < *best_sad) *best_sad = Data->iMinSAD[0]; |
532 : | edgomez | 1382 | |
533 : | } | ||
534 : | |||
535 : | static void | ||
536 : | syskin | 1478 | SkipDecisionB(MACROBLOCK * const pMB, const SearchData * const Data) |
537 : | edgomez | 1382 | { |
538 : | int k; | ||
539 : | |||
540 : | if (!Data->chroma) { | ||
541 : | int dx = 0, dy = 0, b_dx = 0, b_dy = 0; | ||
542 : | int32_t sum; | ||
543 : | const uint32_t stride = Data->iEdgedWidth/2; | ||
544 : | /* this is not full chroma compensation, only it's fullpel approximation. should work though */ | ||
545 : | |||
546 : | for (k = 0; k < 4; k++) { | ||
547 : | dy += Data->directmvF[k].y >> Data->qpel; | ||
548 : | dx += Data->directmvF[k].x >> Data->qpel; | ||
549 : | b_dy += Data->directmvB[k].y >> Data->qpel; | ||
550 : | b_dx += Data->directmvB[k].x >> Data->qpel; | ||
551 : | } | ||
552 : | |||
553 : | dy = (dy >> 3) + roundtab_76[dy & 0xf]; | ||
554 : | dx = (dx >> 3) + roundtab_76[dx & 0xf]; | ||
555 : | b_dy = (b_dy >> 3) + roundtab_76[b_dy & 0xf]; | ||
556 : | b_dx = (b_dx >> 3) + roundtab_76[b_dx & 0xf]; | ||
557 : | |||
558 : | syskin | 1478 | sum = sad8bi(Data->CurU, |
559 : | edgomez | 1556 | Data->RefP[4] + (dy/2) * (int)stride + dx/2, |
560 : | Data->b_RefP[4] + (b_dy/2) * (int)stride + b_dx/2, | ||
561 : | edgomez | 1382 | stride); |
562 : | |||
563 : | if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */ | ||
564 : | |||
565 : | syskin | 1478 | sum += sad8bi(Data->CurV, |
566 : | edgomez | 1556 | Data->RefP[5] + (dy/2) * (int)stride + dx/2, |
567 : | Data->b_RefP[5] + (b_dy/2) * (int)stride + b_dx/2, | ||
568 : | edgomez | 1382 | stride); |
569 : | syskin | 1481 | |
570 : | edgomez | 1382 | if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */ |
571 : | syskin | 1481 | } else { |
572 : | int sum = Data->chromaSAD; /* chroma-sad SAD caching keeps it there */ | ||
573 : | |||
574 : | if (sum >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) return; /* no skip */ | ||
575 : | edgomez | 1382 | } |
576 : | |||
577 : | /* skip */ | ||
578 : | pMB->mode = MODE_DIRECT_NONE_MV; /* skipped */ | ||
579 : | for (k = 0; k < 4; k++) { | ||
580 : | pMB->qmvs[k] = pMB->mvs[k] = Data->directmvF[k]; | ||
581 : | pMB->b_qmvs[k] = pMB->b_mvs[k] = Data->directmvB[k]; | ||
582 : | syskin | 1517 | if (Data->qpel) { |
583 : | pMB->mvs[k].x /= 2; pMB->mvs[k].y /= 2; /* it's a hint for future searches */ | ||
584 : | pMB->b_mvs[k].x /= 2; pMB->b_mvs[k].y /= 2; | ||
585 : | } | ||
586 : | edgomez | 1382 | } |
587 : | } | ||
588 : | |||
589 : | static uint32_t | ||
590 : | syskin | 1478 | SearchDirect_initial(const int x, const int y, |
591 : | edgomez | 1382 | const uint32_t MotionFlags, |
592 : | const int32_t TRB, const int32_t TRD, | ||
593 : | const MBParam * const pParam, | ||
594 : | MACROBLOCK * const pMB, | ||
595 : | const MACROBLOCK * const b_mb, | ||
596 : | int32_t * const best_sad, | ||
597 : | SearchData * const Data) | ||
598 : | |||
599 : | { | ||
600 : | int32_t skip_sad; | ||
601 : | int k = (x + Data->iEdgedWidth*y) * 16; | ||
602 : | |||
603 : | k = Data->qpel ? 4 : 2; | ||
604 : | Data->max_dx = k * (pParam->width - x * 16); | ||
605 : | Data->max_dy = k * (pParam->height - y * 16); | ||
606 : | Data->min_dx = -k * (16 + x * 16); | ||
607 : | Data->min_dy = -k * (16 + y * 16); | ||
608 : | |||
609 : | Data->referencemv = Data->qpel ? b_mb->qmvs : b_mb->mvs; | ||
610 : | |||
611 : | for (k = 0; k < 4; k++) { | ||
612 : | syskin | 1478 | Data->directmvF[k].x = ((TRB * Data->referencemv[k].x) / TRD); |
613 : | Data->directmvB[k].x = ((TRB - TRD) * Data->referencemv[k].x) / TRD; | ||
614 : | Data->directmvF[k].y = ((TRB * Data->referencemv[k].y) / TRD); | ||
615 : | Data->directmvB[k].y = ((TRB - TRD) * Data->referencemv[k].y) / TRD; | ||
616 : | edgomez | 1382 | |
617 : | syskin | 1478 | if ( (Data->directmvB[k].x > Data->max_dx) | (Data->directmvB[k].x < Data->min_dx) |
618 : | | (Data->directmvB[k].y > Data->max_dy) | (Data->directmvB[k].y < Data->min_dy) ) { | ||
619 : | edgomez | 1382 | |
620 : | syskin | 1478 | Data->iMinSAD[0] = *best_sad = 256*4096; /* in that case, we won't use direct mode */ |
621 : | edgomez | 1382 | return 256*4096; |
622 : | } | ||
623 : | if (b_mb->mode != MODE_INTER4V) { | ||
624 : | Data->directmvF[1] = Data->directmvF[2] = Data->directmvF[3] = Data->directmvF[0]; | ||
625 : | Data->directmvB[1] = Data->directmvB[2] = Data->directmvB[3] = Data->directmvB[0]; | ||
626 : | break; | ||
627 : | } | ||
628 : | } | ||
629 : | syskin | 1478 | Data->qpel_precision = Data->qpel; /* this initial check is done with full precision, to find real |
630 : | SKIP sad */ | ||
631 : | edgomez | 1382 | |
632 : | syskin | 1443 | CheckCandidateDirect(0, 0, Data, 255); /* will also fill iMinSAD[1..4] with 8x8 SADs */ |
633 : | edgomez | 1382 | |
634 : | /* initial (fast) skip decision */ | ||
635 : | syskin | 1443 | if (Data->iMinSAD[1] < (int)Data->iQuant * INITIAL_SKIP_THRESH |
636 : | && Data->iMinSAD[2] < (int)Data->iQuant * INITIAL_SKIP_THRESH | ||
637 : | && Data->iMinSAD[3] < (int)Data->iQuant * INITIAL_SKIP_THRESH | ||
638 : | && Data->iMinSAD[4] < (int)Data->iQuant * INITIAL_SKIP_THRESH) { | ||
639 : | edgomez | 1382 | /* possible skip */ |
640 : | syskin | 1478 | SkipDecisionB(pMB, Data); |
641 : | syskin | 1443 | if (pMB->mode == MODE_DIRECT_NONE_MV) |
642 : | return *Data->iMinSAD; /* skipped */ | ||
643 : | edgomez | 1382 | } |
644 : | |||
645 : | syskin | 1516 | if (Data->chroma && Data->chromaSAD >= MAX_CHROMA_SAD_FOR_SKIP * (int)Data->iQuant) /* chroma doesn't allow skip */ |
646 : | skip_sad = 256*4096; | ||
647 : | else | ||
648 : | skip_sad = 4*MAX(MAX(Data->iMinSAD[1],Data->iMinSAD[2]), MAX(Data->iMinSAD[3],Data->iMinSAD[4])); | ||
649 : | edgomez | 1382 | |
650 : | syskin | 1478 | Data->currentMV[1].x = Data->directmvF[0].x + Data->currentMV->x; /* hints for forward and backward searches */ |
651 : | Data->currentMV[1].y = Data->directmvF[0].y + Data->currentMV->y; | ||
652 : | syskin | 1443 | |
653 : | syskin | 1478 | Data->currentMV[2].x = ((Data->currentMV->x == 0) ? |
654 : | Data->directmvB[0].x | ||
655 : | : Data->currentMV[1].x - Data->referencemv[0].x); | ||
656 : | edgomez | 1382 | |
657 : | syskin | 1478 | Data->currentMV[2].y = ((Data->currentMV->y == 0) ? |
658 : | Data->directmvB[0].y | ||
659 : | : Data->currentMV[1].y - Data->referencemv[0].y); | ||
660 : | edgomez | 1382 | |
661 : | syskin | 1548 | *best_sad = Data->iMinSAD[0]; |
662 : | |||
663 : | syskin | 1478 | return skip_sad; |
664 : | } | ||
665 : | edgomez | 1382 | |
666 : | syskin | 1478 | static void |
667 : | SearchDirect_final( const uint32_t MotionFlags, | ||
668 : | const MACROBLOCK * const b_mb, | ||
669 : | int32_t * const best_sad, | ||
670 : | SearchData * const Data) | ||
671 : | edgomez | 1382 | |
672 : | syskin | 1478 | { |
673 : | CheckFunc * CheckCandidate = b_mb->mode == MODE_INTER4V ? | ||
674 : | CheckCandidateDirect : CheckCandidateDirectno4v; | ||
675 : | MainSearchFunc *MainSearchPtr; | ||
676 : | edgomez | 1382 | |
677 : | syskin | 1478 | if (MotionFlags & XVID_ME_USESQUARES16) MainSearchPtr = xvid_me_SquareSearch; |
678 : | else if (MotionFlags & XVID_ME_ADVANCEDDIAMOND16) MainSearchPtr = xvid_me_AdvDiamondSearch; | ||
679 : | else MainSearchPtr = xvid_me_DiamondSearch; | ||
680 : | edgomez | 1382 | |
681 : | syskin | 1478 | Data->qpel_precision = 0; |
682 : | MainSearchPtr(0, 0, Data, 255, CheckCandidate); | ||
683 : | edgomez | 1382 | |
684 : | syskin | 1478 | Data->qpel_precision = Data->qpel; |
685 : | if(Data->qpel) { | ||
686 : | *Data->iMinSAD = 256*4096; /* this old SAD was not real, it was in hpel precision */ | ||
687 : | CheckCandidate(Data->currentMV->x, Data->currentMV->y, Data, 255); | ||
688 : | edgomez | 1382 | } |
689 : | syskin | 1478 | |
690 : | xvid_me_SubpelRefine(Data->currentMV[0], Data, CheckCandidate, 0); | ||
691 : | |||
692 : | if (Data->iMinSAD[0] < *best_sad) { | ||
693 : | *best_sad = Data->iMinSAD[0]; | ||
694 : | } | ||
695 : | |||
696 : | edgomez | 1382 | } |
697 : | |||
698 : | |||
699 : | syskin | 1478 | static __inline void |
700 : | set_range(int * range, SearchData * Data) | ||
701 : | edgomez | 1382 | { |
702 : | Data->min_dx = range[0]; | ||
703 : | Data->max_dx = range[1]; | ||
704 : | Data->min_dy = range[2]; | ||
705 : | Data->max_dy = range[3]; | ||
706 : | } | ||
707 : | |||
708 : | static void | ||
709 : | syskin | 1478 | SearchInterpolate_initial( |
710 : | edgomez | 1382 | const int x, const int y, |
711 : | const uint32_t MotionFlags, | ||
712 : | const MBParam * const pParam, | ||
713 : | const VECTOR * const f_predMV, | ||
714 : | const VECTOR * const b_predMV, | ||
715 : | int32_t * const best_sad, | ||
716 : | syskin | 1478 | SearchData * const Data, |
717 : | const VECTOR startF, | ||
718 : | const VECTOR startB) | ||
719 : | edgomez | 1382 | |
720 : | { | ||
721 : | int b_range[4], f_range[4]; | ||
722 : | |||
723 : | Data->qpel_precision = 0; | ||
724 : | |||
725 : | Data->predMV = *f_predMV; | ||
726 : | Data->bpredMV = *b_predMV; | ||
727 : | |||
728 : | syskin | 1478 | Data->currentMV[0] = startF; |
729 : | Data->currentMV[1] = startB; | ||
730 : | edgomez | 1382 | |
731 : | syskin | 1564 | get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1); |
732 : | get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1); | ||
733 : | edgomez | 1382 | |
734 : | if (Data->currentMV[0].x > f_range[1]) Data->currentMV[0].x = f_range[1]; | ||
735 : | if (Data->currentMV[0].x < f_range[0]) Data->currentMV[0].x = f_range[0]; | ||
736 : | if (Data->currentMV[0].y > f_range[3]) Data->currentMV[0].y = f_range[3]; | ||
737 : | if (Data->currentMV[0].y < f_range[2]) Data->currentMV[0].y = f_range[2]; | ||
738 : | |||
739 : | if (Data->currentMV[1].x > b_range[1]) Data->currentMV[1].x = b_range[1]; | ||
740 : | if (Data->currentMV[1].x < b_range[0]) Data->currentMV[1].x = b_range[0]; | ||
741 : | if (Data->currentMV[1].y > b_range[3]) Data->currentMV[1].y = b_range[3]; | ||
742 : | if (Data->currentMV[1].y < b_range[2]) Data->currentMV[1].y = b_range[2]; | ||
743 : | |||
744 : | set_range(f_range, Data); | ||
745 : | |||
746 : | CheckCandidateInt(Data->currentMV[0].x, Data->currentMV[0].y, Data, 1); | ||
747 : | |||
748 : | syskin | 1478 | if (Data->iMinSAD[0] < *best_sad) *best_sad = Data->iMinSAD[0]; |
749 : | } | ||
750 : | |||
751 : | static void | ||
752 : | SearchInterpolate_final(const int x, const int y, | ||
753 : | const uint32_t MotionFlags, | ||
754 : | const MBParam * const pParam, | ||
755 : | int32_t * const best_sad, | ||
756 : | SearchData * const Data) | ||
757 : | { | ||
758 : | int i, j; | ||
759 : | int b_range[4], f_range[4]; | ||
760 : | |||
761 : | syskin | 1564 | get_range(f_range, f_range+1, f_range+2, f_range+3, x, y, 4, pParam->width, pParam->height, Data->iFcode - Data->qpel, 1); |
762 : | get_range(b_range, b_range+1, b_range+2, b_range+3, x, y, 4, pParam->width, pParam->height, Data->bFcode - Data->qpel, 1); | ||
763 : | syskin | 1478 | |
764 : | edgomez | 1382 | /* diamond */ |
765 : | do { | ||
766 : | Data->dir = 0; | ||
767 : | /* forward MV moves */ | ||
768 : | i = Data->currentMV[0].x; j = Data->currentMV[0].y; | ||
769 : | |||
770 : | CheckCandidateInt(i + 1, j, Data, 1); | ||
771 : | CheckCandidateInt(i, j + 1, Data, 1); | ||
772 : | CheckCandidateInt(i - 1, j, Data, 1); | ||
773 : | CheckCandidateInt(i, j - 1, Data, 1); | ||
774 : | |||
775 : | /* backward MV moves */ | ||
776 : | set_range(b_range, Data); | ||
777 : | i = Data->currentMV[1].x; j = Data->currentMV[1].y; | ||
778 : | |||
779 : | CheckCandidateInt(i + 1, j, Data, 2); | ||
780 : | CheckCandidateInt(i, j + 1, Data, 2); | ||
781 : | CheckCandidateInt(i - 1, j, Data, 2); | ||
782 : | CheckCandidateInt(i, j - 1, Data, 2); | ||
783 : | |||
784 : | set_range(f_range, Data); | ||
785 : | |||
786 : | } while (Data->dir != 0); | ||
787 : | |||
788 : | /* qpel refinement */ | ||
789 : | if (Data->qpel) { | ||
790 : | Data->qpel_precision = 1; | ||
791 : | syskin | 1478 | get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, |
792 : | syskin | 1564 | x, y, 4, pParam->width, pParam->height, Data->iFcode, 2); |
793 : | edgomez | 1382 | |
794 : | Data->currentQMV[0].x = 2 * Data->currentMV[0].x; | ||
795 : | Data->currentQMV[0].y = 2 * Data->currentMV[0].y; | ||
796 : | Data->currentQMV[1].x = 2 * Data->currentMV[1].x; | ||
797 : | Data->currentQMV[1].y = 2 * Data->currentMV[1].y; | ||
798 : | |||
799 : | syskin | 1478 | if (MotionFlags & XVID_ME_QUARTERPELREFINE16) { |
800 : | xvid_me_SubpelRefine(Data->currentQMV[0], Data, CheckCandidateInt, 1); | ||
801 : | edgomez | 1382 | |
802 : | syskin | 1478 | get_range(&Data->min_dx, &Data->max_dx, &Data->min_dy, &Data->max_dy, |
803 : | syskin | 1564 | x, y, 4, pParam->width, pParam->height, Data->bFcode, 2); |
804 : | edgomez | 1382 | |
805 : | syskin | 1478 | xvid_me_SubpelRefine(Data->currentQMV[1], Data, CheckCandidateInt, 2); |
806 : | } | ||
807 : | edgomez | 1382 | } |
808 : | |||
809 : | syskin | 1478 | if (Data->iMinSAD[0] < *best_sad) *best_sad = Data->iMinSAD[0]; |
810 : | } | ||
811 : | edgomez | 1382 | |
812 : | syskin | 1478 | static void |
813 : | ModeDecision_BVOP_SAD(const SearchData * const Data_d, | ||
814 : | const SearchData * const Data_b, | ||
815 : | const SearchData * const Data_f, | ||
816 : | const SearchData * const Data_i, | ||
817 : | MACROBLOCK * const pMB, | ||
818 : | const MACROBLOCK * const b_mb, | ||
819 : | VECTOR * f_predMV, | ||
820 : | Isibaar | 1919 | VECTOR * b_predMV, |
821 : | int force_direct) | ||
822 : | syskin | 1478 | { |
823 : | int mode = MODE_DIRECT, k; | ||
824 : | int best_sad, f_sad, b_sad, i_sad; | ||
825 : | const int qpel = Data_d->qpel; | ||
826 : | |||
827 : | /* evaluate cost of all modes - quite simple in SAD */ | ||
828 : | best_sad = Data_d->iMinSAD[0] + 1*Data_d->lambda16; | ||
829 : | b_sad = Data_b->iMinSAD[0] + 3*Data_d->lambda16; | ||
830 : | f_sad = Data_f->iMinSAD[0] + 4*Data_d->lambda16; | ||
831 : | i_sad = Data_i->iMinSAD[0] + 2*Data_d->lambda16; | ||
832 : | |||
833 : | Isibaar | 1919 | if (force_direct) |
834 : | goto set_mode; /* bypass checks for non-direct modes */ | ||
835 : | |||
836 : | syskin | 1478 | if (b_sad < best_sad) { |
837 : | mode = MODE_BACKWARD; | ||
838 : | best_sad = b_sad; | ||
839 : | } | ||
840 : | |||
841 : | if (f_sad < best_sad) { | ||
842 : | mode = MODE_FORWARD; | ||
843 : | best_sad = f_sad; | ||
844 : | } | ||
845 : | |||
846 : | if (i_sad < best_sad) { | ||
847 : | mode = MODE_INTERPOLATE; | ||
848 : | best_sad = i_sad; | ||
849 : | } | ||
850 : | |||
851 : | Isibaar | 1919 | set_mode: |
852 : | syskin | 1478 | pMB->sad16 = best_sad; |
853 : | pMB->mode = mode; | ||
854 : | syskin | 1568 | pMB->cbp = 63; |
855 : | syskin | 1478 | |
856 : | switch (mode) { | ||
857 : | |||
858 : | case MODE_DIRECT: | ||
859 : | if (!qpel && b_mb->mode != MODE_INTER4V) pMB->mode = MODE_DIRECT_NO4V; /* for faster compensation */ | ||
860 : | |||
861 : | pMB->pmvs[3] = Data_d->currentMV[0]; | ||
862 : | |||
863 : | for (k = 0; k < 4; k++) { | ||
864 : | pMB->mvs[k].x = Data_d->directmvF[k].x + Data_d->currentMV->x; | ||
865 : | pMB->b_mvs[k].x = ( (Data_d->currentMV->x == 0) | ||
866 : | ? Data_d->directmvB[k].x | ||
867 : | :pMB->mvs[k].x - Data_d->referencemv[k].x); | ||
868 : | pMB->mvs[k].y = (Data_d->directmvF[k].y + Data_d->currentMV->y); | ||
869 : | pMB->b_mvs[k].y = ((Data_d->currentMV->y == 0) | ||
870 : | ? Data_d->directmvB[k].y | ||
871 : | : pMB->mvs[k].y - Data_d->referencemv[k].y); | ||
872 : | if (qpel) { | ||
873 : | pMB->qmvs[k].x = pMB->mvs[k].x; pMB->mvs[k].x /= 2; | ||
874 : | pMB->b_qmvs[k].x = pMB->b_mvs[k].x; pMB->b_mvs[k].x /= 2; | ||
875 : | pMB->qmvs[k].y = pMB->mvs[k].y; pMB->mvs[k].y /= 2; | ||
876 : | pMB->b_qmvs[k].y = pMB->b_mvs[k].y; pMB->b_mvs[k].y /= 2; | ||
877 : | } | ||
878 : | |||
879 : | if (b_mb->mode != MODE_INTER4V) { | ||
880 : | pMB->mvs[3] = pMB->mvs[2] = pMB->mvs[1] = pMB->mvs[0]; | ||
881 : | pMB->b_mvs[3] = pMB->b_mvs[2] = pMB->b_mvs[1] = pMB->b_mvs[0]; | ||
882 : | pMB->qmvs[3] = pMB->qmvs[2] = pMB->qmvs[1] = pMB->qmvs[0]; | ||
883 : | pMB->b_qmvs[3] = pMB->b_qmvs[2] = pMB->b_qmvs[1] = pMB->b_qmvs[0]; | ||
884 : | break; | ||
885 : | } | ||
886 : | } | ||
887 : | break; | ||
888 : | |||
889 : | case MODE_FORWARD: | ||
890 : | if (qpel) { | ||
891 : | pMB->pmvs[0].x = Data_f->currentQMV->x - f_predMV->x; | ||
892 : | pMB->pmvs[0].y = Data_f->currentQMV->y - f_predMV->y; | ||
893 : | pMB->qmvs[0] = *Data_f->currentQMV; | ||
894 : | *f_predMV = Data_f->currentQMV[0]; | ||
895 : | } else { | ||
896 : | pMB->pmvs[0].x = Data_f->currentMV->x - f_predMV->x; | ||
897 : | pMB->pmvs[0].y = Data_f->currentMV->y - f_predMV->y; | ||
898 : | *f_predMV = Data_f->currentMV[0]; | ||
899 : | } | ||
900 : | pMB->mvs[0] = *Data_f->currentMV; | ||
901 : | syskin | 1515 | pMB->b_mvs[0] = *Data_b->currentMV; /* hint for future searches */ |
902 : | syskin | 1478 | break; |
903 : | |||
904 : | case MODE_BACKWARD: | ||
905 : | if (qpel) { | ||
906 : | pMB->pmvs[0].x = Data_b->currentQMV->x - b_predMV->x; | ||
907 : | pMB->pmvs[0].y = Data_b->currentQMV->y - b_predMV->y; | ||
908 : | pMB->b_qmvs[0] = *Data_b->currentQMV; | ||
909 : | *b_predMV = Data_b->currentQMV[0]; | ||
910 : | } else { | ||
911 : | pMB->pmvs[0].x = Data_b->currentMV->x - b_predMV->x; | ||
912 : | pMB->pmvs[0].y = Data_b->currentMV->y - b_predMV->y; | ||
913 : | *b_predMV = Data_b->currentMV[0]; | ||
914 : | } | ||
915 : | pMB->b_mvs[0] = *Data_b->currentMV; | ||
916 : | syskin | 1515 | pMB->mvs[0] = *Data_f->currentMV; /* hint for future searches */ |
917 : | syskin | 1478 | break; |
918 : | |||
919 : | |||
920 : | case MODE_INTERPOLATE: | ||
921 : | pMB->mvs[0] = Data_i->currentMV[0]; | ||
922 : | pMB->b_mvs[0] = Data_i->currentMV[1]; | ||
923 : | if (qpel) { | ||
924 : | pMB->qmvs[0] = Data_i->currentQMV[0]; | ||
925 : | pMB->b_qmvs[0] = Data_i->currentQMV[1]; | ||
926 : | edgomez | 1382 | pMB->pmvs[1].x = pMB->qmvs[0].x - f_predMV->x; |
927 : | pMB->pmvs[1].y = pMB->qmvs[0].y - f_predMV->y; | ||
928 : | pMB->pmvs[0].x = pMB->b_qmvs[0].x - b_predMV->x; | ||
929 : | pMB->pmvs[0].y = pMB->b_qmvs[0].y - b_predMV->y; | ||
930 : | syskin | 1478 | *f_predMV = Data_i->currentQMV[0]; |
931 : | *b_predMV = Data_i->currentQMV[1]; | ||
932 : | edgomez | 1382 | } else { |
933 : | pMB->pmvs[1].x = pMB->mvs[0].x - f_predMV->x; | ||
934 : | pMB->pmvs[1].y = pMB->mvs[0].y - f_predMV->y; | ||
935 : | pMB->pmvs[0].x = pMB->b_mvs[0].x - b_predMV->x; | ||
936 : | pMB->pmvs[0].y = pMB->b_mvs[0].y - b_predMV->y; | ||
937 : | syskin | 1478 | *f_predMV = Data_i->currentMV[0]; |
938 : | *b_predMV = Data_i->currentMV[1]; | ||
939 : | edgomez | 1382 | } |
940 : | syskin | 1478 | break; |
941 : | edgomez | 1382 | } |
942 : | } | ||
943 : | |||
944 : | syskin | 1567 | static __inline void |
945 : | maxMotionBVOP(int * const MVmaxF, int * const MVmaxB, const MACROBLOCK * const pMB, const int qpel) | ||
946 : | { | ||
947 : | if (pMB->mode == MODE_FORWARD || pMB->mode == MODE_INTERPOLATE) { | ||
948 : | const VECTOR * const mv = qpel ? pMB->qmvs : pMB->mvs; | ||
949 : | int max = *MVmaxF; | ||
950 : | if (mv[0].x > max) max = mv[0].x; | ||
951 : | else if (-mv[0].x - 1 > max) max = -mv[0].x - 1; | ||
952 : | if (mv[0].y > max) max = mv[0].y; | ||
953 : | else if (-mv[0].y - 1 > max) max = -mv[0].y - 1; | ||
954 : | |||
955 : | *MVmaxF = max; | ||
956 : | } | ||
957 : | |||
958 : | if (pMB->mode == MODE_BACKWARD || pMB->mode == MODE_INTERPOLATE) { | ||
959 : | const VECTOR * const mv = qpel ? pMB->b_qmvs : pMB->b_mvs; | ||
960 : | int max = *MVmaxB; | ||
961 : | if (mv[0].x > max) max = mv[0].x; | ||
962 : | else if (-mv[0].x - 1 > max) max = -mv[0].x - 1; | ||
963 : | if (mv[0].y > max) max = mv[0].y; | ||
964 : | else if (-mv[0].y - 1 > max) max = -mv[0].y - 1; | ||
965 : | *MVmaxB = max; | ||
966 : | } | ||
967 : | } | ||
968 : | |||
969 : | |||
/*
 * Single-threaded B-VOP motion estimation over the whole frame.
 *
 * Visits every macroblock of `frame` row by row. Unless one of the early
 * exits applies (co-located MB not coded, or a skip decision), it runs the
 * direct, forward, backward and interpolated searches and passes the four
 * SearchData results to ModeDecision_BVOP_RD() when frame->vop_flags has
 * XVID_VOP_RD_BVOP, or to ModeDecision_BVOP_SAD() otherwise.
 *
 * pParam            - frame geometry (mb_width, mb_height, edged_width), VOL flags
 *                     and quant matrices
 * frame             - B-VOP being estimated; its mbs[] entries are written, and
 *                     frame->fcode / frame->bcode are overwritten (see below)
 * time_bp, time_pp  - only used to form TRB = time_pp - time_bp and TRD = time_pp,
 *                     which are handed to SearchDirect_initial()
 * f_mbs             - not referenced in this function body
 * f_ref .. f_refHV  - forward reference planes, passed to initialize_searchData();
 *                     f_refV->u is additionally used as Data_d.RefQ
 * b_reference       - backward reference frame: supplies the co-located
 *                     macroblocks, coding_type and the initial fcode
 * b_ref .. b_refHV  - backward reference planes, passed to initialize_searchData()
 * num_slices        - used to compute new_bound, from which force_direct is derived
 */
970 : | edgomez | 1382 | void |
971 : | MotionEstimationBVOP(MBParam * const pParam, | ||
972 : | FRAMEINFO * const frame, | ||
973 : | const int32_t time_bp, | ||
974 : | const int32_t time_pp, | ||
975 : | /* forward (past) reference */ | ||
976 : | const MACROBLOCK * const f_mbs, | ||
977 : | const IMAGE * const f_ref, | ||
978 : | const IMAGE * const f_refH, | ||
979 : | const IMAGE * const f_refV, | ||
980 : | const IMAGE * const f_refHV, | ||
981 : | /* backward (future) reference */ | ||
982 : | const FRAMEINFO * const b_reference, | ||
983 : | const IMAGE * const b_ref, | ||
984 : | const IMAGE * const b_refH, | ||
985 : | const IMAGE * const b_refV, | ||
986 : | Isibaar | 1919 | const IMAGE * const b_refHV, |
987 : | const int num_slices) | ||
988 : | edgomez | 1382 | { |
989 : | uint32_t i, j; | ||
990 : | edgomez | 1547 | int32_t best_sad = 256*4096; |
991 : | edgomez | 1382 | uint32_t skip_sad; |
992 : | syskin | 1575 | int fb_thresh; |
993 : | edgomez | 1382 | const MACROBLOCK * const b_mbs = b_reference->mbs; |
994 : | |||
995 : | VECTOR f_predMV, b_predMV; | ||
996 : | |||
997 : | Isibaar | 1919 | int mb_width = pParam->mb_width; |
998 : | int mb_height = pParam->mb_height; | ||
/* largest vector components seen so far, maintained by maxMotionBVOP() */
999 : | syskin | 1567 | int MVmaxF = 0, MVmaxB = 0; |
1000 : | edgomez | 1382 | const int32_t TRB = time_pp - time_bp; |
1001 : | const int32_t TRD = time_pp; | ||
1002 : | syskin | 1506 | DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); |
1003 : | edgomez | 1382 | |
1004 : | /* some pre-initialized data for the rest of the search */ | ||
1005 : | syskin | 1478 | SearchData Data_d, Data_f, Data_b, Data_i; |
1006 : | memset(&Data_d, 0, sizeof(SearchData)); | ||
1007 : | edgomez | 1382 | |
1008 : | syskin | 1478 | Data_d.iEdgedWidth = pParam->edged_width; |
1009 : | Data_d.qpel = pParam->vol_flags & XVID_VOL_QUARTERPEL ? 1 : 0; | ||
1010 : | Data_d.rounding = 0; | ||
1011 : | Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; | ||
1012 : | Data_d.iQuant = frame->quant; | ||
1013 : | syskin | 1569 | Data_d.quant_sq = frame->quant*frame->quant; |
1014 : | syskin | 1506 | Data_d.dctSpace = dct_space; |
1015 : | Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); | ||
1016 : | Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; | ||
1017 : | edgomez | 1382 | |
1018 : | syskin | 1478 | Data_d.RefQ = f_refV->u; /* a good place, also used in MC (for similar purpose) */ |
1019 : | edgomez | 1382 | |
/* the other three search states start as copies of the shared settings above */
1020 : | syskin | 1478 | memcpy(&Data_f, &Data_d, sizeof(SearchData)); |
1021 : | memcpy(&Data_b, &Data_d, sizeof(SearchData)); | ||
1022 : | memcpy(&Data_i, &Data_d, sizeof(SearchData)); | ||
1023 : | edgomez | 1382 | |
/* both the forward and the backward code start from b_reference->fcode;
   frame->fcode and frame->bcode are replaced again after the loop */
1024 : | syskin | 1567 | Data_f.iFcode = Data_i.iFcode = frame->fcode = b_reference->fcode; |
1025 : | Data_b.iFcode = Data_i.bFcode = frame->bcode = b_reference->fcode; | ||
1026 : | syskin | 1478 | |
1027 : | edgomez | 1382 | for (j = 0; j < pParam->mb_height; j++) { |
/* macroblock index that is compared with j*mb_width+i below to derive
   force_direct; it is also handed to SearchBF_initial() */
1028 : | Isibaar | 1919 | int new_bound = mb_width * ((((j*num_slices) / mb_height) * mb_height + (num_slices-1)) / num_slices); |
1029 : | edgomez | 1382 | |
1030 : | f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ | ||
1031 : | |||
1032 : | for (i = 0; i < pParam->mb_width; i++) { | ||
1033 : | MACROBLOCK * const pMB = frame->mbs + i + j * pParam->mb_width; | ||
1034 : | const MACROBLOCK * const b_mb = b_mbs + i + j * pParam->mb_width; | ||
1035 : | Isibaar | 1919 | int force_direct = (((j*mb_width+i)==new_bound) && (j > 0)) ? 1 : 0; /* MTK decoder chipsets do NOT reset predMVs upon resync marker in BVOPs. We workaround this problem |
1036 : | by placing the slice border on second MB in a row and then force the first MB to be direct mode */ | ||
1037 : | |||
1038 : | syskin | 1478 | pMB->mode = -1; |
1039 : | edgomez | 1382 | |
1040 : | syskin | 1478 | initialize_searchData(&Data_d, &Data_f, &Data_b, &Data_i, |
1041 : | i, j, f_ref, f_refH->y, f_refV->y, f_refHV->y, | ||
1042 : | b_ref, b_refH->y, b_refV->y, b_refHV->y, | ||
1043 : | &frame->image, b_mb); | ||
1044 : | |||
1045 : | edgomez | 1382 | /* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ |
1046 : | if (b_reference->coding_type != S_VOP) | ||
1047 : | if (b_mb->mode == MODE_NOT_CODED) { | ||
1048 : | pMB->mode = MODE_NOT_CODED; | ||
1049 : | pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; | ||
1050 : | pMB->sad16 = 0; | ||
1051 : | continue; | ||
1052 : | } | ||
1053 : | |||
1054 : | /* direct search comes first, because it (1) checks for SKIP-mode | ||
1055 : | and (2) sets very good predictions for forward and backward search */ | ||
1056 : | syskin | 1478 | skip_sad = SearchDirect_initial(i, j, frame->motion_flags, TRB, TRD, pParam, pMB, |
1057 : | b_mb, &best_sad, &Data_d); | ||
1058 : | edgomez | 1382 | |
1059 : | if (pMB->mode == MODE_DIRECT_NONE_MV) { | ||
1060 : | pMB->sad16 = best_sad; | ||
1061 : | syskin | 1568 | pMB->cbp = 0; |
1062 : | edgomez | 1382 | continue; |
1063 : | } | ||
1064 : | syskin | 1515 | |
/* the forward and backward searches are seeded with Data_d.currentMV[1] and
   Data_d.currentMV[2] respectively */
1065 : | syskin | 1478 | SearchBF_initial(i, j, frame->motion_flags, frame->fcode, pParam, pMB, |
1066 : | Isibaar | 1931 | &f_predMV, &best_sad, MODE_FORWARD, &Data_f, Data_d.currentMV[1], new_bound); |
1067 : | edgomez | 1382 | |
1068 : | syskin | 1478 | SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, |
1069 : | Isibaar | 1931 | &b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2], new_bound); |
1070 : | edgomez | 1382 | |
/* fb_thresh gates the second-stage forward/backward searches: best_sad itself
   with XVID_ME_BFRAME_EARLYSTOP, otherwise 1.5 * best_sad */
1071 : | syskin | 1575 | if (frame->motion_flags&XVID_ME_BFRAME_EARLYSTOP) |
1072 : | fb_thresh = best_sad; | ||
1073 : | else | ||
1074 : | fb_thresh = best_sad + (best_sad>>1); | ||
1075 : | edgomez | 1382 | |
1076 : | syskin | 1575 | if (Data_f.iMinSAD[0] <= fb_thresh) |
1077 : | syskin | 1478 | SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); |
1078 : | edgomez | 1382 | |
1079 : | syskin | 1575 | if (Data_b.iMinSAD[0] <= fb_thresh) |
1080 : | syskin | 1478 | SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); |
1081 : | edgomez | 1382 | |
1082 : | syskin | 1478 | SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, |
1083 : | &Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); | ||
1084 : | edgomez | 1382 | |
/* refine the interpolated candidate if it is within best_sad + best_sad/8
   (only without XVID_ME_FAST_MODEINTERPOLATE) or not worse than best_sad */
1085 : | syskin | 1575 | if (((Data_i.iMinSAD[0] < best_sad +(best_sad>>3)) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) |
1086 : | edgomez | 1485 | || Data_i.iMinSAD[0] <= best_sad) |
1087 : | edgomez | 1382 | |
1088 : | syskin | 1478 | SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); |
1089 : | |||
/* second-stage direct search: only when the direct SAD is at most 2*best_sad
   and best_sad exceeds 750 (or 1000 when XVID_ME_SKIP_DELTASEARCH is set) */
1090 : | syskin | 1575 | if (Data_d.iMinSAD[0] <= 2*best_sad) |
1091 : | if ((!(frame->motion_flags&XVID_ME_SKIP_DELTASEARCH) && (best_sad > 750)) | ||
1092 : | || (best_sad > 1000)) | ||
1093 : | |||
1094 : | SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); | ||
1095 : | edgomez | 1382 | |
1096 : | syskin | 1478 | /* final skip decision */ |
1097 : | syskin | 1516 | if ( (skip_sad < 2 * Data_d.iQuant * MAX_SAD00_FOR_SKIP ) |
1098 : | syskin | 1478 | && ((100*best_sad)/(skip_sad+1) > FINAL_SKIP_THRESH) ) { |
1099 : | edgomez | 1382 | |
1100 : | syskin | 1517 | Data_d.chromaSAD = 0; /* green light for chroma check */ |
1101 : | |||
1102 : | SkipDecisionB(pMB, &Data_d); | ||
1103 : | |||
1104 : | if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ | ||
1105 : | syskin | 1516 | pMB->sad16 = skip_sad; |
1106 : | syskin | 1568 | pMB->cbp = 0; |
1107 : | syskin | 1478 | continue; |
1108 : | } | ||
1109 : | edgomez | 1382 | } |
1110 : | |||
1111 : | syskin | 1506 | if (frame->vop_flags & XVID_VOP_RD_BVOP) |
1112 : | ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, | ||
1113 : | Isibaar | 1919 | pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, frame->vop_flags, pParam, i, j, best_sad, force_direct); |
1114 : | syskin | 1506 | else |
1115 : | Isibaar | 1919 | ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV, force_direct); |
1116 : | edgomez | 1382 | |
/* reached only for macroblocks that went through a mode decision; the early
   `continue` paths above skip this bookkeeping */
1117 : | syskin | 1567 | maxMotionBVOP(&MVmaxF, &MVmaxB, pMB, Data_d.qpel); |
1118 : | |||
1119 : | edgomez | 1382 | } |
1120 : | } | ||
1121 : | syskin | 1567 | |
/* the final codes are derived by getMinFcode() from the maxima collected above */
1122 : | frame->fcode = getMinFcode(MVmaxF); | ||
1123 : | frame->bcode = getMinFcode(MVmaxB); | ||
1124 : | edgomez | 1382 | } |
1125 : | syskin | 1682 | |
1126 : | |||
1127 : | |||
/*
 * Worker variant of the B-VOP motion estimation, driven by an SMPData block.
 *
 * Runs the same per-macroblock search sequence as MotionEstimationBVOP, but
 * only for the rows j = h->start_y + h->y_row, stepping by h->y_step, below
 * h->stop_y. References and frame times are taken from the encoder in
 * h->pEnc; the frame being estimated is h->current.
 *
 * Progress is throttled through two counters: after each finished macroblock
 * i+1 is stored at *complete_count_self, and before a macroblock is processed
 * the counter at complete_count_above decides how many more macroblocks may
 * be done (max_mbs). Presumably complete_count_above is the counter written
 * by the worker handling the row above -- confirm against the code that
 * fills in SMPData.
 *
 * Unlike MotionEstimationBVOP, frame->fcode / frame->bcode are only read
 * here; the minimum codes are returned in h->minfcode and h->minbcode.
 */
1128 : | void | ||
1129 : | Isibaar | 1913 | SMPMotionEstimationBVOP(SMPData * h) |
1130 : | syskin | 1682 | { |
1131 : | Isibaar | 1913 | Encoder *pEnc = (Encoder *) h->pEnc; |
1132 : | |||
1133 : | const MBParam * const pParam = &pEnc->mbParam; | ||
1134 : | syskin | 1682 | const FRAMEINFO * const frame = h->current; |
1135 : | Isibaar | 1913 | const int32_t time_bp = (int32_t)(pEnc->current->stamp - frame->stamp); |
1136 : | const int32_t time_pp = (int32_t)(pEnc->current->stamp - pEnc->reference->stamp); | ||
1137 : | syskin | 1682 | /* forward (past) reference */ |
1138 : | Isibaar | 1913 | const IMAGE * const f_ref = &pEnc->reference->image; |
1139 : | const IMAGE * const f_refH = &pEnc->f_refh; | ||
1140 : | const IMAGE * const f_refV = &pEnc->f_refv; | ||
1141 : | const IMAGE * const f_refHV = &pEnc->f_refhv; | ||
1142 : | syskin | 1682 | /* backward (future) reference */ |
1143 : | Isibaar | 1913 | const FRAMEINFO * const b_reference = pEnc->current; |
1144 : | const IMAGE * const b_ref = &pEnc->current->image; | ||
1145 : | const IMAGE * const b_refH = &pEnc->vInterH; | ||
1146 : | const IMAGE * const b_refV = &pEnc->vInterV; | ||
1147 : | const IMAGE * const b_refHV = &pEnc->vInterHV; | ||
1148 : | syskin | 1682 | |
1149 : | Isibaar | 1919 | int mb_width = pParam->mb_width; |
1150 : | int mb_height = pParam->mb_height; | ||
1151 : | int num_slices = pEnc->num_slices; | ||
1152 : | Isibaar | 1913 | int y_row = h->y_row; |
1153 : | syskin | 1682 | int y_step = h->y_step; |
1154 : | int start_y = h->start_y; | ||
1155 : | Isibaar | 1913 | int stop_y = h->stop_y; |
1156 : | syskin | 1682 | int * complete_count_self = h->complete_count_self; |
1157 : | const int * complete_count_above = h->complete_count_above; | ||
/* max_mbs: upper bound for current_mb; current_mb: macroblocks finished by this call */
1158 : | int max_mbs; | ||
1159 : | int current_mb = 0; | ||
1160 : | |||
1161 : | syskin | 1687 | int32_t i, j; |
1162 : | syskin | 1682 | int32_t best_sad = 256*4096; |
1163 : | uint32_t skip_sad; | ||
1164 : | int fb_thresh; | ||
1165 : | const MACROBLOCK * const b_mbs = b_reference->mbs; | ||
1166 : | |||
1167 : | VECTOR f_predMV, b_predMV; | ||
1168 : | |||
1169 : | int MVmaxF = 0, MVmaxB = 0; | ||
1170 : | const int32_t TRB = time_pp - time_bp; | ||
1171 : | const int32_t TRD = time_pp; | ||
1172 : | DECLARE_ALIGNED_MATRIX(dct_space, 3, 64, int16_t, CACHE_LINE); | ||
1173 : | |||
1174 : | /* some pre-initialized data for the rest of the search */ | ||
1175 : | SearchData Data_d, Data_f, Data_b, Data_i; | ||
1176 : | memset(&Data_d, 0, sizeof(SearchData)); | ||
1177 : | |||
1178 : | Data_d.iEdgedWidth = pParam->edged_width; | ||
1179 : | Data_d.qpel = pParam->vol_flags & XVID_VOL_QUARTERPEL ? 1 : 0; | ||
1180 : | Data_d.rounding = 0; | ||
1181 : | Data_d.chroma = frame->motion_flags & XVID_ME_CHROMA_BVOP; | ||
1182 : | Data_d.iQuant = frame->quant; | ||
1183 : | Data_d.quant_sq = frame->quant*frame->quant; | ||
1184 : | Data_d.dctSpace = dct_space; | ||
1185 : | Data_d.quant_type = !(pParam->vol_flags & XVID_VOL_MPEGQUANT); | ||
1186 : | Data_d.mpeg_quant_matrices = pParam->mpeg_quant_matrices; | ||
1187 : | |||
/* the RefQ buffer comes from the SMPData block */
1188 : | Data_d.RefQ = h->RefQ; | ||
1189 : | |||
1190 : | memcpy(&Data_f, &Data_d, sizeof(SearchData)); | ||
1191 : | memcpy(&Data_b, &Data_d, sizeof(SearchData)); | ||
1192 : | memcpy(&Data_i, &Data_d, sizeof(SearchData)); | ||
1193 : | |||
/* the codes are taken from the frame as they are; nothing is written back to it here */
1194 : | Data_f.iFcode = Data_i.iFcode = frame->fcode; | ||
1195 : | Data_b.iFcode = Data_i.bFcode = frame->bcode; | ||
1196 : | |||
1197 : | max_mbs = 0; | ||
1198 : | |||
1199 : | Isibaar | 1913 | for (j = (start_y+y_row); j < stop_y; j += y_step) { |
1200 : | Isibaar | 1919 | int new_bound = mb_width * ((((j*num_slices) / mb_height) * mb_height + (num_slices-1)) / num_slices); |
1201 : | |||
1202 : | Isibaar | 1913 | if (j == start_y) max_mbs = pParam->mb_width; /* we can process all blocks of the first row */ |
1203 : | syskin | 1682 | |
1204 : | f_predMV = b_predMV = zeroMV; /* prediction is reset at left boundary */ | ||
1205 : | |||
1206 : | Isibaar | 1913 | for (i = 0; i < (int) pParam->mb_width; i++) { |
1207 : | syskin | 1682 | MACROBLOCK * const pMB = frame->mbs + i + j * pParam->mb_width; |
1208 : | const MACROBLOCK * const b_mb = b_mbs + i + j * pParam->mb_width; | ||
1209 : | Isibaar | 1919 | int force_direct = (((j*mb_width+i)==new_bound) && (j > 0)) ? 1 : 0; /* MTK decoder chipsets do NOT reset predMVs upon resync marker in BVOPs. We workaround this problem |
1210 : | by placing the slice border on second MB in a row and then force the first MB to be direct mode */ | ||
1211 : | syskin | 1682 | pMB->mode = -1; |
1212 : | |||
1213 : | initialize_searchData(&Data_d, &Data_f, &Data_b, &Data_i, | ||
1214 : | i, j, f_ref, f_refH->y, f_refV->y, f_refHV->y, | ||
1215 : | b_ref, b_refH->y, b_refV->y, b_refHV->y, | ||
1216 : | &frame->image, b_mb); | ||
1217 : | |||
/* throttle: once current_mb reaches max_mbs, the counter at complete_count_above
   is read again to compute a new max_mbs before this macroblock is processed */
1218 : | if (current_mb >= max_mbs) { | ||
1219 : | /* we ME-ed all macroblocks we safely could. grab next portion */ | ||
1220 : | int above_count = *complete_count_above; /* sync point */ | ||
1221 : | if (above_count == pParam->mb_width) { | ||
1222 : | /* full line above is ready */ | ||
1223 : | above_count = pParam->mb_width+1; | ||
1224 : | Isibaar | 1913 | if (j < stop_y-y_step) { |
1225 : | syskin | 1682 | /* this is not last line, grab a portion of MBs from the next line too */ |
1226 : | above_count += MAX(0, complete_count_above[1] - 1); | ||
1227 : | } | ||
1228 : | } | ||
1229 : | |||
1230 : | max_mbs = current_mb + above_count - i - 1; | ||
1231 : | |||
1232 : | if (current_mb >= max_mbs) { | ||
1233 : | /* current workload is zero */ | ||
/* i-- cancels the loop's i++, so the same macroblock is retried after yielding */
1234 : | i--; | ||
1235 : | sched_yield(); | ||
1236 : | continue; | ||
1237 : | } | ||
1238 : | } | ||
1239 : | |||
1240 : | /* special case, if collocated block is SKIPed in P-VOP: encoding is forward (0,0), cpb=0 without further ado */ | ||
1241 : | if (b_reference->coding_type != S_VOP) | ||
1242 : | if (b_mb->mode == MODE_NOT_CODED) { | ||
1243 : | pMB->mode = MODE_NOT_CODED; | ||
1244 : | pMB->mvs[0] = pMB->b_mvs[0] = zeroMV; | ||
1245 : | pMB->sad16 = 0; | ||
/* every exit path for a finished macroblock stores i+1 in this row's counter
   and increments current_mb */
1246 : | *complete_count_self = i+1; | ||
1247 : | current_mb++; | ||
1248 : | continue; | ||
1249 : | } | ||
1250 : | |||
1251 : | /* direct search comes first, because it (1) checks for SKIP-mode | ||
1252 : | and (2) sets very good predictions for forward and backward search */ | ||
1253 : | skip_sad = SearchDirect_initial(i, j, frame->motion_flags, TRB, TRD, pParam, pMB, | ||
1254 : | b_mb, &best_sad, &Data_d); | ||
1255 : | |||
1256 : | if (pMB->mode == MODE_DIRECT_NONE_MV) { | ||
1257 : | pMB->sad16 = best_sad; | ||
1258 : | pMB->cbp = 0; | ||
1259 : | *complete_count_self = i+1; | ||
1260 : | current_mb++; | ||
1261 : | continue; | ||
1262 : | } | ||
1263 : | |||
1264 : | SearchBF_initial(i, j, frame->motion_flags, frame->fcode, pParam, pMB, | ||
1265 : | Isibaar | 1931 | &f_predMV, &best_sad, MODE_FORWARD, &Data_f, Data_d.currentMV[1], new_bound); |
1266 : | syskin | 1682 | |
1267 : | SearchBF_initial(i, j, frame->motion_flags, frame->bcode, pParam, pMB, | ||
1268 : | Isibaar | 1931 | &b_predMV, &best_sad, MODE_BACKWARD, &Data_b, Data_d.currentMV[2], new_bound); |
1269 : | syskin | 1682 | |
/* fb_thresh gates the second-stage forward/backward searches: best_sad itself
   with XVID_ME_BFRAME_EARLYSTOP, otherwise 1.5 * best_sad */
1270 : | if (frame->motion_flags&XVID_ME_BFRAME_EARLYSTOP) | ||
1271 : | fb_thresh = best_sad; | ||
1272 : | else | ||
1273 : | fb_thresh = best_sad + (best_sad>>1); | ||
1274 : | |||
1275 : | if (Data_f.iMinSAD[0] <= fb_thresh) | ||
1276 : | SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_f); | ||
1277 : | |||
1278 : | if (Data_b.iMinSAD[0] <= fb_thresh) | ||
1279 : | SearchBF_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_b); | ||
1280 : | |||
1281 : | SearchInterpolate_initial(i, j, frame->motion_flags, pParam, &f_predMV, &b_predMV, &best_sad, | ||
1282 : | &Data_i, Data_f.currentMV[0], Data_b.currentMV[0]); | ||
1283 : | |||
1284 : | if (((Data_i.iMinSAD[0] < best_sad +(best_sad>>3)) && !(frame->motion_flags&XVID_ME_FAST_MODEINTERPOLATE)) | ||
1285 : | || Data_i.iMinSAD[0] <= best_sad) | ||
1286 : | |||
1287 : | SearchInterpolate_final(i, j, frame->motion_flags, pParam, &best_sad, &Data_i); | ||
1288 : | |||
1289 : | if (Data_d.iMinSAD[0] <= 2*best_sad) | ||
1290 : | if ((!(frame->motion_flags&XVID_ME_SKIP_DELTASEARCH) && (best_sad > 750)) | ||
1291 : | || (best_sad > 1000)) | ||
1292 : | |||
1293 : | SearchDirect_final(frame->motion_flags, b_mb, &best_sad, &Data_d); | ||
1294 : | |||
1295 : | /* final skip decision */ | ||
1296 : | if ( (skip_sad < 2 * Data_d.iQuant * MAX_SAD00_FOR_SKIP ) | ||
1297 : | && ((100*best_sad)/(skip_sad+1) > FINAL_SKIP_THRESH) ) { | ||
1298 : | |||
1299 : | Data_d.chromaSAD = 0; /* green light for chroma check */ | ||
1300 : | |||
1301 : | SkipDecisionB(pMB, &Data_d); | ||
1302 : | |||
1303 : | if (pMB->mode == MODE_DIRECT_NONE_MV) { /* skipped? */ | ||
1304 : | pMB->sad16 = skip_sad; | ||
1305 : | pMB->cbp = 0; | ||
1306 : | *complete_count_self = i+1; | ||
1307 : | current_mb++; | ||
1308 : | continue; | ||
1309 : | } | ||
1310 : | } | ||
1311 : | |||
1312 : | if (frame->vop_flags & XVID_VOP_RD_BVOP) | ||
1313 : | ModeDecision_BVOP_RD(&Data_d, &Data_b, &Data_f, &Data_i, | ||
1314 : | Isibaar | 1919 | pMB, b_mb, &f_predMV, &b_predMV, frame->motion_flags, frame->vop_flags, pParam, i, j, best_sad, force_direct); |
1315 : | syskin | 1682 | else |
1316 : | Isibaar | 1919 | ModeDecision_BVOP_SAD(&Data_d, &Data_b, &Data_f, &Data_i, pMB, b_mb, &f_predMV, &b_predMV, force_direct); |
1317 : | syskin | 1682 | |
1318 : | *complete_count_self = i+1; | ||
1319 : | current_mb++; | ||
1320 : | syskin | 1687 | maxMotionBVOP(&MVmaxF, &MVmaxB, pMB, Data_d.qpel); |
1321 : | syskin | 1682 | } |
1322 : | |||
/* both counter pointers advance by one entry for the next row handled by this call */
1323 : | complete_count_self++; | ||
1324 : | complete_count_above++; | ||
1325 : | } | ||
1326 : | syskin | 1687 | |
/* the minimum codes are reported through h rather than written to the frame */
1327 : | h->minfcode = getMinFcode(MVmaxF); | ||
1328 : | h->minbcode = getMinFcode(MVmaxB); | ||
1329 : | syskin | 1682 | } |
No admin address has been configured | ViewVC Help |
Powered by ViewVC 1.0.4 |