21 |
; * along with this program ; if not, write to the Free Software |
; * along with this program ; if not, write to the Free Software |
22 |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
; * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
23 |
; * |
; * |
24 |
; * $Id: quantize_h263_mmx.asm,v 1.2 2008-08-19 09:06:48 Isibaar Exp $ |
; * $Id: quantize_h263_mmx.asm,v 1.3 2008-11-11 20:46:24 Isibaar Exp $ |
25 |
; * |
; * |
26 |
; ****************************************************************************/ |
; ****************************************************************************/ |
27 |
|
|
35 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
36 |
global _%1:function %1.endfunc-%1 |
global _%1:function %1.endfunc-%1 |
37 |
%define %1 _%1:function %1.endfunc-%1 |
%define %1 _%1:function %1.endfunc-%1 |
38 |
|
%define ENDFUNC .endfunc |
39 |
%else |
%else |
40 |
global _%1 |
global _%1 |
41 |
%define %1 _%1 |
%define %1 _%1 |
42 |
|
%define ENDFUNC |
43 |
%endif |
%endif |
44 |
%else |
%else |
45 |
%ifdef MARK_FUNCS |
%ifdef MARK_FUNCS |
46 |
global %1:function %1.endfunc-%1 |
global %1:function %1.endfunc-%1 |
47 |
|
%define ENDFUNC .endfunc |
48 |
%else |
%else |
49 |
global %1 |
global %1 |
50 |
|
%define ENDFUNC |
51 |
%endif |
%endif |
52 |
%endif |
%endif |
53 |
%endmacro |
%endmacro |
188 |
movq mm7, [r9 + rax * 8 - 8] |
movq mm7, [r9 + rax * 8 - 8] |
189 |
|
|
190 |
ALIGN 16 |
ALIGN 16 |
191 |
.loop |
.loop: |
192 |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
193 |
movq mm3, [rsi + 8*rcx + 8] |
movq mm3, [rsi + 8*rcx + 8] |
194 |
pxor mm1, mm1 ; mm1 = 0 |
pxor mm1, mm1 ; mm1 = 0 |
212 |
cmp rcx, 16 |
cmp rcx, 16 |
213 |
jnz .loop |
jnz .loop |
214 |
|
|
215 |
.done |
.done: |
216 |
|
|
217 |
; calculate data[0] // (int32_t)dcscalar) |
; calculate data[0] // (int32_t)dcscalar) |
218 |
mov rcx, r8 ; dcscalar |
mov rcx, r8 ; dcscalar |
225 |
sub rax, rdx |
sub rax, rdx |
226 |
jmp short .mul |
jmp short .mul |
227 |
|
|
228 |
.gtzero |
.gtzero: |
229 |
add rax, rdx |
add rax, rdx |
230 |
.mul |
.mul: |
231 |
cdq ; expand eax -> edx:eax |
cdq ; expand eax -> edx:eax |
232 |
idiv ecx ; eax = edx:eax / dcscalar |
idiv ecx ; eax = edx:eax / dcscalar |
233 |
mov [rdi], ax ; coeff[0] = ax |
mov [rdi], ax ; coeff[0] = ax |
237 |
ret |
ret |
238 |
|
|
239 |
ALIGN 16 |
ALIGN 16 |
240 |
.q1loop |
.q1loop: |
241 |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
242 |
movq mm3, [rsi + 8*rcx + 8] |
movq mm3, [rsi + 8*rcx + 8] |
243 |
pxor mm1, mm1 ; mm1 = 0 |
pxor mm1, mm1 ; mm1 = 0 |
262 |
jnz .q1loop |
jnz .q1loop |
263 |
|
|
264 |
jmp .done |
jmp .done |
265 |
.endfunc |
ENDFUNC |
266 |
|
|
267 |
|
|
268 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
293 |
movq mm7, [r9 + rax * 8 - 8] ; divider |
movq mm7, [r9 + rax * 8 - 8] ; divider |
294 |
|
|
295 |
ALIGN 8 |
ALIGN 8 |
296 |
.loop |
.loop: |
297 |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
298 |
movq mm3, [rsi + 8*rcx + 8] |
movq mm3, [rsi + 8*rcx + 8] |
299 |
pxor mm1, mm1 ; mm1 = 0 |
pxor mm1, mm1 ; mm1 = 0 |
321 |
cmp rcx, 16 |
cmp rcx, 16 |
322 |
jnz .loop |
jnz .loop |
323 |
|
|
324 |
.done |
.done: |
325 |
pmaddwd mm5, [plus_one wrt rip] |
pmaddwd mm5, [plus_one wrt rip] |
326 |
movq mm0, mm5 |
movq mm0, mm5 |
327 |
psrlq mm5, 32 |
psrlq mm5, 32 |
332 |
ret |
ret |
333 |
|
|
334 |
ALIGN 8 |
ALIGN 8 |
335 |
.q1loop |
.q1loop: |
336 |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
movq mm0, [rsi + 8*rcx] ; mm0 = [1st] |
337 |
movq mm3, [rsi + 8*rcx+ 8] ; |
movq mm3, [rsi + 8*rcx+ 8] ; |
338 |
pxor mm1, mm1 ; mm1 = 0 |
pxor mm1, mm1 ; mm1 = 0 |
361 |
jnz .q1loop |
jnz .q1loop |
362 |
|
|
363 |
jmp .done |
jmp .done |
364 |
.endfunc |
ENDFUNC |
365 |
|
|
366 |
|
|
367 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
392 |
mov rax, -16 |
mov rax, -16 |
393 |
|
|
394 |
ALIGN 16 |
ALIGN 16 |
395 |
.loop |
.loop: |
396 |
movq mm0, [rcx+8*rax+8*16] ; c = coeff[i] |
movq mm0, [rcx+8*rax+8*16] ; c = coeff[i] |
397 |
movq mm3, [rcx+8*rax+8*16 + 8] ; c' = coeff[i+1] |
movq mm3, [rcx+8*rax+8*16 + 8] ; c' = coeff[i+1] |
398 |
pxor mm1, mm1 |
pxor mm1, mm1 |
442 |
|
|
443 |
xor rax, rax |
xor rax, rax |
444 |
ret |
ret |
445 |
.endfunc |
ENDFUNC |
446 |
|
|
447 |
|
|
448 |
;----------------------------------------------------------------------------- |
;----------------------------------------------------------------------------- |
471 |
mov rax, -16 |
mov rax, -16 |
472 |
|
|
473 |
ALIGN 16 |
ALIGN 16 |
474 |
.loop |
.loop: |
475 |
movq mm0, [rcx+8*rax+8*16] ; c = coeff[i] |
movq mm0, [rcx+8*rax+8*16] ; c = coeff[i] |
476 |
movq mm3, [rcx+8*rax+8*16 + 8] ; c' = coeff[i+1] |
movq mm3, [rcx+8*rax+8*16 + 8] ; c' = coeff[i+1] |
477 |
pxor mm1, mm1 |
pxor mm1, mm1 |
508 |
|
|
509 |
xor rax, rax |
xor rax, rax |
510 |
ret |
ret |
511 |
.endfunc |
ENDFUNC |
512 |
|
|
513 |
%ifidn __OUTPUT_FORMAT__,elf |
%ifidn __OUTPUT_FORMAT__,elf |
514 |
section ".note.GNU-stack" noalloc noexec nowrite progbits |
section ".note.GNU-stack" noalloc noexec nowrite progbits |