# MMX assist routines for sumsq
# Copyright 2001 Phil Karn, KA9Q
# May be used under the terms of the GNU Public License (GPL)

.text

# Evaluate sum of squares of signed 16-bit input samples
# long long sumsq_mmx_assist(signed short *in,int cnt);
#
# Syntax: GNU as, AT&T operand order, 32-bit x86 with MMX.
# In:     8(%ebp)  = in   (pointer to signed 16-bit samples)
#         12(%ebp) = cnt  (number of samples)
# Out:    %edx:%eax = 64-bit sum of in[i]*in[i]
# Note:   samples are consumed in groups of eight. Trailing samples that
#         do not fill a whole group are never read, so cnt below eight
#         (including zero and negative values) returns 0.
# Saves:  %ebp, %esi, %ecx, %ebx
# Clobb:  %mm0, %mm6, flags. emms is executed before returning.
.global sumsq_mmx_assist
.type sumsq_mmx_assist,@function
.align 16
sumsq_mmx_assist:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi
        pushl   %ecx
        pushl   %ebx

        movl    8(%ebp),%esi            # esi = in
        movl    12(%ebp),%ecx           # ecx = samples remaining
        xor     %eax,%eax               # edx:eax = running 64-bit sum
        xor     %edx,%edx

# Each pmaddwd lane holds the sum of two squares, which can be as large
# as 2**31 when both samples are -32768. Adding two such lanes in 32 bits
# (paddd) could reach 2**32 and wrap to zero, so every lane is widened
# into edx:eax on its own with add/adc instead.
1:      subl    $8,%ecx                 # is a full group of eight left?
        jl      2f                      # no: done
        movq    (%esi),%mm0             # S0 S1 S2 S3
        pmaddwd %mm0,%mm0               # (S0^2+S1^2) (S2^2+S3^2)
        movq    8(%esi),%mm6            # S4 S5 S6 S7
        pmaddwd %mm6,%mm6               # (S4^2+S5^2) (S6^2+S7^2)

        movd    %mm0,%ebx               # S0^2+S1^2, treated as unsigned
        addl    %ebx,%eax
        adcl    $0,%edx                 # propagate carry into high word
        psrlq   $32,%mm0
        movd    %mm0,%ebx               # S2^2+S3^2
        addl    %ebx,%eax
        adcl    $0,%edx

        movd    %mm6,%ebx               # S4^2+S5^2
        addl    %ebx,%eax
        adcl    $0,%edx
        psrlq   $32,%mm6
        movd    %mm6,%ebx               # S6^2+S7^2
        addl    %ebx,%eax
        adcl    $0,%edx

        addl    $16,%esi                # advance past eight 16-bit samples
        jmp     1b

2:      emms                            # leave MMX state so x87 code can run
        popl    %ebx
        popl    %ecx
        popl    %esi
        popl    %ebp
        ret
# Evaluate sum of squares of signed 16-bit input samples
# long sumsq_wd_mmx_assist(signed short *in,int cnt);
# Quick version, only safe for small numbers of small input values...
#
# Syntax: GNU as, AT&T operand order, 32-bit x86 with MMX.
# In:     8(%ebp)  = in   (pointer to signed 16-bit samples)
#         12(%ebp) = cnt  (number of samples)
# Out:    %eax = sum of in[i]*in[i], kept in 32 bits throughout. The two
#         paddd lanes and the final addl all wrap silently, which is why
#         this version is only safe for small totals.
# Note:   samples are consumed in groups of eight. Trailing samples that
#         do not fill a whole group are never read, so cnt below eight
#         returns 0.
# Saves:  %ebp, %esi
# Clobb:  %ecx, %edx, %mm0, %mm1, %mm2, flags. emms is executed before
#         returning.
.global sumsq_wd_mmx_assist
.type sumsq_wd_mmx_assist,@function
.align 16
sumsq_wd_mmx_assist:
        pushl   %ebp
        movl    %esp,%ebp
        pushl   %esi

        movl    8(%ebp),%esi            # esi = in
        movl    12(%ebp),%ecx           # ecx = samples remaining
        pxor    %mm2,%mm2               # mm2 = two 32-bit lane sums, zeroed

1:      subl    $8,%ecx                 # is a full group of eight left?
        jl      2f                      # no: fold the lanes and return
        movq    (%esi),%mm0             # S0 S1 S2 S3
        pmaddwd %mm0,%mm0               # (S0*S0+S1*S1) (S2*S2+S3*S3)
        movq    8(%esi),%mm1            # S4 S5 S6 S7
        pmaddwd %mm1,%mm1               # (S4*S4+S5*S5) (S6*S6+S7*S7)
        paddd   %mm1,%mm2               # accumulate both halves per lane
        paddd   %mm0,%mm2

        addl    $16,%esi                # advance past eight 16-bit samples
        jmp     1b

2:      movd    %mm2,%eax               # low-lane sum
        psrlq   $32,%mm2
        movd    %mm2,%edx               # high-lane sum
        addl    %edx,%eax               # eax = total of both lanes
        emms                            # leave MMX state so x87 code can run
        popl    %esi
        popl    %ebp
        ret