You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
221 lines
6.9 KiB
221 lines
6.9 KiB
/*
|
|
Copyright (c) 2014, Intel Corporation
|
|
All rights reserved.
|
|
|
|
Redistribution and use in source and binary forms, with or without
|
|
modification, are permitted provided that the following conditions are met:
|
|
|
|
* Redistributions of source code must retain the above copyright notice,
|
|
* this list of conditions and the following disclaimer.
|
|
|
|
* Redistributions in binary form must reproduce the above copyright notice,
|
|
* this list of conditions and the following disclaimer in the documentation
|
|
* and/or other materials provided with the distribution.
|
|
|
|
* Neither the name of Intel Corporation nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
|
|
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
|
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
|
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
|
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
|
|
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
|
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
|
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
|
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
|
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/******************************************************************************/
|
|
// ALGORITHM DESCRIPTION
|
|
// ---------------------
|
|
//
|
|
// X87 version:
|
|
// Use 80-bit FPU precision fmul, fsqrt to compute square and sqrt.
|
|
//
|
|
// SSE version:
|
|
// Swap x, y if |x|<|y|
|
|
// For x=2^k*x, get y=y*2^(-k)
|
|
// Get S ~ sqrt(x^2+y^2) (leading 1 + leading 25 mantissa bits)
|
|
//
|
|
// Get D = ( RN(x^2+y^2) - S^2 ) + ( x^2 - RN(x^2) ) +
|
|
// + ( y^2 - ((RN(x^2+y^2)-RN(x^2)) )
|
|
//
|
|
// Result is 2^k*(S + Se), where Se = S*e
|
|
// S*e is approximated as (D/2S)*( 1 - (D/2S)^2*1.0/S )
|
|
//
|
|
// Return 2^k*(S+Se)
|
|
//
|
|
// For |y/x|<2^(-64), return x
|
|
//
|
|
// For cases where maximum biased exponent is either greater than 7fdh or
|
|
// below 32, take a special path to check for special cases (0, NaN, Inf),
|
|
// possible overflow, and more accurate computation for denormal results
|
|
//
|
|
// Special cases:
|
|
// hypot(x,y), hypot(y,x), and hypot(x,-y) are equivalent
|
|
// hypot(x,+-0) is equivalent to fabs(x)
|
|
// hypot(x,y) = y if (x==NaN or x==INF) and y==INF
|
|
// hypot(x,y) = x if (x==NaN or x==INF) and y!=INF (even if y==NaN!)
|
|
// hypot(x,y) = y if (x!=NaN and x!=INF) and (y==NaN or y==INF)
|
|
//
|
|
/******************************************************************************/
|
|
|
|
#include <private/bionic_asm.h>
|
|
# -- Begin static_func
/*
 * static_func
 *
 * Position-independent lookup of static_const_table.
 *
 * In:    nothing.
 * Out:   %eax = run-time address of static_const_table.
 * Uses:  only %eax is written; the call/pop pair leaves %esp where it
 *        was on entry.
 */
        .text
        .align __bionic_asm_align
        .type static_func, @function
static_func:
        call      .Lstatic_func_pc          # pushes the address of the label below
.Lstatic_func_pc:
        popl      %eax                      # %eax = address of .Lstatic_func_pc
        lea       _GLOBAL_OFFSET_TABLE_+[. - .Lstatic_func_pc](%eax), %eax  # %eax = _GLOBAL_OFFSET_TABLE_
        lea       static_const_table@GOTOFF(%eax), %eax                    # table located relative to the GOT
        ret
        .size static_func,.-static_func
# -- End static_func
|
|
|
|
# -- Begin hypot
/*
 * double hypot(double x, double y) -- x87 implementation.
 *
 * In:    x at 8(%ebp) and y at 16(%ebp) once the frame is set up, i.e.
 *        160(%esp) and 168(%esp) after the 152-byte allocation.
 * Out:   result on the x87 stack in %st(0).
 * Saved: %ebp is pushed and popped; %ebx is spilled to 96(%esp) and
 *        reloaded before returning.
 * Uses:  %eax, %ecx, %edx, %xmm0-%xmm3, the x87 stack and the scratch
 *        slots (%esp), 32(%esp) and 40(%esp).
 *
 * The routine classifies the operands by the top 16 bits of |x| and |y|
 * (biased exponent in bits 4-14, four leading mantissa bits below it):
 *
 *   - both top words <= 0x5fd0:
 *       return sqrt(x*x + y*y) directly from the x87 stack;
 *   - either top word >= 0x7ff0 (Inf or NaN):
 *       no NaN present      -> |x| + |y|
 *       NaN and |x| == Inf  -> |x|
 *       NaN and |y| == Inf  -> |y|
 *       otherwise           -> |x| * |y|
 *   - top words differ by more than 0x3a0 (58 exponent steps):
 *       return |x| + |y|, rounded to double;
 *   - sum of the top words < 0xbfa0:
 *       return sqrt(x*x + y*y) directly from the x87 stack;
 *   - otherwise:
 *       sqrt(x*x + y*y) is stored as a double and reloaded, so the value
 *       handed back has been rounded to double.
 */
ENTRY(hypot)
# parameter 1: 8 + %ebp
# parameter 2: 16 + %ebp
        pushl     %ebp
        movl      %esp, %ebp
        subl      $152, %esp
        movl      %ebx, 96(%esp)            # %ebx is restored at .Lhypot_done
        call      static_func               # %eax = address of static_const_table
        movl      %eax, %ebx
        movapd    (%ebx), %xmm3             # table bytes 0-15: mask that clears the sign bit
        movsd     160(%esp), %xmm0          # x
        movsd     168(%esp), %xmm1          # y
        andpd     %xmm3, %xmm0              # xmm0 = |x|
        andpd     %xmm3, %xmm1              # xmm1 = |y|
        pextrw    $3, %xmm0, %eax           # %eax = top 16 bits of |x|
        pextrw    $3, %xmm1, %edx           # %edx = top 16 bits of |y|
        cmpl      $0x5fd0, %eax
        ja        .Lhypot_check_range
        cmpl      $0x5fd0, %edx
        ja        .Lhypot_check_range

.Lhypot_x87:
        # sqrt(x*x + y*y) on the x87 stack; the result stays in %st(0).
        fldl      160(%esp)
        fldl      168(%esp)
        fxch      %st(1)
        fmul      %st(0), %st
        fxch      %st(1)
        nop                                 # kept from the original instruction sequence
        fmul      %st(0), %st
        faddp     %st, %st(1)
        fsqrt
        jmp       .Lhypot_done

.Lhypot_check_range:
        cmpl      $0x7ff0, %eax             # exponent field of |x| all ones?
        movl      %eax, %ecx                # movl leaves the flags from cmpl intact
        jae       .Lhypot_inf_nan
        subl      %edx, %ecx                # %ecx = top(|x|) - top(|y|)
        cmpl      $0x7ff0, %edx             # exponent field of |y| all ones?
        jae       .Lhypot_inf_nan
        addl      $0x3a0, %ecx              # bias so that one unsigned compare tests both signs
        addl      %edx, %eax                # %eax = top(|x|) + top(|y|)
        cmpl      $0x740, %ecx
        ja        .Lhypot_far_apart         # top words differ by more than 0x3a0
        cmpl      $0xbfa0, %eax
        jb        .Lhypot_x87
        fldl      160(%esp)
        fldl      168(%esp)
        fxch      %st(1)
        fmul      %st(0), %st
        fxch      %st(1)
        nop                                 # kept from the original instruction sequence
        fmul      %st(0), %st
        faddp     %st, %st(1)
        fsqrt

.Lhypot_round_to_double:
        # Store the x87 value as a 64-bit double and load it back, so the
        # value returned in %st(0) is the double-rounded one.
        fstpl     (%esp)
        fldl      (%esp)
        jmp       .Lhypot_done

.Lhypot_far_apart:
        movsd     %xmm0, 32(%esp)           # |x|
        movsd     %xmm1, 40(%esp)           # |y|
        fldl      32(%esp)
        faddl     40(%esp)                  # |x| + |y|
        jmp       .Lhypot_round_to_double

.Lhypot_inf_nan:
        shufpd    $0, %xmm1, %xmm0          # xmm0 = { |x|, |y| }
        movdqa    %xmm0, %xmm2
        movdqa    16(%ebx), %xmm3           # table bytes 16-31
        movsd     %xmm0, 32(%esp)           # |x|
        movsd     %xmm1, 40(%esp)           # |y|
        cmppd     $3, %xmm0, %xmm2          # predicate 3 (unordered) against itself: NaN lanes
        cmppd     $0, %xmm0, %xmm3          # predicate 0 (equal) against the table value
        movmskpd  %xmm2, %edx               # bit 0: |x| is NaN, bit 1: |y| is NaN
        movmskpd  %xmm3, %eax               # bit 0: |x| matches, bit 1: |y| matches
        testl     %edx, %edx
        je        .Lhypot_inf_no_nan
        fldl      32(%esp)
        fmull     40(%esp)                  # |x| * |y|, returned when neither lane matched
        testl     $1, %eax
        jne       .Lhypot_return_abs_x
        testl     $2, %eax
        jne       .Lhypot_return_abs_y
        jmp       .Lhypot_done

.Lhypot_inf_no_nan:
        fldl      32(%esp)
        faddl     40(%esp)                  # |x| + |y|
        jmp       .Lhypot_done

.Lhypot_return_abs_x:
        fstpl     40(%esp)                  # pop the product off the x87 stack
        fldl      32(%esp)                  # return |x|
        jmp       .Lhypot_done

.Lhypot_return_abs_y:
        fstpl     32(%esp)                  # pop the product off the x87 stack
        fldl      40(%esp)                  # return |y|
        jmp       .Lhypot_done

.Lhypot_done:
        movl      96(%esp), %ebx
        movl      %ebp, %esp
        popl      %ebp
        ret
END(hypot)
# -- End hypot
|
|
|
|
# Start file scope ASM
|
|
/* NOTE(review): ALIAS_SYMBOL is not defined in this file (presumably it
   comes from private/bionic_asm.h); it appears to make hypotl refer to
   the hypot entry point -- confirm against the macro definition. */
ALIAS_SYMBOL(hypotl, hypot);
|
|
# End file scope ASM
|
|
/*
 * static_const_table: 32 bytes of read-only constants, 16-byte aligned
 * because hypot loads both halves with aligned 128-bit moves.
 *
 *   bytes  0-15: 0x7fffffffffffffff twice; hypot ANDs x and y with it.
 *   bytes 16-31: 0x7ff0000000000000 twice; hypot compares |x| and |y|
 *                with it for equality.
 */
        .section .rodata, "a"
        .balign 16
        .type static_const_table,@object
static_const_table:
        .long 0xffffffff                    # low  half of 0x7fffffffffffffff
        .long 0x7fffffff                    # high half
        .long 0xffffffff
        .long 0x7fffffff
        .long 0x00000000                    # low  half of 0x7ff0000000000000
        .long 0x7ff00000                    # high half
        .long 0x00000000
        .long 0x7ff00000
        .size static_const_table,32
        .data
        .section .note.GNU-stack, "",@progbits
|
|
# End
|