You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
51 lines
1.3 KiB
51 lines
1.3 KiB
from peachpy import *
|
|
from peachpy.x86_64 import *
|
|
|
|
|
|
def fp16_alt_xmm_to_fp32_xmm(xmm_half):
    """Emit the instruction sequence that widens packed 16-bit halves to fp32.

    Only the four low 16-bit words of ``xmm_half`` contribute to the result
    (both unpack steps are VPUNPCKLWD, which interleaves low words only).
    ``xmm_half`` is used purely as a source operand and is never written.

    Layout assumed for each 16-bit input: 1 sign bit, 5 exponent bits,
    10 mantissa bits. An all-ones exponent gets no special treatment; it is
    rebiased like any other non-zero exponent, so no Inf/NaN is produced.
    NOTE(review): the "alt" in the name presumably refers to the ARM
    alternative half-precision format, which has no Inf/NaN -- confirm.

    The mnemonic calls are presumably recorded into the PeachPy function that
    is active at call time -- confirm against the call sites.

    :param xmm_half: XMM register holding the half-precision inputs.
    :return: a newly allocated XMM register with four 32-bit float results.
    """
    # All-zero register, used as the low 16 bits when widening each half.
    xmm_zero = XMMRegister()
    VPXOR(xmm_zero, xmm_zero, xmm_zero)

    # Each 32-bit lane becomes (half << 16): the half's sign bit lands on
    # bit 31, exactly where the fp32 sign bit lives.
    xmm_word = XMMRegister()
    VPUNPCKLWD(xmm_word, xmm_zero, xmm_half)

    # x + x == x << 1 within 16-bit words: drops the sign bit of each half.
    # Consumed only by the denormal path below.
    xmm_shl1_half = XMMRegister()
    VPADDW(xmm_shl1_half, xmm_half, xmm_half)

    # Same trick on 32-bit lanes: shifts the sign bit out of the widened word.
    xmm_shl1_nonsign = XMMRegister()
    VPADDD(xmm_shl1_nonsign, xmm_word, xmm_word)

    # -0.0f has only bit 31 set, i.e. it is the fp32 sign-bit mask.
    sign_mask = Constant.float32x4(-0.0)

    xmm_sign = XMMRegister()
    VANDPS(xmm_sign, xmm_word, sign_mask)

    # (word << 1) >> 4 is a net logical shift right by 3 with the sign
    # cleared: the 5-bit exponent ends up in bits 27..23 and the 10-bit
    # mantissa in bits 22..13, aligned with the fp32 exponent/mantissa fields.
    xmm_shr3_nonsign = XMMRegister()
    VPSRLD(xmm_shr3_nonsign, xmm_shl1_nonsign, 4)

    # 0x38000000 == (127 - 15) << 23: converts the half exponent bias (15)
    # to the fp32 exponent bias (127).
    exp_offset = Constant.uint32x4(0x38000000)

    # Result for inputs with a non-zero exponent field.
    xmm_norm_nonsign = XMMRegister()
    VPADDD(xmm_norm_nonsign, xmm_shr3_nonsign, exp_offset)

    # Denormal path. 0x3E80 in the upper 16 bits makes each lane the float
    # 0.25 (0x3E800000) with (half << 1) in the low mantissa bits, i.e.
    # 0.25 + nonsign_half * 2**-24.
    magic_mask = Constant.uint16x8(0x3E80)
    xmm_denorm_nonsign = XMMRegister()
    VPUNPCKLWD(xmm_denorm_nonsign, xmm_shl1_half, magic_mask)

    # Subtracting 0.25 leaves nonsign_half * 2**-24, which is the value of a
    # half-precision denormal (exponent field 0), correctly normalised as fp32.
    magic_bias = Constant.float32x4(0.25)
    VSUBPS(xmm_denorm_nonsign, xmm_denorm_nonsign, magic_bias)

    # 0x00800000 == 1 << 23: smallest aligned value with a non-zero exponent.
    xmm_denorm_cutoff = XMMRegister()
    VMOVDQA(xmm_denorm_cutoff, Constant.uint32x4(0x00800000))

    # Lane mask is all-ones where cutoff > aligned value, i.e. where the
    # exponent field is zero (denormal or zero input).
    xmm_denorm_mask = XMMRegister()
    VPCMPGTD(xmm_denorm_mask, xmm_denorm_cutoff, xmm_shr3_nonsign)

    # Per lane: take the denormal result where the mask is set, otherwise
    # the normalised result.
    xmm_nonsign = XMMRegister()
    VBLENDVPS(xmm_nonsign, xmm_norm_nonsign, xmm_denorm_nonsign, xmm_denorm_mask)

    # Re-attach the sign bit extracted earlier.
    xmm_float = XMMRegister()
    VORPS(xmm_float, xmm_nonsign, xmm_sign)

    return xmm_float
|