You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
39 lines
1.0 KiB
39 lines
1.0 KiB
4 months ago
|
from peachpy import *
|
||
|
from peachpy.x86_64 import *
|
||
|
|
||
|
import fp16.avx, fp16.avx2
|
||
|
|
||
|
|
||
|
# Kernel argument named "fp16": a pointer to const uint16_t. Both functions
# defined in this file read their input through this pointer.
# NOTE(review): presumably each uint16_t holds one half-precision value in the
# "alt" encoding handled by the fp16 helpers — confirm against the fp16 package.
arg_fp16 = Argument(ptr(const_uint16_t), name="fp16")
|
||
|
# Kernel argument named "fp32": a pointer to (non-const) uint32_t. Both
# functions defined in this file store their result through this pointer.
# NOTE(review): presumably each uint32_t receives the raw bits of one
# single-precision value — confirm against the callers.
arg_fp32 = Argument(ptr(uint32_t), name="fp32")
|
||
|
|
||
|
# AVX2 kernel: read one XMM register (16 bytes) from the "fp16" pointer, pass
# it to fp16.avx2.fp16_alt_xmm_to_fp32_ymm, and write the register that helper
# returns to the "fp32" pointer.
# NOTE(review): presumably 8 half-precision inputs become 8 single-precision
# outputs (a 32-byte YMM store) — confirm against the fp16.avx2 helper.
with Function(
    "fp16_alt_xmm_to_fp32_ymm_peachpy__avx2",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx2,
):
    # Bring both pointer arguments into general-purpose registers.
    src_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(src_ptr, arg_fp16)

    dst_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(dst_ptr, arg_fp32)

    # Unaligned load of the input vector.
    packed_halves = XMMRegister()
    VMOVUPS(packed_halves, [src_ptr])

    # The helper emits the conversion instructions while its call is being
    # evaluated, i.e. before the store below is emitted, so the generated
    # instruction order is: load, convert, store.
    VMOVUPS([dst_ptr], fp16.avx2.fp16_alt_xmm_to_fp32_ymm(packed_halves))

    RETURN()
|
||
|
|
||
|
# AVX kernel: read one XMM register (16 bytes) from the "fp16" pointer, pass
# it to fp16.avx.fp16_alt_xmm_to_fp32_xmm, and write the XMM register that
# helper returns (16 bytes) to the "fp32" pointer.
# NOTE(review): the load is a full 16 bytes while the result is also only
# 16 bytes wide; presumably just part of the input is converted — confirm
# against the fp16.avx helper and make sure callers supply a 16-byte input.
with Function(
    "fp16_alt_xmm_to_fp32_xmm_peachpy__avx",
    (arg_fp16, arg_fp32),
    target=uarch.default + isa.avx,
):
    # Bring both pointer arguments into general-purpose registers.
    src_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(src_ptr, arg_fp16)

    dst_ptr = GeneralPurposeRegister64()
    LOAD.ARGUMENT(dst_ptr, arg_fp32)

    # Unaligned load of the input vector.
    packed_halves = XMMRegister()
    VMOVUPS(packed_halves, [src_ptr])

    # The helper emits the conversion instructions while its call is being
    # evaluated, i.e. before the store below is emitted, so the generated
    # instruction order is: load, convert, store.
    VMOVUPS([dst_ptr], fp16.avx.fp16_alt_xmm_to_fp32_xmm(packed_halves))

    RETURN()
|