You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
394 lines
13 KiB
394 lines
13 KiB
4 months ago
|
# OpenCL built-in library: type conversion functions
|
||
|
#
|
||
|
# Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
|
||
|
# Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>
|
||
|
#
|
||
|
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
|
# of this software and associated documentation files (the "Software"), to deal
|
||
|
# in the Software without restriction, including without limitation the rights
|
||
|
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||
|
# copies of the Software, and to permit persons to whom the Software is
|
||
|
# furnished to do so, subject to the following conditions:
|
||
|
#
|
||
|
# The above copyright notice and this permission notice shall be included in
|
||
|
# all copies or substantial portions of the Software.
|
||
|
#
|
||
|
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||
|
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||
|
# THE SOFTWARE.
|
||
|
|
||
|
# This script generates the file convert_type.cl, which contains all of the
|
||
|
# OpenCL functions in the form:
|
||
|
#
|
||
|
# convert_<destTypen><_sat><_roundingMode>(<sourceTypen>)
|
||
|
|
||
|
# Scalar type names, grouped by category.  `types` lists every integer
# type followed by every floating-point type.
int_types = ['char', 'uchar', 'short', 'ushort', 'int', 'uint', 'long', 'ulong']
float_types = ['float', 'double']
types = int_types + float_types
unsigned_types = [name for name in int_types if name.startswith('u')]
int64_types = ['long', 'ulong']
float64_types = ['double']

# Vector width suffixes ('' is the scalar form).  half_sizes pairs a width
# with the width of its .lo/.hi halves.
vector_sizes = ['', '2', '3', '4', '8', '16']
half_sizes = [('2', ''), ('4', '2'), ('8', '4'), ('16', '8')]

# Function-name suffixes.
saturation = ['', '_sat']
rounding_modes = ['_rtz', '_rte', '_rtp', '_rtn']

# Per floating-point type strings (not referenced in the visible part of
# this script).
float_prefix = dict(float='FLT_', double='DBL_')
float_suffix = dict(float='f', double='')

# Signed integer type with the same size as each type; used as the
# condition type handed to select().
bool_type = {name: (name[1:] if name in unsigned_types else name)
             for name in int_types}
bool_type['float'] = 'int'
bool_type['double'] = 'long'

# Unsigned counterpart of each integer type.
unsigned_type = {name: (name if name in unsigned_types else 'u' + name)
                 for name in int_types}

# Size in bytes of each type, listed in the same order as `types`.
sizeof_type = dict(zip(types, (1, 1, 2, 2, 4, 4, 8, 8, 4, 8)))

# Names of the limit macros for each integer type; the minimum of every
# unsigned type is the literal '0'.
_limit_stem = {'char': 'CHAR', 'short': 'SHRT', 'int': 'INT', 'long': 'LONG'}
limit_max = {name: ('U' + _limit_stem[name[1:]] if name in unsigned_types
                    else _limit_stem[name]) + '_MAX'
             for name in int_types}
limit_min = {name: ('0' if name in unsigned_types
                    else _limit_stem[name] + '_MIN')
             for name in int_types}
|
||
|
|
||
|
def conditional_guard(src, dst):
    """Print the preprocessor guard a src -> dst conversion needs, if any.

    A conversion involving a 64-bit floating-point type is wrapped in
    ``#ifdef cl_khr_fp64``; otherwise one involving a 64-bit integer type
    is wrapped in the cles_khr_int64 / embedded-profile check.

    Returns True when an opening guard was printed, in which case the
    caller has to print the matching ``#endif``; False when nothing was
    printed.
    """
    operands = (src, dst)

    if any(name in float64_types for name in operands):
        # In the embedded profile, if cl_khr_fp64 is supported then
        # cles_khr_int64 has to be supported as well, so only the fp64
        # guard is printed even when a 64-bit integer is also involved.
        print("#ifdef cl_khr_fp64")
        return True

    if any(name in int64_types for name in operands):
        print("#if defined cles_khr_int64 || !defined(__EMBEDDED_PROFILE__)")
        return True

    return False
|
||
|
|
||
|
|
||
|
# Fixed prologue of the generated .cl file: the "autogenerated" banner and
# license, the clc include, and the pragmas enabling the optional 64-bit
# extensions.
_GENERATED_FILE_PROLOGUE = """/* !!!! AUTOGENERATED FILE generated by convert_type.py !!!!!

DON'T CHANGE THIS FILE. MAKE YOUR CHANGES TO convert_type.py AND RUN:
$ ./generate-conversion-type-cl.sh

OpenCL type conversion functions

Copyright (c) 2013 Victor Oliveira <victormatheus@gmail.com>
Copyright (c) 2013 Jesse Towner <jessetowner@lavabit.com>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/

#include <clc/clc.h>

#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable

#if defined(__EMBEDDED_PROFILE__) && !defined(cles_khr_int64)
#error Embedded profile that supports cl_khr_fp64 also has to support cles_khr_int64
#endif

#endif

#ifdef cles_khr_int64
#pragma OPENCL EXTENSION cles_khr_int64 : enable
#endif

"""

print(_GENERATED_FILE_PROLOGUE)
|
||
|
|
||
|
#
|
||
|
# Default Conversions
|
||
|
#
|
||
|
# All conversions are in accordance with the OpenCL specification,
|
||
|
# which cites the C99 conversion rules.
|
||
|
#
|
||
|
# Casting from floating point to integer results in conversions
|
||
|
# with truncation, so it should be suitable for the default convert
|
||
|
# functions.
|
||
|
#
|
||
|
# Conversions from integer to floating-point, and floating-point to
|
||
|
# floating-point through casting are done with the default rounding
|
||
|
# mode. While C99 allows dynamically changing the rounding mode
|
||
|
# during runtime, it is not a supported feature in OpenCL according
|
||
|
# to Section 7.1 - Rounding Modes in the OpenCL 1.2 specification.
|
||
|
#
|
||
|
# Therefore, we can assume for optimization purposes that the
|
||
|
# rounding mode is fixed to round-to-nearest-even. Platform target
|
||
|
# authors should ensure that the rounding-control registers remain
|
||
|
# in this state, and that this invariant holds.
|
||
|
#
|
||
|
# Also note, even though the OpenCL specification isn't entirely
|
||
|
# clear on this matter, we implement all rounding mode combinations
|
||
|
# even for integer-to-integer conversions. When such a conversion
|
||
|
# is used, the rounding mode is ignored.
|
||
|
#
|
||
|
|
||
|
def generate_default_conversion(src, dst, mode):
    """Print convert_<dst><n><mode>() for the scalar and every vector width.

    src and dst are scalar type names; mode is the function-name suffix
    ('' or a rounding-mode suffix).  The scalar form is a plain cast.  The
    2/4/8/16-wide forms convert the .lo and .hi halves and recombine them,
    and the 3-wide form converts .s01 and .s2.  The vector bodies call the
    unsuffixed convert functions.  All definitions are wrapped in the guard
    chosen by conditional_guard().
    """
    needs_endif = conditional_guard(src, dst)
    names = dict(SRC=src, DST=dst, M=mode)

    # Scalar conversion.
    scalar_template = (
        "_CLC_DEF _CLC_OVERLOAD\n"
        "{DST} convert_{DST}{M}({SRC} x)\n"
        "{{\n"
        "return ({DST})x;\n"
        "}}\n"
    )
    print(scalar_template.format(**names))

    # Vector conversions, decomposed into the two halves of the vector.
    halves_template = (
        "_CLC_DEF _CLC_OVERLOAD\n"
        "{DST}{N} convert_{DST}{N}{M}({SRC}{N} x)\n"
        "{{\n"
        "return ({DST}{N})(convert_{DST}{H}(x.lo), convert_{DST}{H}(x.hi));\n"
        "}}\n"
    )
    for width, half_width in half_sizes:
        print(halves_template.format(N=width, H=half_width, **names))

    # 3-component vector conversion (no trailing blank line after this one).
    three_template = (
        "_CLC_DEF _CLC_OVERLOAD\n"
        "{DST}3 convert_{DST}3{M}({SRC}3 x)\n"
        "{{\n"
        "return ({DST}3)(convert_{DST}2(x.s01), convert_{DST}(x.s2));\n"
        "}}"
    )
    print(three_template.format(**names))

    if needs_endif:
        print("#endif")
|
||
|
|
||
|
|
||
|
# Unsuffixed conversions between every ordered pair of types.
for src, dst in ((s, d) for s in types for d in types):
    generate_default_conversion(src, dst, '')

# Integer-to-integer conversions are also emitted with every rounding-mode
# suffix.
for src, dst, mode in ((s, d, m)
                       for s in int_types
                       for d in int_types
                       for m in rounding_modes):
    generate_default_conversion(src, dst, mode)
|
||
|
|
||
|
#
|
||
|
# Saturated Conversions To Integers
|
||
|
#
|
||
|
# These functions are dependent on the unsaturated conversion functions
|
||
|
# generated above, and use clamp, max, min, and select to eliminate
|
||
|
# branching and vectorize the conversions.
|
||
|
#
|
||
|
# Again, as above, we allow all rounding modes for integer-to-integer
|
||
|
# conversions with saturation.
|
||
|
#
|
||
|
|
||
|
def generate_saturated_conversion(src, dst, size):
    """Print the definition of convert_<dst><size>_sat(<src><size> x).

    src is any scalar type name, dst an integer type name, and size the
    vector width suffix ('' for scalar).  The generated body is:

    * ``return x`` when src and dst are the same type;
    * for a floating-point src, an unsaturated convert followed by two
      select() calls that substitute dst's minimum / maximum when x lies
      below / above them;
    * otherwise a min / max / clamp of x into dst's range (where needed)
      followed by the unsaturated convert.

    The definition is wrapped in the guard chosen by conditional_guard().
    """
    needs_endif = conditional_guard(src, dst)

    # Signature and opening brace.
    signature = (
        "_CLC_DEF _CLC_OVERLOAD\n"
        "{DST}{N} convert_{DST}{N}_sat({SRC}{N} x)\n"
        "{{"
    )
    print(signature.format(DST=dst, SRC=src, N=size))

    # FIXME: This is a work around for lack of select function with
    # signed third argument when the first two arguments are unsigned types.
    # We cast to the signed type for sign-extension, then do a bitcast to
    # the unsigned type.
    if dst in unsigned_types:
        mask_open = "as_{0}{2}(convert_{1}{2}".format(dst, bool_type[dst], size)
        mask_close = ")"
    else:
        mask_open = "convert_{0}{1}".format(bool_type[dst], size)
        mask_close = ""

    if src == dst:
        # Conversion between same types.
        print(" return x;")

    elif src in float_types:
        # Conversion from float to int.
        body = (
            " {DST}{N} y = convert_{DST}{N}(x);\n"
            "y = select(y, ({DST}{N}){DST_MIN}, {BP}(x < ({SRC}{N}){DST_MIN}){BS});\n"
            "y = select(y, ({DST}{N}){DST_MAX}, {BP}(x > ({SRC}{N}){DST_MAX}){BS});\n"
            "return y;"
        )
        print(body.format(SRC=src, DST=dst, N=size,
                          DST_MIN=limit_min[dst], DST_MAX=limit_max[dst],
                          BP=mask_open, BS=mask_close))

    else:
        # Integer to integer conversion: bring x into dst's range first.
        src_bytes = sizeof_type[src]
        dst_bytes = sizeof_type[dst]
        src_is_unsigned = src in unsigned_types

        if src_bytes >= dst_bytes and src_is_unsigned:
            # Unsigned source that is not narrower than dst: only the
            # upper bound can be exceeded.
            print(" x = min(x, ({SRC}){DST_MAX});".format(SRC=src, DST_MAX=limit_max[dst]))
        elif src_bytes == dst_bytes:
            # Signed source, same size: only negative values are replaced.
            print(" x = max(x, ({SRC})0);".format(SRC=src))
        elif src_bytes > dst_bytes:
            # Signed source wider than dst: both bounds apply.
            print(" x = clamp(x, ({SRC}){DST_MIN}, ({SRC}){DST_MAX});"
                  .format(SRC=src, DST_MIN=limit_min[dst], DST_MAX=limit_max[dst]))
        elif not src_is_unsigned and dst in unsigned_types:
            # Signed source narrower than an unsigned dst: only negative
            # values are replaced.
            print(" x = max(x, ({SRC})0);".format(SRC=src))

        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))

    # Closing brace and guard.
    print("}")
    if needs_endif:
        print("#endif")
|
||
|
|
||
|
|
||
|
# Saturated conversions from every type to every integer type, at every
# vector width.
for src, dst, size in ((s, d, n)
                       for s in types
                       for d in int_types
                       for n in vector_sizes):
    generate_saturated_conversion(src, dst, size)
|
||
|
|
||
|
|
||
|
def generate_saturated_conversion_with_rounding(src, dst, size, mode):
    """Print convert_<dst><size>_sat<mode>() as a forwarder.

    The generated function simply returns convert_<dst><size>_sat(x), so
    the rounding-mode suffix has no effect on the result.  The definition
    is wrapped in the guard chosen by conditional_guard().
    """
    needs_endif = conditional_guard(src, dst)

    definition = "\n".join([
        "_CLC_DEF _CLC_OVERLOAD",
        "{DST}{N} convert_{DST}{N}_sat{M}({SRC}{N} x)",
        "{{",
        "return convert_{DST}{N}_sat(x);",
        "}}",
        "",
    ])
    print(definition.format(DST=dst, SRC=src, N=size, M=mode))

    if needs_endif:
        print("#endif")
|
||
|
|
||
|
|
||
|
# Integer-to-integer saturated conversions, with every rounding-mode
# suffix and at every vector width.
for src, dst, size, mode in ((s, d, n, m)
                             for s in int_types
                             for d in int_types
                             for n in vector_sizes
                             for m in rounding_modes):
    generate_saturated_conversion_with_rounding(src, dst, size, mode)
|
||
|
|
||
|
#
|
||
|
# Conversions To/From Floating-Point With Rounding
|
||
|
#
|
||
|
# Note that we assume as above that casts from floating-point to
|
||
|
# integer are done with truncation, and that the default rounding
|
||
|
# mode is fixed to round-to-nearest-even, as per C99 and OpenCL
|
||
|
# rounding rules.
|
||
|
#
|
||
|
# These functions rely on the use of abs, ceil, fabs, floor,
|
||
|
# nextafter, sign, rint and the above generated conversion functions.
|
||
|
#
|
||
|
# Only conversions to integers can have saturation.
|
||
|
#
|
||
|
|
||
|
def generate_float_conversion(src, dst, size, mode, sat):
    """Print convert_<dst><size><sat><mode>(<src><size> x).

    Handles conversions where src or dst is a floating-point type.

    src, dst -- scalar type names.
    size     -- vector width suffix ('' for scalar).
    mode     -- rounding-mode suffix: '_rtz', '_rte', '_rtp' or '_rtn'.
    sat      -- '' or '_sat'; only used when dst is an integer type.

    Generated body:

    * dst is an integer type: x is first rounded in its own type with
      rint / ceil / floor ('_rtz' needs no pre-rounding because the
      plain conversion is assumed to truncate), then converted.
    * dst is floating-point and mode is '_rte': the plain conversion,
      which is assumed to round to nearest even.
    * dst is floating-point, other modes: the plain conversion result r
      is converted back to the source type as y and compared with x.  If
      r was rounded in the wrong direction for the requested mode, it is
      moved one step the other way with nextafter().

    Any other mode with a floating-point dst produces a function with no
    return statement.  The definition is wrapped in the guard chosen by
    conditional_guard().
    """
    needs_endif = conditional_guard(src, dst)

    # Signature and opening brace.
    print("""_CLC_DEF _CLC_OVERLOAD
{DST}{N} convert_{DST}{N}{S}{M}({SRC}{N} x)
{{""".format(SRC=src, DST=dst, N=size, M=mode, S=sat))

    if dst in int_types:
        # Pre-round in the source type; '_rtz' has no entry on purpose.
        rounding_call = {'_rte': 'rint', '_rtp': 'ceil', '_rtn': 'floor'}.get(mode)
        if rounding_call is not None:
            print(" x = {F}(x);".format(F=rounding_call))
        print(" return convert_{DST}{N}{S}(x);".format(DST=dst, N=size, S=sat))

    elif mode == '_rte':
        print(" return convert_{DST}{N}(x);".format(DST=dst, N=size))

    else:
        print(" {DST}{N} r = convert_{DST}{N}(x);".format(DST=dst, N=size))
        # y is the rounded result expressed in the source type again, so
        # it can be compared with x.  It must be computed from r; the
        # previous text initialised y from itself.
        print(" {SRC}{N} y = convert_{SRC}{N}(r);".format(SRC=src, N=size))

        if mode == '_rtz':
            # Step back towards zero when rounding increased the magnitude.
            if src in int_types:
                print(" {USRC}{N} abs_x = abs(x);".format(USRC=unsigned_type[src], N=size))
                print(" {USRC}{N} abs_y = abs(y);".format(USRC=unsigned_type[src], N=size))
            else:
                print(" {SRC}{N} abs_x = fabs(x);".format(SRC=src, N=size))
                print(" {SRC}{N} abs_y = fabs(y);".format(SRC=src, N=size))
            print(" return select(r, nextafter(r, sign(r) * ({DST}{N})-INFINITY), convert_{BOOL}{N}(abs_y > abs_x));"
                  .format(DST=dst, N=size, BOOL=bool_type[dst]))
        elif mode == '_rtp':
            # Step up when the rounded value ended up below x.
            print(" return select(r, nextafter(r, ({DST}{N})INFINITY), convert_{BOOL}{N}(y < x));"
                  .format(DST=dst, N=size, BOOL=bool_type[dst]))
        elif mode == '_rtn':
            # Step down when the rounded value ended up above x.
            print(" return select(r, nextafter(r, ({DST}{N})-INFINITY), convert_{BOOL}{N}(y > x));"
                  .format(DST=dst, N=size, BOOL=bool_type[dst]))

    # Closing brace and guard.
    print("}")
    if needs_endif:
        print("#endif")
|
||
|
|
||
|
|
||
|
# Floating-point to integer: every vector width and rounding mode, with
# and without saturation.
for src, dst, size, mode, sat in ((s, d, n, m, t)
                                  for s in float_types
                                  for d in int_types
                                  for n in vector_sizes
                                  for m in rounding_modes
                                  for t in saturation):
    generate_float_conversion(src, dst, size, mode, sat)

# Any type to floating-point: every vector width and rounding mode, never
# saturated.
for src, dst, size, mode in ((s, d, n, m)
                             for s in types
                             for d in float_types
                             for n in vector_sizes
                             for m in rounding_modes):
    generate_float_conversion(src, dst, size, mode, '')
|