// // Copyright (c) 2017, 2020 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "harness/compat.h" // Bug: Missing in spec: atomic_intptr_t is always supported if device is 32-bits. // Bug: Missing in spec: CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE #define FLUSH fflush(stdout) #define MAX_STR 16*1024 #define ALIGNMENT 128 #define OPTIONS "-cl-std=CL2.0" // NUM_ROUNDS must be at least 1. // It determines how many sets of random data we push through the global // variables. #define NUM_ROUNDS 1 // This is a shared property of the writer and reader kernels. #define NUM_TESTED_VALUES 5 // TODO: pointer-to-half (and its vectors) // TODO: union of... #include #include #include #include #include #include #include #include #include #include "harness/typeWrappers.h" #include "harness/errorHelpers.h" #include "harness/mt19937.h" #include "procs.h" //////////////////// // Device capabilities static int l_has_double = 0; static int l_has_half = 0; static int l_64bit_device = 0; static int l_has_int64_atomics = 0; static int l_has_intptr_atomics = 0; static int l_has_cles_int64 = 0; static int l_host_is_big_endian = 1; static size_t l_max_global_id0 = 0; static cl_bool l_linker_available = false; #define check_error(errCode,msg,...) ((errCode != CL_SUCCESS) ? (log_error("ERROR: " msg "! (%s:%d)\n", ## __VA_ARGS__, __FILE__, __LINE__), 1) : 0) //////////////////// // Info about types we can use for program scope variables. class TypeInfo { public: TypeInfo() : name(""), m_buf_elem_type(""), m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false), m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0) {} TypeInfo(const char* name_arg) : name(name_arg), m_buf_elem_type(name_arg), m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false), m_elem_type(0), m_num_elem(0), m_size(0), m_value_size(0) { } // Vectors TypeInfo( TypeInfo* elem_type, int num_elem ) : m_is_vecbase(false), m_is_atomic(false), m_is_like_size_t(false), m_is_bool(false), m_elem_type(elem_type), m_num_elem(num_elem) { char the_name[10]; // long enough for longest vector type name "double16" snprintf(the_name,sizeof(the_name),"%s%d",elem_type->get_name_c_str(),m_num_elem); this->name = std::string(the_name); this->m_buf_elem_type = std::string(the_name); this->m_value_size = num_elem * elem_type->get_size(); if ( m_num_elem == 3 ) { this->m_size = 4 * elem_type->get_size(); } else { this->m_size = num_elem * elem_type->get_size(); } } const std::string& get_name(void) const { return name; } const char* get_name_c_str(void) const { return name.c_str(); } TypeInfo& set_vecbase(void) { this->m_is_vecbase = true; return *this; } TypeInfo& set_atomic(void) { this->m_is_atomic = true; return *this; } TypeInfo& set_like_size_t(void) { this->m_is_like_size_t = true; this->set_size( l_64bit_device ? 8 : 4 ); this->m_buf_elem_type = l_64bit_device ? "ulong" : "uint"; return *this; } TypeInfo& set_bool(void) { this->m_is_bool = true; return *this; } TypeInfo& set_size(size_t n) { this->m_value_size = this->m_size = n; return *this; } TypeInfo& set_buf_elem_type( const char* name ) { this->m_buf_elem_type = std::string(name); return *this; } const TypeInfo* elem_type(void) const { return m_elem_type; } int num_elem(void) const { return m_num_elem; } bool is_vecbase(void) const {return m_is_vecbase;} bool is_atomic(void) const {return m_is_atomic;} bool is_atomic_64bit(void) const {return m_is_atomic && m_size == 8;} bool is_like_size_t(void) const {return m_is_like_size_t;} bool is_bool(void) const {return m_is_bool;} size_t get_size(void) const {return m_size;} size_t get_value_size(void) const {return m_value_size;} // When passing values of this type to a kernel, what buffer type // should be used? const char* get_buf_elem_type(void) const { return m_buf_elem_type.c_str(); } std::string as_string(const cl_uchar* value_ptr) const { // This method would be shorter if I had a real handle to element // vector type. if ( this->is_bool() ) { std::string result( name ); result += "<"; result += (*value_ptr ? "true" : "false"); result += ", "; char buf[10]; sprintf(buf,"%02x",*value_ptr); result += buf; result += ">"; return result; } else if ( this->num_elem() ) { std::string result( name ); result += "<"; for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { char buf[MAX_STR]; if ( ielem ) result += ", "; for ( unsigned ibyte = 0; ibyte < this->m_elem_type->get_size() ; ibyte++ ) { sprintf(buf + 2*ibyte,"%02x", value_ptr[ ielem * this->m_elem_type->get_size() + ibyte ] ); } result += buf; } result += ">"; return result; } else { std::string result( name ); result += "<"; char buf[MAX_STR]; for ( unsigned ibyte = 0; ibyte < this->get_size() ; ibyte++ ) { sprintf(buf + 2*ibyte,"%02x", value_ptr[ ibyte ] ); } result += buf; result += ">"; return result; } } // Initialize the given buffer to a constant value initialized as if it // were from the INIT_VAR macro below. // Only needs to support values 0 and 1. void init( cl_uchar* buf, cl_uchar val) const { if ( this->num_elem() ) { for ( unsigned ielem = 0 ; ielem < this->num_elem() ; ielem++ ) { // Delegate! this->init_elem( buf + ielem * this->get_value_size()/this->num_elem(), val ); } } else { init_elem( buf, val ); } } private: void init_elem( cl_uchar* buf, cl_uchar val ) const { size_t elem_size = this->num_elem() ? this->get_value_size()/this->num_elem() : this->get_size(); memset(buf,0,elem_size); if ( val ) { if ( strstr( name.c_str(), "float" ) ) { *(float*)buf = (float)val; return; } if ( strstr( name.c_str(), "double" ) ) { *(double*)buf = (double)val; return; } if ( this->is_bool() ) { *buf = (bool)val; return; } // Write a single character value to the correct spot, // depending on host endianness. if ( l_host_is_big_endian ) *(buf + elem_size-1) = (cl_uchar)val; else *buf = (cl_uchar)val; } } public: void dump(FILE* fp) const { fprintf(fp,"Type %s : <%d,%d,%s> ", name.c_str(), (int)m_size, (int)m_value_size, m_buf_elem_type.c_str() ); if ( this->m_elem_type ) fprintf(fp, " vec(%s,%d)", this->m_elem_type->get_name_c_str(), this->num_elem() ); if ( this->m_is_vecbase ) fprintf(fp, " vecbase"); if ( this->m_is_bool ) fprintf(fp, " bool"); if ( this->m_is_like_size_t ) fprintf(fp, " like-size_t"); if ( this->m_is_atomic ) fprintf(fp, " atomic"); fprintf(fp,"\n"); fflush(fp); } private: std::string name; TypeInfo* m_elem_type; int m_num_elem; bool m_is_vecbase; bool m_is_atomic; bool m_is_like_size_t; bool m_is_bool; size_t m_size; // Number of bytes of storage occupied by this type. size_t m_value_size; // Number of bytes of value significant for this type. Differs for vec3. // When passing values of this type to a kernel, what buffer type // should be used? // For most types, it's just itself. // Use a std::string so I don't have to make a copy constructor. std::string m_buf_elem_type; }; #define NUM_SCALAR_TYPES (8+2) // signed and unsigned integral types, float and double #define NUM_VECTOR_SIZES (5) // 2,3,4,8,16 #define NUM_PLAIN_TYPES \ 5 /*boolean and size_t family */ \ + NUM_SCALAR_TYPES \ + NUM_SCALAR_TYPES*NUM_VECTOR_SIZES \ + 10 /* atomic types */ // Need room for plain, array, pointer, struct #define MAX_TYPES (4*NUM_PLAIN_TYPES) static TypeInfo type_info[MAX_TYPES]; static int num_type_info = 0; // Number of valid entries in type_info[] // A helper class to form kernel source arguments for clCreateProgramWithSource. class StringTable { public: StringTable() : m_c_strs(NULL), m_lengths(NULL), m_frozen(false), m_strings() {} ~StringTable() { release_frozen(); } void add(std::string s) { release_frozen(); m_strings.push_back(s); } const size_t num_str() { freeze(); return m_strings.size(); } const char** strs() { freeze(); return m_c_strs; } const size_t* lengths() { freeze(); return m_lengths; } private: void freeze(void) { if ( !m_frozen ) { release_frozen(); m_c_strs = (const char**) malloc(sizeof(const char*) * m_strings.size()); m_lengths = (size_t*) malloc(sizeof(size_t) * m_strings.size()); assert( m_c_strs ); assert( m_lengths ); for ( size_t i = 0; i < m_strings.size() ; i++ ) { m_c_strs[i] = m_strings[i].c_str(); m_lengths[i] = strlen(m_c_strs[i]); } m_frozen = true; } } void release_frozen(void) { if ( m_c_strs ) { free(m_c_strs); m_c_strs = 0; } if ( m_lengths ) { free(m_lengths); m_lengths = 0; } m_frozen = false; } typedef std::vector strlist_t; strlist_t m_strings; const char** m_c_strs; size_t* m_lengths; bool m_frozen; }; //////////////////// // File scope function declarations static void l_load_abilities(cl_device_id device); static const char* l_get_fp64_pragma(void); static const char* l_get_cles_int64_pragma(void); static int l_build_type_table(cl_device_id device); static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret); static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ); static int l_compare( const cl_uchar* expected, const cl_uchar* received, unsigned num_values, const TypeInfo&ti ); static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ); static std::string conversion_functions(const TypeInfo& ti); static std::string global_decls(const TypeInfo& ti, bool with_init); static std::string global_check_function(const TypeInfo& ti); static std::string writer_function(const TypeInfo& ti); static std::string reader_function(const TypeInfo& ti); static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); static int l_init_write_read( cl_device_id device, cl_context context, cl_command_queue queue ); static int l_init_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ); static int l_capacity( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size ); static int l_user_type( cl_device_id device, cl_context context, cl_command_queue queue, size_t max_size, bool separate_compilation ); //////////////////// // File scope function definitions static cl_int print_build_log(cl_program program, cl_uint num_devices, cl_device_id *device_list, cl_uint count, const char **strings, const size_t *lengths, const char* options) { cl_uint i; cl_int error; BufferOwningPtr devices; if(num_devices == 0 || device_list == NULL) { error = clGetProgramInfo(program, CL_PROGRAM_NUM_DEVICES, sizeof(num_devices), &num_devices, NULL); test_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); device_list = (cl_device_id*)malloc(sizeof(cl_device_id)*num_devices); devices.reset(device_list); memset(device_list, 0, sizeof(cl_device_id) * num_devices); error = clGetProgramInfo(program, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * num_devices, device_list, NULL); test_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); } cl_uint z; bool sourcePrinted = false; for(z = 0; z < num_devices; z++) { char deviceName[4096] = ""; error = clGetDeviceInfo(device_list[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); check_error(error, "Device \"%d\" failed to return a name. clGetDeviceInfo CL_DEVICE_NAME failed", z); cl_build_status buildStatus; error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); if(buildStatus != CL_BUILD_SUCCESS) { if(!sourcePrinted) { log_error("Build options: %s\n", options); if(count && strings) { log_error("Original source is: ------------\n"); for(i = 0; i < count; i++) log_error("%s", strings[i]); } sourcePrinted = true; } char statusString[64] = ""; if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) sprintf(statusString, "CL_BUILD_SUCCESS"); else if (buildStatus == (cl_build_status)CL_BUILD_NONE) sprintf(statusString, "CL_BUILD_NONE"); else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) sprintf(statusString, "CL_BUILD_ERROR"); else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) sprintf(statusString, "CL_BUILD_IN_PROGRESS"); else sprintf(statusString, "UNKNOWN (%d)", buildStatus); log_error("Build not successful for device \"%s\", status: %s\n", deviceName, statusString); size_t paramSize = 0; error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, 0, NULL, ¶mSize); if(check_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed")) break; std::string log; log.resize(paramSize/sizeof(char)); error = clGetProgramBuildInfo(program, device_list[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL); if(check_error(error, "Device %d (%s) failed to return a build log", z, deviceName)) break; if(log[0] == 0) log_error("clGetProgramBuildInfo returned an empty log.\n"); else { log_error("Build log:\n", deviceName); log_error("%s\n", log.c_str()); } } } return error; } static void l_load_abilities(cl_device_id device) { l_has_half = is_extension_available(device,"cl_khr_fp16"); l_has_double = is_extension_available(device,"cl_khr_fp64"); l_has_cles_int64 = is_extension_available(device,"cles_khr_int64"); l_has_int64_atomics = is_extension_available(device,"cl_khr_int64_base_atomics") && is_extension_available(device,"cl_khr_int64_extended_atomics"); { int status = CL_SUCCESS; cl_uint addr_bits = 32; status = clGetDeviceInfo(device,CL_DEVICE_ADDRESS_BITS,sizeof(addr_bits),&addr_bits,0); l_64bit_device = ( status == CL_SUCCESS && addr_bits == 64 ); } // 32-bit devices always have intptr atomics. l_has_intptr_atomics = !l_64bit_device || l_has_int64_atomics; union { char c[4]; int i; } probe; probe.i = 1; l_host_is_big_endian = !probe.c[0]; // Determine max global id. { int status = CL_SUCCESS; cl_uint max_dim = 0; status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,sizeof(max_dim),&max_dim,0); assert( status == CL_SUCCESS ); assert( max_dim > 0 ); size_t max_id[3]; max_id[0] = 0; status = clGetDeviceInfo(device,CL_DEVICE_MAX_WORK_ITEM_SIZES,max_dim*sizeof(size_t),&max_id[0],0); assert( status == CL_SUCCESS ); l_max_global_id0 = max_id[0]; } { // Is separate compilation supported? int status = CL_SUCCESS; l_linker_available = false; status = clGetDeviceInfo(device,CL_DEVICE_LINKER_AVAILABLE,sizeof(l_linker_available),&l_linker_available,0); assert( status == CL_SUCCESS ); } } static const char* l_get_fp64_pragma(void) { return l_has_double ? "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n" : ""; } static const char* l_get_cles_int64_pragma(void) { return l_has_cles_int64 ? "#pragma OPENCL EXTENSION cles_khr_int64 : enable\n" : ""; } static const char* l_get_int64_atomic_pragma(void) { return "#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable\n" "#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable\n"; } static int l_build_type_table(cl_device_id device) { int status = CL_SUCCESS; size_t iscalar = 0; size_t ivecsize = 0; int vecsizes[] = { 2, 3, 4, 8, 16 }; const char* vecbase[] = { "uchar", "char", "ushort", "short", "uint", "int", "ulong", "long", "float", "double" }; int vecbase_size[] = { 1, 1, 2, 2, 4, 4, 8, 8, 4, 8 }; const char* like_size_t[] = { "intptr_t", "uintptr_t", "size_t", "ptrdiff_t" }; const char* atomics[] = { "atomic_int", "atomic_uint", "atomic_long", "atomic_ulong", "atomic_float", "atomic_double", }; int atomics_size[] = { 4, 4, 8, 8, 4, 8 }; const char* intptr_atomics[] = { "atomic_intptr_t", "atomic_uintptr_t", "atomic_size_t", "atomic_ptrdiff_t" }; l_load_abilities(device); num_type_info = 0; // Boolean. type_info[ num_type_info++ ] = TypeInfo( "bool" ).set_bool().set_size(1).set_buf_elem_type("uchar"); // Vector types, and the related scalar element types. for ( iscalar=0; iscalar < sizeof(vecbase)/sizeof(vecbase[0]) ; ++iscalar ) { if ( !gHasLong && strstr(vecbase[iscalar],"long") ) continue; if ( !l_has_double && strstr(vecbase[iscalar],"double") ) continue; // Scalar TypeInfo* elem_type = type_info + num_type_info++; *elem_type = TypeInfo( vecbase[iscalar] ).set_vecbase().set_size( vecbase_size[iscalar] ); // Vector for ( ivecsize=0; ivecsize < sizeof(vecsizes)/sizeof(vecsizes[0]) ; ivecsize++ ) { type_info[ num_type_info++ ] = TypeInfo( elem_type, vecsizes[ivecsize] ); } } // Size_t-like types for ( iscalar=0; iscalar < sizeof(like_size_t)/sizeof(like_size_t[0]) ; ++iscalar ) { type_info[ num_type_info++ ] = TypeInfo( like_size_t[iscalar] ).set_like_size_t(); } // Atomic types. for ( iscalar=0; iscalar < sizeof(atomics)/sizeof(atomics[0]) ; ++iscalar ) { if ( !l_has_int64_atomics && strstr(atomics[iscalar],"long") ) continue; if ( !(l_has_int64_atomics && l_has_double) && strstr(atomics[iscalar],"double") ) continue; // The +7 is used to skip over the "atomic_" prefix. const char* buf_type = atomics[iscalar] + 7; type_info[ num_type_info++ ] = TypeInfo( atomics[iscalar] ).set_atomic().set_size( atomics_size[iscalar] ).set_buf_elem_type( buf_type ); } if ( l_has_intptr_atomics ) { for ( iscalar=0; iscalar < sizeof(intptr_atomics)/sizeof(intptr_atomics[0]) ; ++iscalar ) { type_info[ num_type_info++ ] = TypeInfo( intptr_atomics[iscalar] ).set_atomic().set_like_size_t(); } } assert( num_type_info <= MAX_TYPES ); // or increase MAX_TYPES #if 0 for ( size_t i = 0 ; i < num_type_info ; i++ ) { type_info[ i ].dump(stdout); } exit(0); #endif return status; } static const TypeInfo& l_find_type( const char* name ) { auto itr = std::find_if(type_info, type_info + num_type_info, [name](TypeInfo& ti) { return ti.get_name() == name; }); assert(itr != type_info + num_type_info); return *itr; } // Populate return parameters for max program variable size, preferred program variable size. static int l_get_device_info(cl_device_id device, size_t* max_size_ret, size_t* pref_size_ret) { int err = CL_SUCCESS; size_t return_size = 0; err = clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(*max_size_ret), max_size_ret, &return_size); if ( err != CL_SUCCESS ) { log_error("Error: Failed to get device info for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n"); return err; } if ( return_size != sizeof(size_t) ) { log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); return 1; } if ( return_size != sizeof(size_t) ) { log_error("Error: Invalid size %d returned for CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE\n", (int)return_size ); return 1; } return_size = 0; err = clGetDeviceInfo(device, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, sizeof(*pref_size_ret), pref_size_ret, &return_size); if ( err != CL_SUCCESS ) { log_error("Error: Failed to get device info for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE: %d\n",err); return err; } if ( return_size != sizeof(size_t) ) { log_error("Error: Invalid size %d returned for CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE\n", (int)return_size ); return 1; } return CL_SUCCESS; } static void l_set_randomly( cl_uchar* buf, size_t buf_size, RandomSeed& rand_state ) { assert( 0 == (buf_size % sizeof(cl_uint) ) ); for ( size_t i = 0; i < buf_size ; i += sizeof(cl_uint) ) { *( (cl_uint*)(buf + i) ) = genrand_int32( rand_state ); } #if 0 for ( size_t i = 0; i < buf_size ; i++ ) { printf("%02x",buf[i]); } printf("\n"); #endif } // Return num_value values of the given type. // Returns CL_SUCCESS if they compared as equal. static int l_compare( const char* test_name, const cl_uchar* expected, const cl_uchar* received, size_t num_values, const TypeInfo&ti ) { // Compare only the valid returned bytes. for ( unsigned value_idx = 0; value_idx < num_values; value_idx++ ) { const cl_uchar* expv = expected + value_idx * ti.get_size(); const cl_uchar* gotv = received + value_idx * ti.get_size(); if ( memcmp( expv, gotv, ti.get_value_size() ) ) { std::string exp_str = ti.as_string( expv ); std::string got_str = ti.as_string( gotv ); log_error("Error: %s test for type %s, at index %d: Expected %s got %s\n", test_name, ti.get_name_c_str(), value_idx, exp_str.c_str(), got_str.c_str() ); return 1; } } return CL_SUCCESS; } // Copy a target value from src[idx] to dest[idx] static int l_copy( cl_uchar* dest, unsigned dest_idx, const cl_uchar* src, unsigned src_idx, const TypeInfo&ti ) { cl_uchar* raw_dest = dest + dest_idx * ti.get_size(); const cl_uchar* raw_src = src + src_idx * ti.get_size(); memcpy( raw_dest, raw_src, ti.get_value_size() ); return 0; } static std::string conversion_functions(const TypeInfo& ti) { std::string result; static char buf[MAX_STR]; int num_printed = 0; // The atomic types just use the base type. if ( ti.is_atomic() || 0 == strcmp( ti.get_buf_elem_type(), ti.get_name_c_str() ) ) { // The type is represented in a buffer by itself. num_printed = snprintf(buf,MAX_STR, "%s from_buf(%s a) { return a; }\n" "%s to_buf(%s a) { return a; }\n", ti.get_buf_elem_type(), ti.get_buf_elem_type(), ti.get_buf_elem_type(), ti.get_buf_elem_type() ); } else { // Just use C-style cast. num_printed = snprintf(buf,MAX_STR, "%s from_buf(%s a) { return (%s)a; }\n" "%s to_buf(%s a) { return (%s)a; }\n", ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type(), ti.get_name_c_str(), ti.get_buf_elem_type() ); } // Add initializations. if ( ti.is_atomic() ) { num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, "#define INIT_VAR(a) ATOMIC_VAR_INIT(a)\n" ); } else { // This cast works even if the target type is a vector type. num_printed += snprintf( buf + num_printed, MAX_STR-num_printed, "#define INIT_VAR(a) ((%s)(a))\n", ti.get_name_c_str()); } assert( num_printed < MAX_STR ); // or increase MAX_STR result = buf; return result; } static std::string global_decls(const TypeInfo& ti, bool with_init ) { const char* tn = ti.get_name_c_str(); const char* vol = (ti.is_atomic() ? " volatile " : " "); static char decls[MAX_STR]; int num_printed = 0; if ( with_init ) { const char *decls_template_with_init = "%s %s var = INIT_VAR(0);\n" "global %s %s g_var = INIT_VAR(1);\n" "%s %s a_var[2] = { INIT_VAR(1), INIT_VAR(1) };\n" "volatile global %s %s* p_var = &a_var[1];\n\n"; num_printed = snprintf(decls,sizeof(decls),decls_template_with_init, vol,tn,vol,tn,vol,tn,vol,tn); } else { const char *decls_template_no_init = "%s %s var;\n" "global %s %s g_var;\n" "%s %s a_var[2];\n" "global %s %s* p_var;\n\n"; num_printed = snprintf(decls,sizeof(decls),decls_template_no_init, vol,tn,vol,tn,vol,tn,vol,tn); } assert( num_printed < sizeof(decls) ); return std::string(decls); } // Return the source code for the "global_check" function for the given type. // This function checks that all program-scope variables have appropriate // initial values when no explicit initializer is used. If all tests pass the // kernel writes a non-zero value to its output argument, otherwise it writes // zero. static std::string global_check_function(const TypeInfo& ti) { const std::string type_name = ti.get_buf_elem_type(); // all() should only be used on vector inputs. For scalar comparison, the // result of the equality operator can be used as a bool value. const bool is_scalar = ti.num_elem() == 0; // 0 is used to represent scalar types, not 1. const std::string is_equality_true = is_scalar ? "" : "all"; std::string code = "kernel void global_check(global int* out) {\n"; code += " const " + type_name + " zero = ((" + type_name + ")0);\n"; code += " bool status = true;\n"; if (ti.is_atomic()) { code += " status &= " + is_equality_true + "(atomic_load(&var) == zero);\n"; code += " status &= " + is_equality_true + "(atomic_load(&g_var) == zero);\n"; code += " status &= " + is_equality_true + "(atomic_load(&a_var[0]) == zero);\n"; code += " status &= " + is_equality_true + "(atomic_load(&a_var[1]) == zero);\n"; } else { code += " status &= " + is_equality_true + "(var == zero);\n"; code += " status &= " + is_equality_true + "(g_var == zero);\n"; code += " status &= " + is_equality_true + "(a_var[0] == zero);\n"; code += " status &= " + is_equality_true + "(a_var[1] == zero);\n"; } code += " status &= (p_var == NULL);\n"; code += " *out = status ? 1 : 0;\n"; code += "}\n\n"; return code; } // Return the source text for the writer function for the given type. // For types that can't be passed as pointer-to-type as a kernel argument, // use a substitute base type of the same size. static std::string writer_function(const TypeInfo& ti) { static char writer_src[MAX_STR]; int num_printed = 0; if ( !ti.is_atomic() ) { const char* writer_template_normal = "kernel void writer( global %s* src, uint idx ) {\n" " var = from_buf(src[0]);\n" " g_var = from_buf(src[1]);\n" " a_var[0] = from_buf(src[2]);\n" " a_var[1] = from_buf(src[3]);\n" " p_var = a_var + idx;\n" "}\n\n"; num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_normal,ti.get_buf_elem_type()); } else { const char* writer_template_atomic = "kernel void writer( global %s* src, uint idx ) {\n" " atomic_store( &var, from_buf(src[0]) );\n" " atomic_store( &g_var, from_buf(src[1]) );\n" " atomic_store( &a_var[0], from_buf(src[2]) );\n" " atomic_store( &a_var[1], from_buf(src[3]) );\n" " p_var = a_var + idx;\n" "}\n\n"; num_printed = snprintf(writer_src,sizeof(writer_src),writer_template_atomic,ti.get_buf_elem_type()); } assert( num_printed < sizeof(writer_src) ); std::string result = writer_src; return result; } // Return source text for teh reader function for the given type. // For types that can't be passed as pointer-to-type as a kernel argument, // use a substitute base type of the same size. static std::string reader_function(const TypeInfo& ti) { static char reader_src[MAX_STR]; int num_printed = 0; if ( !ti.is_atomic() ) { const char* reader_template_normal = "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" " *p_var = from_buf(ptr_write_val);\n" " dest[0] = to_buf(var);\n" " dest[1] = to_buf(g_var);\n" " dest[2] = to_buf(a_var[0]);\n" " dest[3] = to_buf(a_var[1]);\n" "}\n\n"; num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_normal,ti.get_buf_elem_type(),ti.get_buf_elem_type()); } else { const char* reader_template_atomic = "kernel void reader( global %s* dest, %s ptr_write_val ) {\n" " atomic_store( p_var, from_buf(ptr_write_val) );\n" " dest[0] = to_buf( atomic_load( &var ) );\n" " dest[1] = to_buf( atomic_load( &g_var ) );\n" " dest[2] = to_buf( atomic_load( &a_var[0] ) );\n" " dest[3] = to_buf( atomic_load( &a_var[1] ) );\n" "}\n\n"; num_printed = snprintf(reader_src,sizeof(reader_src),reader_template_atomic,ti.get_buf_elem_type(),ti.get_buf_elem_type()); } assert( num_printed < sizeof(reader_src) ); std::string result = reader_src; return result; } // Check that all globals where appropriately default-initialized. static int check_global_initialization(cl_context context, cl_program program, cl_command_queue queue) { int status = CL_SUCCESS; // Create a buffer on device to store a unique integer. cl_int is_init_valid = 0; clMemWrapper buffer(clCreateBuffer(context, CL_MEM_WRITE_ONLY | CL_MEM_COPY_HOST_PTR, sizeof(is_init_valid), &is_init_valid, &status)); test_error_ret(status, "Failed to allocate buffer", status); // Create, setup and invoke kernel. clKernelWrapper global_check(clCreateKernel(program, "global_check", &status)); test_error_ret(status, "Failed to create global_check kernel", status); status = clSetKernelArg(global_check, 0, sizeof(cl_mem), &buffer); test_error_ret(status, "Failed to set up argument for the global_check kernel", status); const cl_uint work_dim = 1; const size_t global_work_offset[] = { 0 }; const size_t global_work_size[] = { 1 }; status = clEnqueueNDRangeKernel(queue, global_check, work_dim, global_work_offset, global_work_size, nullptr, 0, nullptr, nullptr); test_error_ret(status, "Failed to run global_check kernel", status); status = clFinish(queue); test_error_ret(status, "clFinish() failed", status); // Read back the memory buffer from the device. status = clEnqueueReadBuffer(queue, buffer, CL_TRUE, 0, sizeof(is_init_valid), &is_init_valid, 0, nullptr, nullptr); test_error_ret(status, "Failed to read buffer from device", status); if (is_init_valid == 0) { log_error("Unexpected default values were detected"); return 1; } return CL_SUCCESS; } // Check write-then-read. static int l_write_read( cl_device_id device, cl_context context, cl_command_queue queue ) { int status = CL_SUCCESS; int itype; RandomSeed rand_state( gRandomSeed ); for ( itype = 0; itype < num_type_info ; itype++ ) { status = status | l_write_read_for_type(device,context,queue,type_info[itype], rand_state ); FLUSH; } return status; } static int l_write_read_for_type( cl_device_id device, cl_context context, cl_command_queue queue, const TypeInfo& ti, RandomSeed& rand_state ) { int err = CL_SUCCESS; std::string type_name( ti.get_name() ); const char* tn = type_name.c_str(); log_info(" %s ",tn); StringTable ksrc; ksrc.add( l_get_fp64_pragma() ); ksrc.add( l_get_cles_int64_pragma() ); if (ti.is_atomic_64bit()) ksrc.add( l_get_int64_atomic_pragma() ); ksrc.add( conversion_functions(ti) ); ksrc.add( global_decls(ti,false) ); ksrc.add( global_check_function(ti) ); ksrc.add( writer_function(ti) ); ksrc.add( reader_function(ti) ); int status = CL_SUCCESS; clProgramWrapper program; clKernelWrapper writer; status = create_single_kernel_helper_with_build_options(context, &program, &writer, ksrc.num_str(), ksrc.strs(), "writer", OPTIONS); test_error_ret(status,"Failed to create program for read-after-write test",status); clKernelWrapper reader( clCreateKernel( program, "reader", &status ) ); test_error_ret(status,"Failed to create reader kernel for read-after-write test",status); // Check size query. size_t used_bytes = 0; status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); test_error_ret(status,"Failed to query global variable total size",status); size_t expected_used_bytes = (NUM_TESTED_VALUES-1)*ti.get_size() // Two regular variables and an array of 2 elements. + ( l_64bit_device ? 8 : 4 ); // The pointer if ( used_bytes < expected_used_bytes ) { log_error("Error program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_used_bytes, (unsigned long long)used_bytes ); err |= 1; } err |= check_global_initialization(context, program, queue); // We need to create 5 random values of the given type, // and read 4 of them back. const size_t write_data_size = NUM_TESTED_VALUES * sizeof(cl_ulong16); const size_t read_data_size = (NUM_TESTED_VALUES - 1) * sizeof(cl_ulong16); cl_uchar* write_data = (cl_uchar*)align_malloc(write_data_size, ALIGNMENT); cl_uchar* read_data = (cl_uchar*)align_malloc(read_data_size, ALIGNMENT); clMemWrapper write_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, write_data_size, write_data, &status ) ); test_error_ret(status,"Failed to allocate write buffer",status); clMemWrapper read_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, read_data_size, read_data, &status ) ); test_error_ret(status,"Failed to allocate read buffer",status); status = clSetKernelArg(writer,0,sizeof(cl_mem),&write_mem); test_error_ret(status,"set arg",status); status = clSetKernelArg(reader,0,sizeof(cl_mem),&read_mem); test_error_ret(status,"set arg",status); // Boolean random data needs to be massaged a bit more. const int num_rounds = ti.is_bool() ? (1 << NUM_TESTED_VALUES ) : NUM_ROUNDS; unsigned bool_iter = 0; for ( int iround = 0; iround < num_rounds ; iround++ ) { for ( cl_uint iptr_idx = 0; iptr_idx < 2 ; iptr_idx++ ) { // Index into array, to write via pointer // Generate new random data to push through. // Generate 5 * 128 bytes all the time, even though the test for many types use less than all that. cl_uchar *write_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, write_mem, CL_TRUE, CL_MAP_WRITE, 0, write_data_size, 0, 0, 0, 0); if ( ti.is_bool() ) { // For boolean, random data cast to bool isn't very random. // So use the bottom bit of bool_value_iter to get true // diversity. for ( unsigned value_idx = 0; value_idx < NUM_TESTED_VALUES ; value_idx++ ) { write_data[value_idx] = (1<= Version(3, 0)) { size_t max_global_variable_size{}; test_error(clGetDeviceInfo(device, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, sizeof(max_global_variable_size), &max_global_variable_size, nullptr), "clGetDeviceInfo failed"); skip = (max_global_variable_size != 0) ? CL_FALSE : CL_TRUE; } return CL_SUCCESS; } //////////////////// // Global functions // Test support for variables at program scope. Miscellaneous int test_progvar_prog_scope_misc(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { cl_bool skip{ CL_FALSE }; auto error = should_skip(device, skip); if (CL_SUCCESS != error) { return TEST_FAIL; } if (skip) { log_info("Skipping progvar_prog_scope_misc since it is optionally not " "supported on this device\n"); return TEST_SKIPPED_ITSELF; } size_t max_size = 0; size_t pref_size = 0; cl_int err = CL_SUCCESS; err = l_get_device_info( device, &max_size, &pref_size ); err |= l_build_type_table( device ); err |= l_capacity( device, context, queue, max_size ); err |= l_user_type( device, context, queue, false ); err |= l_user_type( device, context, queue, true ); return err; } // Test support for variables at program scope. Unitialized data int test_progvar_prog_scope_uninit(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { cl_bool skip{ CL_FALSE }; auto error = should_skip(device, skip); if (CL_SUCCESS != error) { return TEST_FAIL; } if (skip) { log_info( "Skipping progvar_prog_scope_uninit since it is optionally not " "supported on this device\n"); return TEST_SKIPPED_ITSELF; } size_t max_size = 0; size_t pref_size = 0; cl_int err = CL_SUCCESS; err = l_get_device_info( device, &max_size, &pref_size ); err |= l_build_type_table( device ); err |= l_write_read( device, context, queue ); return err; } // Test support for variables at program scope. Initialized data. int test_progvar_prog_scope_init(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { cl_bool skip{ CL_FALSE }; auto error = should_skip(device, skip); if (CL_SUCCESS != error) { return TEST_FAIL; } if (skip) { log_info("Skipping progvar_prog_scope_init since it is optionally not " "supported on this device\n"); return TEST_SKIPPED_ITSELF; } size_t max_size = 0; size_t pref_size = 0; cl_int err = CL_SUCCESS; err = l_get_device_info( device, &max_size, &pref_size ); err |= l_build_type_table( device ); err |= l_init_write_read( device, context, queue ); return err; } // A simple test for support of static variables inside a kernel. int test_progvar_func_scope(cl_device_id device, cl_context context, cl_command_queue queue, int num_elements) { cl_bool skip{ CL_FALSE }; auto error = should_skip(device, skip); if (CL_SUCCESS != error) { return TEST_FAIL; } if (skip) { log_info("Skipping progvar_func_scope since it is optionally not " "supported on this device\n"); return TEST_SKIPPED_ITSELF; } size_t max_size = 0; size_t pref_size = 0; cl_int err = CL_SUCCESS; // Deliberately have two variables with the same name but in different // scopes. // Also, use a large initialized structure in both cases. const char prog_src[] = "typedef struct { char c; int16 i; } mystruct_t;\n" "kernel void test_bump( global int* value, int which ) {\n" " if ( which ) {\n" // Explicit address space. // Last element set to 0 " static global mystruct_t persistent = {'a',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,0) };\n" " *value = persistent.i.sf++;\n" " } else {\n" // Implicitly global // Last element set to 100 " static mystruct_t persistent = {'b',(int16)(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,100) };\n" " *value = persistent.i.sf++;\n" " }\n" "}\n"; StringTable ksrc; ksrc.add( prog_src ); int status = CL_SUCCESS; clProgramWrapper program; clKernelWrapper test_bump; status = create_single_kernel_helper_with_build_options(context, &program, &test_bump, ksrc.num_str(), ksrc.strs(), "test_bump", OPTIONS); test_error_ret(status, "Failed to create program for function static variable test", status); // Check size query. size_t used_bytes = 0; status = clGetProgramBuildInfo( program, device, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, sizeof(used_bytes), &used_bytes, 0 ); test_error_ret(status,"Failed to query global variable total size",status); size_t expected_size = 2 * sizeof(cl_int); // Two ints. if ( used_bytes < expected_size ) { log_error("Error: program query for global variable total size query failed: Expected at least %llu but got %llu\n", (unsigned long long)expected_size, (unsigned long long)used_bytes ); err |= 1; } // Prepare the data. cl_int counter_value = 0; clMemWrapper counter_value_mem( clCreateBuffer( context, CL_MEM_USE_HOST_PTR, sizeof(counter_value), &counter_value, &status ) ); test_error_ret(status,"Failed to allocate counter query buffer",status); status = clSetKernelArg(test_bump,0,sizeof(cl_mem),&counter_value_mem); test_error_ret(status,"set arg",status); // Go a few rounds, alternating between the two counters in the kernel. // Same as initial values in kernel. // But "true" which increments the 0-based counter, and "false" which // increments the 100-based counter. cl_int expected_counter[2] = { 100, 0 }; const size_t one = 1; for ( int iround = 0; iround < 5 ; iround++ ) { // Must go at least twice around for ( int iwhich = 0; iwhich < 2 ; iwhich++ ) { // Cover both counters status = clSetKernelArg(test_bump,1,sizeof(iwhich),&iwhich); test_error_ret(status,"set arg",status); status = clEnqueueNDRangeKernel(queue,test_bump,1,0,&one,0,0,0,0); test_error_ret(status,"enqueue test_bump",status); status = clFinish(queue); test_error_ret(status,"finish",status); cl_uchar *counter_value_ptr = (cl_uchar *)clEnqueueMapBuffer(queue, counter_value_mem, CL_TRUE, CL_MAP_READ, 0, sizeof(counter_value), 0, 0, 0, 0); if ( counter_value != expected_counter[iwhich] ) { log_error("Error: Round %d on counter %d: Expected %d but got %d\n", iround, iwhich, expected_counter[iwhich], counter_value ); err |= 1; } expected_counter[iwhich]++; // Emulate behaviour of the kernel. clEnqueueUnmapMemObject(queue, counter_value_mem, counter_value_ptr, 0, 0, 0); } } if ( CL_SUCCESS == err ) { log_info("OK\n"); FLUSH; } return err; }