// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "crc32.h" #include "kernelHelpers.h" #include "deviceInfo.h" #include "errorHelpers.h" #include "imageHelpers.h" #include "typeWrappers.h" #include "testHarness.h" #include "parseParameters.h" #include <cassert> #include <vector> #include <string> #include <fstream> #include <sstream> #include <iomanip> #include <mutex> #include <algorithm> #if defined(_WIN32) std::string slash = "\\"; #else std::string slash = "/"; #endif static std::mutex gCompilerMutex; static cl_int get_first_device_id(const cl_context context, cl_device_id &device); long get_file_size(const std::string &fileName) { std::ifstream ifs(fileName.c_str(), std::ios::binary); if (!ifs.good()) return 0; // get length of file: ifs.seekg(0, std::ios::end); std::ios::pos_type length = ifs.tellg(); return static_cast<long>(length); } static std::string get_kernel_content(unsigned int numKernelLines, const char *const *kernelProgram) { std::string kernel; for (size_t i = 0; i < numKernelLines; ++i) { std::string chunk(kernelProgram[i], 0, std::string::npos); kernel += chunk; } return kernel; } std::string get_kernel_name(const std::string &source) { // Create list of kernel names std::string kernelsList; size_t kPos = source.find("kernel"); while (kPos != std::string::npos) { // check for '__kernel' size_t pos = kPos; if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_') pos -= 2; // check character before 'kernel' (white space expected) size_t wsPos = source.find_last_of(" \t\r\n", pos); if (wsPos == std::string::npos || wsPos + 1 == pos) { // check character after 'kernel' (white space expected) size_t akPos = kPos + sizeof("kernel") - 1; wsPos = source.find_first_of(" \t\r\n", akPos); if (!(wsPos == akPos)) { kPos = source.find("kernel", kPos + 1); continue; } bool attributeFound; do { attributeFound = false; // find '(' after kernel name name size_t pPos = source.find("(", akPos); if (!(pPos != std::string::npos)) continue; // check for not empty kernel name before '(' pos = source.find_last_not_of(" \t\r\n", pPos - 1); if (!(pos != std::string::npos && pos > akPos)) continue; // find character before kernel name wsPos = source.find_last_of(" \t\r\n", pos); if (!(wsPos != std::string::npos && wsPos >= akPos)) continue; std::string name = source.substr(wsPos + 1, pos + 1 - (wsPos + 1)); // check for kernel attribute if (name == "__attribute__") { attributeFound = true; int pCount = 1; akPos = pPos + 1; while (pCount > 0 && akPos != std::string::npos) { akPos = source.find_first_of("()", akPos + 1); if (akPos != std::string::npos) { if (source[akPos] == '(') pCount++; else pCount--; } } } else { kernelsList += name + "."; } } while (attributeFound); } kPos = source.find("kernel", kPos + 1); } std::ostringstream oss; if (MAX_LEN_FOR_KERNEL_LIST > 0) { if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1) { kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1); kernelsList[kernelsList.size() - 1] = '.'; kernelsList[kernelsList.size() - 1] = '.'; } oss << kernelsList; } return oss.str(); } static std::string get_offline_compilation_file_type_str(const CompilationMode compilationMode) { switch (compilationMode) { default: assert(0 && "Invalid compilation mode"); abort(); case kOnline: assert(0 && "Invalid compilation mode for offline compilation"); abort(); case kBinary: return "binary"; case kSpir_v: return "SPIR-V"; } } static std::string get_unique_filename_prefix(unsigned int numKernelLines, const char *const *kernelProgram, const char *buildOptions) { std::string kernel = get_kernel_content(numKernelLines, kernelProgram); std::string kernelName = get_kernel_name(kernel); cl_uint kernelCrc = crc32(kernel.data(), kernel.size()); std::ostringstream oss; oss << kernelName << std::hex << std::setfill('0') << std::setw(8) << kernelCrc; if (buildOptions) { cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions)); oss << '.' << std::hex << std::setfill('0') << std::setw(8) << bOptionsCrc; } return oss.str(); } static std::string get_cl_build_options_filename_with_path(const std::string &filePath, const std::string &fileNamePrefix) { return filePath + slash + fileNamePrefix + ".options"; } static std::string get_cl_source_filename_with_path(const std::string &filePath, const std::string &fileNamePrefix) { return filePath + slash + fileNamePrefix + ".cl"; } static std::string get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize, const std::string &filePath, const std::string &fileNamePrefix) { std::string binaryFilename = filePath + slash + fileNamePrefix; if (kSpir_v == mode) { std::ostringstream extension; extension << ".spv" << deviceAddrSpaceSize; binaryFilename += extension.str(); } return binaryFilename; } static bool file_exist_on_disk(const std::string &filePath, const std::string &fileName) { std::string fileNameWithPath = filePath + slash + fileName; bool exist = false; std::ifstream ifs; ifs.open(fileNameWithPath.c_str(), std::ios::binary); if (ifs.good()) exist = true; ifs.close(); return exist; } static bool should_save_kernel_source_to_disk(CompilationMode mode, CompilationCacheMode cacheMode, const std::string &binaryPath, const std::string &binaryName) { bool saveToDisk = false; if (cacheMode == kCacheModeDumpCl || (cacheMode == kCacheModeOverwrite && mode != kOnline)) { saveToDisk = true; } if (cacheMode == kCacheModeCompileIfAbsent && mode != kOnline) { saveToDisk = !file_exist_on_disk(binaryPath, binaryName); } return saveToDisk; } static int save_kernel_build_options_to_disk(const std::string &path, const std::string &prefix, const char *buildOptions) { std::string filename = get_cl_build_options_filename_with_path(path, prefix); std::ofstream ofs(filename.c_str(), std::ios::binary); if (!ofs.good()) { log_info("Can't save kernel build options: %s\n", filename.c_str()); return -1; } ofs.write(buildOptions, strlen(buildOptions)); ofs.close(); log_info("Saved kernel build options to file: %s\n", filename.c_str()); return CL_SUCCESS; } static int save_kernel_source_to_disk(const std::string &path, const std::string &prefix, const std::string &source) { std::string filename = get_cl_source_filename_with_path(path, prefix); std::ofstream ofs(filename.c_str(), std::ios::binary); if (!ofs.good()) { log_info("Can't save kernel source: %s\n", filename.c_str()); return -1; } ofs.write(source.c_str(), source.size()); ofs.close(); log_info("Saved kernel source to file: %s\n", filename.c_str()); return CL_SUCCESS; } static int save_kernel_source_and_options_to_disk(unsigned int numKernelLines, const char *const *kernelProgram, const char *buildOptions) { int error; std::string kernel = get_kernel_content(numKernelLines, kernelProgram); std::string kernelNamePrefix = get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions); // save kernel source to disk error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix, kernel); // save kernel build options to disk if exists if (buildOptions != NULL) error |= save_kernel_build_options_to_disk( gCompilationCachePath, kernelNamePrefix, buildOptions); return error; } static std::string get_compilation_mode_str(const CompilationMode compilationMode) { switch (compilationMode) { default: assert(0 && "Invalid compilation mode"); abort(); case kOnline: return "online"; case kBinary: return "binary"; case kSpir_v: return "spir-v"; } } static cl_int get_cl_device_info_str(const cl_device_id device, const cl_uint device_address_space_size, const CompilationMode compilationMode, std::string &clDeviceInfo) { std::string extensionsString = get_device_extensions_string(device); std::string versionString = get_device_version_string(device); std::ostringstream clDeviceInfoStream; std::string file_type = get_offline_compilation_file_type_str(compilationMode); clDeviceInfoStream << "# OpenCL device info affecting " << file_type << " offline compilation:" << std::endl << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << std::endl << "CL_DEVICE_EXTENSIONS=\"" << extensionsString << "\"" << std::endl; /* We only need the device's supported IL version(s) when compiling IL * that will be loaded with clCreateProgramWithIL() */ if (compilationMode == kSpir_v) { std::string ilVersionString = get_device_il_version_string(device); clDeviceInfoStream << "CL_DEVICE_IL_VERSION=\"" << ilVersionString << "\"" << std::endl; } clDeviceInfoStream << "CL_DEVICE_VERSION=\"" << versionString << "\"" << std::endl; clDeviceInfoStream << "CL_DEVICE_IMAGE_SUPPORT=" << (0 == checkForImageSupport(device)) << std::endl; clDeviceInfoStream << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str() << "\"" << std::endl; clDeviceInfo = clDeviceInfoStream.str(); return CL_SUCCESS; } static int write_cl_device_info(const cl_device_id device, const cl_uint device_address_space_size, const CompilationMode compilationMode, std::string &clDeviceInfoFilename) { std::string clDeviceInfo; int error = get_cl_device_info_str(device, device_address_space_size, compilationMode, clDeviceInfo); if (error != CL_SUCCESS) { return error; } cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size()); /* Get the filename for the clDeviceInfo file. * Note: the file includes the hash on its content, so it is usually * unnecessary to delete it. */ std::ostringstream clDeviceInfoFilenameStream; clDeviceInfoFilenameStream << gCompilationCachePath << slash << "clDeviceInfo-"; clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8) << crc << ".txt"; clDeviceInfoFilename = clDeviceInfoFilenameStream.str(); if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size()) { /* The CL device info file has already been created. * Nothing to do. */ return 0; } /* The file does not exist or its length is not as expected. * Create/overwrite it. */ std::ofstream ofs(clDeviceInfoFilename); if (!ofs.good()) { log_info("OfflineCompiler: can't create CL device info file: %s\n", clDeviceInfoFilename.c_str()); return -1; } ofs << clDeviceInfo; ofs.close(); return CL_SUCCESS; } static std::string get_offline_compilation_command( const cl_uint device_address_space_size, const CompilationMode compilationMode, const std::string &bOptions, const std::string &sourceFilename, const std::string &outputFilename, const std::string &clDeviceInfoFilename) { std::ostringstream wrapperOptions; wrapperOptions << gCompilationProgram << " --mode=" << get_compilation_mode_str(compilationMode) << " --source=" << sourceFilename << " --output=" << outputFilename << " --cl-device-info=" << clDeviceInfoFilename; if (bOptions != "") { // Add build options passed to this function wrapperOptions << " -- " << bOptions; } return wrapperOptions.str(); } static int invoke_offline_compiler(const cl_device_id device, const cl_uint device_address_space_size, const CompilationMode compilationMode, const std::string &bOptions, const std::string &sourceFilename, const std::string &outputFilename) { std::string runString; std::string clDeviceInfoFilename; // See cl_offline_compiler-interface.txt for a description of the // format of the CL device information file generated below, and // the internal command line interface for invoking the offline // compiler. cl_int err = write_cl_device_info(device, device_address_space_size, compilationMode, clDeviceInfoFilename); if (err != CL_SUCCESS) { log_error("Failed writing CL device info file\n"); return err; } runString = get_offline_compilation_command( device_address_space_size, compilationMode, bOptions, sourceFilename, outputFilename, clDeviceInfoFilename); // execute script log_info("Executing command: %s\n", runString.c_str()); fflush(stdout); int returnCode = system(runString.c_str()); if (returnCode != 0) { log_error("ERROR: Command finished with error: 0x%x\n", returnCode); return CL_COMPILE_PROGRAM_FAILURE; } return CL_SUCCESS; } static cl_int get_first_device_id(const cl_context context, cl_device_id &device) { cl_uint numDevices = 0; cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES, sizeof(cl_uint), &numDevices, NULL); test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES"); if (numDevices == 0) { log_error("ERROR: No CL devices found\n"); return -1; } std::vector<cl_device_id> devices(numDevices, 0); error = clGetContextInfo(context, CL_CONTEXT_DEVICES, numDevices * sizeof(cl_device_id), &devices[0], NULL); test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES"); device = devices[0]; return CL_SUCCESS; } static cl_int get_device_address_bits(const cl_device_id device, cl_uint &device_address_space_size) { cl_int error = clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint), &device_address_space_size, NULL); test_error(error, "Unable to obtain device address bits"); if (device_address_space_size != 32 && device_address_space_size != 64) { log_error("ERROR: Unexpected number of device address bits: %u\n", device_address_space_size); return -1; } return CL_SUCCESS; } static int get_offline_compiler_output( std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize, const CompilationMode compilationMode, const std::string &bOptions, const std::string &kernelPath, const std::string &kernelNamePrefix) { std::string sourceFilename = get_cl_source_filename_with_path(kernelPath, kernelNamePrefix); std::string outputFilename = get_binary_filename_with_path( compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix); ifs.open(outputFilename.c_str(), std::ios::binary); if (!ifs.good()) { std::string file_type = get_offline_compilation_file_type_str(compilationMode); if (gCompilationCacheMode == kCacheModeForceRead) { log_info("OfflineCompiler: can't open cached %s file: %s\n", file_type.c_str(), outputFilename.c_str()); return -1; } else { int error = invoke_offline_compiler(device, deviceAddrSpaceSize, compilationMode, bOptions, sourceFilename, outputFilename); if (error != CL_SUCCESS) return error; // read output file ifs.open(outputFilename.c_str(), std::ios::binary); if (!ifs.good()) { log_info("OfflineCompiler: can't read generated %s file: %s\n", file_type.c_str(), outputFilename.c_str()); return -1; } } } return CL_SUCCESS; } static int create_single_kernel_helper_create_program_offline( cl_context context, cl_device_id device, cl_program *outProgram, unsigned int numKernelLines, const char *const *kernelProgram, const char *buildOptions, CompilationMode compilationMode) { if (kCacheModeDumpCl == gCompilationCacheMode) { return -1; } // Get device CL_DEVICE_ADDRESS_BITS int error; cl_uint device_address_space_size = 0; if (device == NULL) { error = get_first_device_id(context, device); test_error(error, "Failed to get device ID for first device"); } error = get_device_address_bits(device, device_address_space_size); if (error != CL_SUCCESS) return error; // set build options std::string bOptions; bOptions += buildOptions ? std::string(buildOptions) : ""; std::string kernelName = get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions); std::ifstream ifs; error = get_offline_compiler_output(ifs, device, device_address_space_size, compilationMode, bOptions, gCompilationCachePath, kernelName); if (error != CL_SUCCESS) return error; ifs.seekg(0, ifs.end); int length = ifs.tellg(); ifs.seekg(0, ifs.beg); // treat modifiedProgram as input for clCreateProgramWithBinary if (compilationMode == kBinary) { // read binary from file: std::vector<unsigned char> modifiedKernelBuf(length); ifs.read((char *)&modifiedKernelBuf[0], length); ifs.close(); size_t lengths = modifiedKernelBuf.size(); const unsigned char *binaries = { &modifiedKernelBuf[0] }; log_info("offlineCompiler: clCreateProgramWithSource replaced with " "clCreateProgramWithBinary\n"); *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths, &binaries, NULL, &error); if (*outProgram == NULL || error != CL_SUCCESS) { print_error(error, "clCreateProgramWithBinary failed"); return error; } } // treat modifiedProgram as input for clCreateProgramWithIL else if (compilationMode == kSpir_v) { // read spir-v from file: std::vector<unsigned char> modifiedKernelBuf(length); ifs.read((char *)&modifiedKernelBuf[0], length); ifs.close(); size_t length = modifiedKernelBuf.size(); log_info("offlineCompiler: clCreateProgramWithSource replaced with " "clCreateProgramWithIL\n"); if (gCoreILProgram) { *outProgram = clCreateProgramWithIL(context, &modifiedKernelBuf[0], length, &error); } else { cl_platform_id platform; error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(cl_platform_id), &platform, NULL); test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed"); clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL; clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn) clGetExtensionFunctionAddressForPlatform( platform, "clCreateProgramWithILKHR"); if (clCreateProgramWithILKHR == NULL) { log_error( "ERROR: clGetExtensionFunctionAddressForPlatform failed\n"); return -1; } *outProgram = clCreateProgramWithILKHR( context, &modifiedKernelBuf[0], length, &error); } if (*outProgram == NULL || error != CL_SUCCESS) { if (gCoreILProgram) { print_error(error, "clCreateProgramWithIL failed"); } else { print_error(error, "clCreateProgramWithILKHR failed"); } return error; } } return CL_SUCCESS; } static int create_single_kernel_helper_create_program( cl_context context, cl_device_id device, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, const char *buildOptions, CompilationMode compilationMode) { std::lock_guard<std::mutex> compiler_lock(gCompilerMutex); std::string filePrefix = get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions); bool shouldSaveToDisk = should_save_kernel_source_to_disk( compilationMode, gCompilationCacheMode, gCompilationCachePath, filePrefix); if (shouldSaveToDisk) { if (CL_SUCCESS != save_kernel_source_and_options_to_disk( numKernelLines, kernelProgram, buildOptions)) { log_error("Unable to dump kernel source to disk"); return -1; } } if (compilationMode == kOnline) { int error = CL_SUCCESS; /* Create the program object from source */ *outProgram = clCreateProgramWithSource(context, numKernelLines, kernelProgram, NULL, &error); if (*outProgram == NULL || error != CL_SUCCESS) { print_error(error, "clCreateProgramWithSource failed"); return error; } return CL_SUCCESS; } else { return create_single_kernel_helper_create_program_offline( context, device, outProgram, numKernelLines, kernelProgram, buildOptions, compilationMode); } } int create_single_kernel_helper_create_program(cl_context context, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, const char *buildOptions) { return create_single_kernel_helper_create_program( context, NULL, outProgram, numKernelLines, kernelProgram, buildOptions, gCompilationMode); } int create_single_kernel_helper_create_program_for_device( cl_context context, cl_device_id device, cl_program *outProgram, unsigned int numKernelLines, const char **kernelProgram, const char *buildOptions) { return create_single_kernel_helper_create_program( context, device, outProgram, numKernelLines, kernelProgram, buildOptions, gCompilationMode); } int create_single_kernel_helper_with_build_options( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const char *buildOptions) { return create_single_kernel_helper(context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, buildOptions); } // Creates and builds OpenCL C/C++ program, and creates a kernel int create_single_kernel_helper(cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const char *buildOptions) { // For the logic that automatically adds -cl-std it is much cleaner if the // build options have RAII. This buffer will store the potentially updated // build options, in which case buildOptions will point at the string owned // by this buffer. std::string build_options_internal{ buildOptions ? buildOptions : "" }; // Check the build options for the -cl-std option. if (!buildOptions || !strstr(buildOptions, "-cl-std")) { // If the build option isn't present add it using the latest OpenCL-C // version supported by the device. This allows calling code to force a // particular CL C version if it is required, but also means that // callers need not specify a version if they want to assume the most // recent CL C. auto version = get_max_OpenCL_C_for_context(context); std::string cl_std{}; if (version >= Version(3, 0)) { cl_std = "-cl-std=CL3.0"; } else if (version >= Version(2, 0) && version < Version(3, 0)) { cl_std = "-cl-std=CL2.0"; } else { // If the -cl-std build option is not specified, the highest OpenCL // C 1.x language version supported by each device is used when // compiling the program for each device. cl_std = ""; } build_options_internal += ' '; build_options_internal += cl_std; buildOptions = build_options_internal.c_str(); } int error = create_single_kernel_helper_create_program( context, outProgram, numKernelLines, kernelProgram, buildOptions); if (error != CL_SUCCESS) { log_error("Create program failed: %d, line: %d\n", error, __LINE__); return error; } // Remove offline-compiler-only build options std::string newBuildOptions; if (buildOptions != NULL) { newBuildOptions = buildOptions; std::string offlineCompierOptions[] = { "-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars" }; for (auto &s : offlineCompierOptions) { std::string::size_type i = newBuildOptions.find(s); if (i != std::string::npos) newBuildOptions.erase(i, s.length()); } } // Build program and create kernel return build_program_create_kernel_helper( context, outProgram, outKernel, numKernelLines, kernelProgram, kernelName, newBuildOptions.c_str()); } // Builds OpenCL C/C++ program and creates int build_program_create_kernel_helper( cl_context context, cl_program *outProgram, cl_kernel *outKernel, unsigned int numKernelLines, const char **kernelProgram, const char *kernelName, const char *buildOptions) { int error; /* Compile the program */ int buildProgramFailed = 0; int printedSource = 0; error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL); if (error != CL_SUCCESS) { unsigned int i; print_error(error, "clBuildProgram failed"); buildProgramFailed = 1; printedSource = 1; log_error("Build options: %s\n", buildOptions); log_error("Original source is: ------------\n"); for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]); } // Verify the build status on all devices cl_uint deviceCount = 0; error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES, sizeof(deviceCount), &deviceCount, NULL); if (error != CL_SUCCESS) { print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed"); return error; } if (deviceCount == 0) { log_error("No devices found for program.\n"); return -1; } cl_device_id *devices = (cl_device_id *)malloc(deviceCount * sizeof(cl_device_id)); if (NULL == devices) return -1; BufferOwningPtr<cl_device_id> devicesBuf(devices); memset(devices, 0, deviceCount * sizeof(cl_device_id)); error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES, sizeof(cl_device_id) * deviceCount, devices, NULL); if (error != CL_SUCCESS) { print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed"); return error; } cl_uint z; bool buildFailed = false; for (z = 0; z < deviceCount; z++) { char deviceName[4096] = ""; error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); if (error != CL_SUCCESS || deviceName[0] == '\0') { log_error("Device \"%d\" failed to return a name\n", z); print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed"); } cl_build_status buildStatus; error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_STATUS, sizeof(buildStatus), &buildStatus, NULL); if (error != CL_SUCCESS) { print_error(error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed"); return error; } if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed && deviceCount == 1) { buildFailed = true; log_error("clBuildProgram returned an error, but buildStatus is " "marked as CL_BUILD_SUCCESS.\n"); } if (buildStatus != CL_BUILD_SUCCESS) { char statusString[64] = ""; if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS) sprintf(statusString, "CL_BUILD_SUCCESS"); else if (buildStatus == (cl_build_status)CL_BUILD_NONE) sprintf(statusString, "CL_BUILD_NONE"); else if (buildStatus == (cl_build_status)CL_BUILD_ERROR) sprintf(statusString, "CL_BUILD_ERROR"); else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS) sprintf(statusString, "CL_BUILD_IN_PROGRESS"); else sprintf(statusString, "UNKNOWN (%d)", buildStatus); if (buildStatus != CL_BUILD_SUCCESS) log_error( "Build not successful for device \"%s\", status: %s\n", deviceName, statusString); size_t paramSize = 0; error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, 0, NULL, ¶mSize); if (error != CL_SUCCESS) { print_error( error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); return error; } std::string log; log.resize(paramSize / sizeof(char)); error = clGetProgramBuildInfo(*outProgram, devices[z], CL_PROGRAM_BUILD_LOG, paramSize, &log[0], NULL); if (error != CL_SUCCESS || log[0] == '\0') { log_error("Device %d (%s) failed to return a build log\n", z, deviceName); if (error) { print_error( error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed"); return error; } else { log_error("clGetProgramBuildInfo returned an empty log.\n"); return -1; } } // In this case we've already printed out the code above. if (!printedSource) { unsigned int i; log_error("Original source is: ------------\n"); for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]); printedSource = 1; } log_error("Build log for device \"%s\" is: ------------\n", deviceName); log_error("%s\n", log.c_str()); log_error("\n----------\n"); return -1; } } if (buildFailed) { return -1; } /* And create a kernel from it */ if (kernelName != NULL) { *outKernel = clCreateKernel(*outProgram, kernelName, &error); if (*outKernel == NULL || error != CL_SUCCESS) { print_error(error, "Unable to create kernel"); return error; } } return 0; } int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel, size_t *outMaxSize, size_t *outLimits) { cl_device_id *devices; size_t size, maxCommonSize = 0; int numDevices, i, j, error; cl_uint numDims; size_t outSize; size_t sizeLimit[] = { 1, 1, 1 }; /* Assume fewer than 16 devices will be returned */ error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &outSize); test_error(error, "Unable to obtain list of devices size for context"); devices = (cl_device_id *)malloc(outSize); BufferOwningPtr<cl_device_id> devicesBuf(devices); error = clGetContextInfo(context, CL_CONTEXT_DEVICES, outSize, devices, NULL); test_error(error, "Unable to obtain list of devices for context"); numDevices = (int)(outSize / sizeof(cl_device_id)); for (i = 0; i < numDevices; i++) { error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(size), &size, NULL); test_error(error, "Unable to obtain max work group size for device"); if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size; error = clGetKernelWorkGroupInfo(kernel, devices[i], CL_KERNEL_WORK_GROUP_SIZE, sizeof(size), &size, NULL); test_error( error, "Unable to obtain max work group size for device and kernel combo"); if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size; error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(numDims), &numDims, NULL); test_error( error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS"); sizeLimit[0] = 1; error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, numDims * sizeof(size_t), sizeLimit, NULL); test_error(error, "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES"); if (outLimits != NULL) { if (i == 0) { for (j = 0; j < 3; j++) outLimits[j] = sizeLimit[j]; } else { for (j = 0; j < (int)numDims; j++) { if (sizeLimit[j] < outLimits[j]) outLimits[j] = sizeLimit[j]; } } } } *outMaxSize = (unsigned int)maxCommonSize; return 0; } extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device, cl_kernel kernel, size_t *outSize) { cl_uint maxDim; size_t maxWgSize; size_t *maxWgSizePerDim; int error; error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof(size_t), &maxWgSize, NULL); test_error(error, "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed"); error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), &maxDim, NULL); test_error(error, "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed"); maxWgSizePerDim = (size_t *)malloc(maxDim * sizeof(size_t)); if (!maxWgSizePerDim) { log_error("Unable to allocate maxWgSizePerDim\n"); return -1; } error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES, maxDim * sizeof(size_t), maxWgSizePerDim, NULL); if (error != CL_SUCCESS) { log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n"); free(maxWgSizePerDim); return error; } // "maxWgSize" is limited to that of the first dimension. if (maxWgSize > maxWgSizePerDim[0]) { maxWgSize = maxWgSizePerDim[0]; } free(maxWgSizePerDim); *outSize = maxWgSize; return 0; } int get_max_common_work_group_size(cl_context context, cl_kernel kernel, size_t globalThreadSize, size_t *outMaxSize) { size_t sizeLimit[3]; int error = get_max_allowed_work_group_size(context, kernel, outMaxSize, sizeLimit); if (error != 0) return error; /* Now find the largest factor of globalThreadSize that is <= maxCommonSize */ /* Note for speed, we don't need to check the range of maxCommonSize, b/c once it gets to 1, the modulo test will succeed and break the loop anyway */ for (; (globalThreadSize % *outMaxSize) != 0 || (*outMaxSize > sizeLimit[0]); (*outMaxSize)--) ; return 0; } int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel, size_t *globalThreadSizes, size_t *outMaxSizes) { size_t sizeLimit[3]; size_t maxSize; int error = get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit); if (error != 0) return error; /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global sizes */ /* Simple case */ if (globalThreadSizes[0] * globalThreadSizes[1] <= maxSize) { if (globalThreadSizes[0] <= sizeLimit[0] && globalThreadSizes[1] <= sizeLimit[1]) { outMaxSizes[0] = globalThreadSizes[0]; outMaxSizes[1] = globalThreadSizes[1]; return 0; } } size_t remainingSize, sizeForThisOne; remainingSize = maxSize; int i, j; for (i = 0; i < 2; i++) { if (globalThreadSizes[i] > remainingSize) sizeForThisOne = remainingSize; else sizeForThisOne = globalThreadSizes[i]; for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ; outMaxSizes[i] = sizeForThisOne; remainingSize = maxSize; for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j]; } return 0; } int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel, size_t *globalThreadSizes, size_t *outMaxSizes) { size_t sizeLimit[3]; size_t maxSize; int error = get_max_allowed_work_group_size(context, kernel, &maxSize, sizeLimit); if (error != 0) return error; /* Now find a set of factors, multiplied together less than maxSize, but each a factor of the global sizes */ /* Simple case */ if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2] <= maxSize) { if (globalThreadSizes[0] <= sizeLimit[0] && globalThreadSizes[1] <= sizeLimit[1] && globalThreadSizes[2] <= sizeLimit[2]) { outMaxSizes[0] = globalThreadSizes[0]; outMaxSizes[1] = globalThreadSizes[1]; outMaxSizes[2] = globalThreadSizes[2]; return 0; } } size_t remainingSize, sizeForThisOne; remainingSize = maxSize; int i, j; for (i = 0; i < 3; i++) { if (globalThreadSizes[i] > remainingSize) sizeForThisOne = remainingSize; else sizeForThisOne = globalThreadSizes[i]; for (; (globalThreadSizes[i] % sizeForThisOne) != 0 || (sizeForThisOne > sizeLimit[i]); sizeForThisOne--) ; outMaxSizes[i] = sizeForThisOne; remainingSize = maxSize; for (j = 0; j <= i; j++) remainingSize /= outMaxSizes[j]; } return 0; } /* Helper to determine if a device supports an image format */ int is_image_format_supported(cl_context context, cl_mem_flags flags, cl_mem_object_type image_type, const cl_image_format *fmt) { cl_image_format *list; cl_uint count = 0; cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128, NULL, &count); if (count == 0) return 0; list = (cl_image_format *)malloc(count * sizeof(cl_image_format)); if (NULL == list) { log_error("Error: unable to allocate %ld byte buffer for image format " "list at %s:%d (err = %d)\n", count * sizeof(cl_image_format), __FILE__, __LINE__, err); return 0; } BufferOwningPtr<cl_image_format> listBuf(list); cl_int error = clGetSupportedImageFormats(context, flags, image_type, count, list, NULL); if (error) { log_error("Error: failed to obtain supported image type list at %s:%d " "(err = %d)\n", __FILE__, __LINE__, err); return 0; } // iterate looking for a match. cl_uint i; for (i = 0; i < count; i++) { if (fmt->image_channel_data_type == list[i].image_channel_data_type && fmt->image_channel_order == list[i].image_channel_order) break; } return (i < count) ? 1 : 0; } size_t get_pixel_bytes(const cl_image_format *fmt); size_t get_pixel_bytes(const cl_image_format *fmt) { size_t chanCount; switch (fmt->image_channel_order) { case CL_R: case CL_A: case CL_Rx: case CL_INTENSITY: case CL_LUMINANCE: case CL_DEPTH: chanCount = 1; break; case CL_RG: case CL_RA: case CL_RGx: chanCount = 2; break; case CL_RGB: case CL_RGBx: case CL_sRGB: case CL_sRGBx: chanCount = 3; break; case CL_RGBA: case CL_ARGB: case CL_BGRA: case CL_sBGRA: case CL_sRGBA: #ifdef CL_1RGB_APPLE case CL_1RGB_APPLE: #endif #ifdef CL_BGR1_APPLE case CL_BGR1_APPLE: #endif chanCount = 4; break; default: log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__); abort(); break; } switch (fmt->image_channel_data_type) { case CL_UNORM_SHORT_565: case CL_UNORM_SHORT_555: return 2; case CL_UNORM_INT_101010: return 4; case CL_SNORM_INT8: case CL_UNORM_INT8: case CL_SIGNED_INT8: case CL_UNSIGNED_INT8: return chanCount; case CL_SNORM_INT16: case CL_UNORM_INT16: case CL_HALF_FLOAT: case CL_SIGNED_INT16: case CL_UNSIGNED_INT16: #ifdef CL_SFIXED14_APPLE case CL_SFIXED14_APPLE: #endif return chanCount * 2; case CL_SIGNED_INT32: case CL_UNSIGNED_INT32: case CL_FLOAT: return chanCount * 4; default: log_error("Unknown channel data type at %s:%d!\n", __FILE__, __LINE__); abort(); } return 0; } test_status verifyImageSupport(cl_device_id device) { int result = checkForImageSupport(device); if (result == 0) { return TEST_PASS; } if (result == CL_IMAGE_FORMAT_NOT_SUPPORTED) { log_error("SKIPPED: Device does not supported images as required by " "this test!\n"); return TEST_SKIP; } return TEST_FAIL; } int checkForImageSupport(cl_device_id device) { cl_uint i; int error; /* Check the device props to see if images are supported at all first */ error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL); test_error(error, "Unable to query device for image support"); if (i == 0) { return CL_IMAGE_FORMAT_NOT_SUPPORTED; } /* So our support is good */ return 0; } int checkFor3DImageSupport(cl_device_id device) { cl_uint i; int error; /* Check the device props to see if images are supported at all first */ error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT, sizeof(i), &i, NULL); test_error(error, "Unable to query device for image support"); if (i == 0) { return CL_IMAGE_FORMAT_NOT_SUPPORTED; } char profile[128]; error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL); test_error(error, "Unable to query device for CL_DEVICE_PROFILE"); if (0 == strcmp(profile, "EMBEDDED_PROFILE")) { size_t width = -1L; size_t height = -1L; size_t depth = -1L; error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH, sizeof(width), &width, NULL); test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH"); error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT, sizeof(height), &height, NULL); test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT"); error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH, sizeof(depth), &depth, NULL); test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH"); if (0 == (height | width | depth)) return CL_IMAGE_FORMAT_NOT_SUPPORTED; } /* So our support is good */ return 0; } int checkForReadWriteImageSupport(cl_device_id device) { if (checkForImageSupport(device)) { return CL_IMAGE_FORMAT_NOT_SUPPORTED; } auto device_cl_version = get_device_cl_version(device); if (device_cl_version >= Version(3, 0)) { // In OpenCL 3.0, Read-Write images are optional. // Check if they are supported. cl_uint are_rw_images_supported{}; test_error( clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, sizeof(are_rw_images_supported), &are_rw_images_supported, nullptr), "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n"); if (0 == are_rw_images_supported) { log_info("READ_WRITE_IMAGE tests skipped, not supported.\n"); return CL_IMAGE_FORMAT_NOT_SUPPORTED; } } // READ_WRITE images are not supported on 1.X devices. else if (device_cl_version < Version(2, 0)) { log_info("READ_WRITE_IMAGE tests skipped, Opencl 2.0+ is requried."); return CL_IMAGE_FORMAT_NOT_SUPPORTED; } // Support for read-write image arguments is required // for an 2.X device if the device supports images. /* So our support is good */ return 0; } size_t get_min_alignment(cl_context context) { static cl_uint align_size = 0; if (0 == align_size) { cl_device_id *devices; size_t devices_size = 0; cl_uint result = 0; cl_int error; int i; error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devices_size); test_error_ret(error, "clGetContextInfo failed", 0); devices = (cl_device_id *)malloc(devices_size); if (devices == NULL) { print_error(error, "malloc failed"); return 0; } error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size, (void *)devices, NULL); test_error_ret(error, "clGetContextInfo failed", 0); for (i = 0; i < (int)(devices_size / sizeof(cl_device_id)); i++) { cl_uint alignment = 0; error = clGetDeviceInfo(devices[i], CL_DEVICE_MEM_BASE_ADDR_ALIGN, sizeof(cl_uint), (void *)&alignment, NULL); if (error == CL_SUCCESS) { alignment >>= 3; // convert bits to bytes result = (alignment > result) ? alignment : result; } else print_error(error, "clGetDeviceInfo failed"); } align_size = result; free(devices); } return align_size; } cl_device_fp_config get_default_rounding_mode(cl_device_id device) { char profileStr[128] = ""; cl_device_fp_config single = 0; int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG, sizeof(single), &single, NULL); if (error) test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0); if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST; if (0 == (single & CL_FP_ROUND_TO_ZERO)) test_error_ret(-1, "FAILURE: device must support either " "CL_DEVICE_SINGLE_FP_CONFIG or CL_FP_ROUND_TO_NEAREST", 0); // Make sure we are an embedded device before allowing a pass if ((error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr), &profileStr, NULL))) test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0); if (strcmp(profileStr, "EMBEDDED_PROFILE")) test_error_ret(error, "FAILURE: non-EMBEDDED_PROFILE devices must support " "CL_FP_ROUND_TO_NEAREST", 0); return CL_FP_ROUND_TO_ZERO; } int checkDeviceForQueueSupport(cl_device_id device, cl_command_queue_properties prop) { cl_command_queue_properties realProps; cl_int error = clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, sizeof(realProps), &realProps, NULL); test_error_ret(error, "FAILURE: Unable to get device queue properties", 0); return (realProps & prop) ? 1 : 0; } int printDeviceHeader(cl_device_id device) { char deviceName[512], deviceVendor[512], deviceVersion[512], cLangVersion[512]; int error; error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(deviceName), deviceName, NULL); test_error(error, "Unable to get CL_DEVICE_NAME for device"); error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(deviceVendor), deviceVendor, NULL); test_error(error, "Unable to get CL_DEVICE_VENDOR for device"); error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(deviceVersion), deviceVersion, NULL); test_error(error, "Unable to get CL_DEVICE_VERSION for device"); error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(cLangVersion), cLangVersion, NULL); test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device"); log_info("Compute Device Name = %s, Compute Device Vendor = %s, Compute " "Device Version = %s%s%s\n", deviceName, deviceVendor, deviceVersion, (error == CL_SUCCESS) ? ", CL C Version = " : "", (error == CL_SUCCESS) ? cLangVersion : ""); auto version = get_device_cl_version(device); if (version >= Version(3, 0)) { auto ctsVersion = get_device_info_string( device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED); log_info("Device latest conformance version passed: %s\n", ctsVersion.c_str()); } return CL_SUCCESS; } Version get_device_cl_c_version(cl_device_id device) { auto device_cl_version = get_device_cl_version(device); // The second special case is OpenCL-1.0 where CL_DEVICE_OPENCL_C_VERSION // did not exist, but since this is just the first version we can // return 1.0. if (device_cl_version == Version{ 1, 0 }) { return Version{ 1, 0 }; } // Otherwise we know we have a 1.1 <= device_version <= 2.0 where all CL C // versions are backwards compatible, hence querying with the // CL_DEVICE_OPENCL_C_VERSION query must return the most recent supported // OpenCL C version. size_t opencl_c_version_size_in_bytes{}; auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr, &opencl_c_version_size_in_bytes); test_error_ret(error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n", (Version{ -1, 0 })); std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0'); error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, opencl_c_version.size(), &opencl_c_version[0], nullptr); test_error_ret(error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n", (Version{ -1, 0 })); // Scrape out the major, minor pair from the string. auto major = opencl_c_version[opencl_c_version.find('.') - 1]; auto minor = opencl_c_version[opencl_c_version.find('.') + 1]; return Version{ major - '0', minor - '0' }; } Version get_device_latest_cl_c_version(cl_device_id device) { auto device_cl_version = get_device_cl_version(device); // If the device version >= 3.0 it must support the // CL_DEVICE_OPENCL_C_ALL_VERSIONS query from which we can extract the most // recent CL C version supported by the device. if (device_cl_version >= Version{ 3, 0 }) { size_t opencl_c_all_versions_size_in_bytes{}; auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr, &opencl_c_all_versions_size_in_bytes); test_error_ret( error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS", (Version{ -1, 0 })); std::vector<cl_name_version> name_versions( opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version)); error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, opencl_c_all_versions_size_in_bytes, name_versions.data(), nullptr); test_error_ret( error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS", (Version{ -1, 0 })); Version max_supported_cl_c_version{}; for (const auto &name_version : name_versions) { Version current_version{ CL_VERSION_MAJOR(name_version.version), CL_VERSION_MINOR(name_version.version) }; max_supported_cl_c_version = (current_version > max_supported_cl_c_version) ? current_version : max_supported_cl_c_version; } return max_supported_cl_c_version; } return get_device_cl_c_version(device); } Version get_max_OpenCL_C_for_context(cl_context context) { // Get all the devices in the context and find the maximum // universally supported OpenCL C version. size_t devices_size_in_bytes{}; auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr, &devices_size_in_bytes); test_error_ret(error, "clGetDeviceInfo failed for CL_CONTEXT_DEVICES", (Version{ -1, 0 })); std::vector<cl_device_id> devices(devices_size_in_bytes / sizeof(cl_device_id)); error = clGetContextInfo(context, CL_CONTEXT_DEVICES, devices_size_in_bytes, devices.data(), nullptr); auto current_version = get_device_latest_cl_c_version(devices[0]); std::for_each(std::next(devices.begin()), devices.end(), [¤t_version](cl_device_id device) { auto device_version = get_device_latest_cl_c_version(device); // OpenCL 3.0 is not backwards compatible with 2.0. // If we have 3.0 and 2.0 in the same driver we // use 1.2. if (((device_version >= Version(2, 0) && device_version < Version(3, 0)) && current_version >= Version(3, 0)) || (device_version >= Version(3, 0) && (current_version >= Version(2, 0) && current_version < Version(3, 0)))) { current_version = Version(1, 2); } else { current_version = (std::min)(device_version, current_version); } }); return current_version; } bool device_supports_cl_c_version(cl_device_id device, Version version) { auto device_cl_version = get_device_cl_version(device); // In general, a device does not support an OpenCL C version if it is <= // CL_DEVICE_OPENCL_C_VERSION AND it does not appear in the // CL_DEVICE_OPENCL_C_ALL_VERSIONS query. // If the device version >= 3.0 it must support the // CL_DEVICE_OPENCL_C_ALL_VERSIONS query, and the version of OpenCL C being // used must appear in the query result if it's <= // CL_DEVICE_OPENCL_C_VERSION. if (device_cl_version >= Version{ 3, 0 }) { size_t opencl_c_all_versions_size_in_bytes{}; auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0, nullptr, &opencl_c_all_versions_size_in_bytes); test_error_ret( error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS", (false)); std::vector<cl_name_version> name_versions( opencl_c_all_versions_size_in_bytes / sizeof(cl_name_version)); error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, opencl_c_all_versions_size_in_bytes, name_versions.data(), nullptr); test_error_ret( error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS", (false)); for (const auto &name_version : name_versions) { Version current_version{ CL_VERSION_MAJOR(name_version.version), CL_VERSION_MINOR(name_version.version) }; if (current_version == version) { return true; } } } return version <= get_device_cl_c_version(device); } bool poll_until(unsigned timeout_ms, unsigned interval_ms, std::function<bool()> fn) { unsigned time_spent_ms = 0; bool ret = false; while (time_spent_ms < timeout_ms) { ret = fn(); if (ret) { break; } usleep(interval_ms * 1000); time_spent_ms += interval_ms; } return ret; } bool device_supports_double(cl_device_id device) { if (is_extension_available(device, "cl_khr_fp64")) { return true; } else { cl_device_fp_config double_fp_config; cl_int err = clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG, sizeof(double_fp_config), &double_fp_config, nullptr); test_error(err, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed"); return double_fp_config != 0; } } bool device_supports_half(cl_device_id device) { return is_extension_available(device, "cl_khr_fp16"); }