You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
1782 lines
61 KiB
1782 lines
61 KiB
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "crc32.h"
|
|
#include "kernelHelpers.h"
|
|
#include "deviceInfo.h"
|
|
#include "errorHelpers.h"
|
|
#include "imageHelpers.h"
|
|
#include "typeWrappers.h"
|
|
#include "testHarness.h"
|
|
#include "parseParameters.h"
|
|
|
|
#include <cassert>
|
|
#include <vector>
|
|
#include <string>
|
|
#include <fstream>
|
|
#include <sstream>
|
|
#include <iomanip>
|
|
#include <mutex>
|
|
#include <algorithm>
|
|
|
|
#if defined(_WIN32)
|
|
std::string slash = "\\";
|
|
#else
|
|
std::string slash = "/";
|
|
#endif
|
|
|
|
static std::mutex gCompilerMutex;
|
|
|
|
static cl_int get_first_device_id(const cl_context context,
|
|
cl_device_id &device);
|
|
|
|
// Returns the length in bytes of the file `fileName`, or 0 when the file
// cannot be opened.
long get_file_size(const std::string &fileName)
{
    std::ifstream input(fileName.c_str(), std::ios::binary);
    if (input.good())
    {
        // After seeking to the end, the read position equals the file length.
        input.seekg(0, std::ios::end);
        const std::ios::pos_type endPosition = input.tellg();
        return static_cast<long>(endPosition);
    }
    return 0;
}
|
|
|
|
// Concatenates the first `numKernelLines` strings of `kernelProgram`, in
// order, into one string. Every entry must be a valid NUL-terminated string.
static std::string get_kernel_content(unsigned int numKernelLines,
                                      const char *const *kernelProgram)
{
    std::string source;
    for (unsigned int line = 0; line < numKernelLines; ++line)
    {
        source.append(kernelProgram[line]);
    }
    return source;
}
|
|
|
|
std::string get_kernel_name(const std::string &source)
|
|
{
|
|
// Create list of kernel names
|
|
std::string kernelsList;
|
|
size_t kPos = source.find("kernel");
|
|
while (kPos != std::string::npos)
|
|
{
|
|
// check for '__kernel'
|
|
size_t pos = kPos;
|
|
if (pos >= 2 && source[pos - 1] == '_' && source[pos - 2] == '_')
|
|
pos -= 2;
|
|
|
|
// check character before 'kernel' (white space expected)
|
|
size_t wsPos = source.find_last_of(" \t\r\n", pos);
|
|
if (wsPos == std::string::npos || wsPos + 1 == pos)
|
|
{
|
|
// check character after 'kernel' (white space expected)
|
|
size_t akPos = kPos + sizeof("kernel") - 1;
|
|
wsPos = source.find_first_of(" \t\r\n", akPos);
|
|
if (!(wsPos == akPos))
|
|
{
|
|
kPos = source.find("kernel", kPos + 1);
|
|
continue;
|
|
}
|
|
|
|
bool attributeFound;
|
|
do
|
|
{
|
|
attributeFound = false;
|
|
// find '(' after kernel name name
|
|
size_t pPos = source.find("(", akPos);
|
|
if (!(pPos != std::string::npos)) continue;
|
|
|
|
// check for not empty kernel name before '('
|
|
pos = source.find_last_not_of(" \t\r\n", pPos - 1);
|
|
if (!(pos != std::string::npos && pos > akPos)) continue;
|
|
|
|
// find character before kernel name
|
|
wsPos = source.find_last_of(" \t\r\n", pos);
|
|
if (!(wsPos != std::string::npos && wsPos >= akPos)) continue;
|
|
|
|
std::string name =
|
|
source.substr(wsPos + 1, pos + 1 - (wsPos + 1));
|
|
// check for kernel attribute
|
|
if (name == "__attribute__")
|
|
{
|
|
attributeFound = true;
|
|
int pCount = 1;
|
|
akPos = pPos + 1;
|
|
while (pCount > 0 && akPos != std::string::npos)
|
|
{
|
|
akPos = source.find_first_of("()", akPos + 1);
|
|
if (akPos != std::string::npos)
|
|
{
|
|
if (source[akPos] == '(')
|
|
pCount++;
|
|
else
|
|
pCount--;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
kernelsList += name + ".";
|
|
}
|
|
} while (attributeFound);
|
|
}
|
|
kPos = source.find("kernel", kPos + 1);
|
|
}
|
|
std::ostringstream oss;
|
|
if (MAX_LEN_FOR_KERNEL_LIST > 0)
|
|
{
|
|
if (kernelsList.size() > MAX_LEN_FOR_KERNEL_LIST + 1)
|
|
{
|
|
kernelsList = kernelsList.substr(0, MAX_LEN_FOR_KERNEL_LIST + 1);
|
|
kernelsList[kernelsList.size() - 1] = '.';
|
|
kernelsList[kernelsList.size() - 1] = '.';
|
|
}
|
|
oss << kernelsList;
|
|
}
|
|
return oss.str();
|
|
}
|
|
|
|
static std::string
|
|
get_offline_compilation_file_type_str(const CompilationMode compilationMode)
|
|
{
|
|
switch (compilationMode)
|
|
{
|
|
default: assert(0 && "Invalid compilation mode"); abort();
|
|
case kOnline:
|
|
assert(0 && "Invalid compilation mode for offline compilation");
|
|
abort();
|
|
case kBinary: return "binary";
|
|
case kSpir_v: return "SPIR-V";
|
|
}
|
|
}
|
|
|
|
static std::string get_unique_filename_prefix(unsigned int numKernelLines,
|
|
const char *const *kernelProgram,
|
|
const char *buildOptions)
|
|
{
|
|
std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
|
|
std::string kernelName = get_kernel_name(kernel);
|
|
cl_uint kernelCrc = crc32(kernel.data(), kernel.size());
|
|
std::ostringstream oss;
|
|
oss << kernelName << std::hex << std::setfill('0') << std::setw(8)
|
|
<< kernelCrc;
|
|
if (buildOptions)
|
|
{
|
|
cl_uint bOptionsCrc = crc32(buildOptions, strlen(buildOptions));
|
|
oss << '.' << std::hex << std::setfill('0') << std::setw(8)
|
|
<< bOptionsCrc;
|
|
}
|
|
return oss.str();
|
|
}
|
|
|
|
|
|
static std::string
|
|
get_cl_build_options_filename_with_path(const std::string &filePath,
|
|
const std::string &fileNamePrefix)
|
|
{
|
|
return filePath + slash + fileNamePrefix + ".options";
|
|
}
|
|
|
|
static std::string
|
|
get_cl_source_filename_with_path(const std::string &filePath,
|
|
const std::string &fileNamePrefix)
|
|
{
|
|
return filePath + slash + fileNamePrefix + ".cl";
|
|
}
|
|
|
|
static std::string
|
|
get_binary_filename_with_path(CompilationMode mode, cl_uint deviceAddrSpaceSize,
|
|
const std::string &filePath,
|
|
const std::string &fileNamePrefix)
|
|
{
|
|
std::string binaryFilename = filePath + slash + fileNamePrefix;
|
|
if (kSpir_v == mode)
|
|
{
|
|
std::ostringstream extension;
|
|
extension << ".spv" << deviceAddrSpaceSize;
|
|
binaryFilename += extension.str();
|
|
}
|
|
return binaryFilename;
|
|
}
|
|
|
|
static bool file_exist_on_disk(const std::string &filePath,
|
|
const std::string &fileName)
|
|
{
|
|
std::string fileNameWithPath = filePath + slash + fileName;
|
|
bool exist = false;
|
|
std::ifstream ifs;
|
|
|
|
ifs.open(fileNameWithPath.c_str(), std::ios::binary);
|
|
if (ifs.good()) exist = true;
|
|
ifs.close();
|
|
return exist;
|
|
}
|
|
|
|
// Decides whether the kernel source has to be written to the cache
// directory:
//  - compile-if-absent cache mode with an offline compilation mode: only
//    when `binaryName` is not already present in `binaryPath`;
//  - otherwise: when the cache mode is dump-CL, or when it is overwrite and
//    the compilation mode is offline.
static bool should_save_kernel_source_to_disk(CompilationMode mode,
                                              CompilationCacheMode cacheMode,
                                              const std::string &binaryPath,
                                              const std::string &binaryName)
{
    const bool offline = (mode != kOnline);

    if (cacheMode == kCacheModeCompileIfAbsent && offline)
    {
        return !file_exist_on_disk(binaryPath, binaryName);
    }

    return cacheMode == kCacheModeDumpCl
        || (cacheMode == kCacheModeOverwrite && offline);
}
|
|
|
|
static int save_kernel_build_options_to_disk(const std::string &path,
|
|
const std::string &prefix,
|
|
const char *buildOptions)
|
|
{
|
|
std::string filename =
|
|
get_cl_build_options_filename_with_path(path, prefix);
|
|
std::ofstream ofs(filename.c_str(), std::ios::binary);
|
|
if (!ofs.good())
|
|
{
|
|
log_info("Can't save kernel build options: %s\n", filename.c_str());
|
|
return -1;
|
|
}
|
|
ofs.write(buildOptions, strlen(buildOptions));
|
|
ofs.close();
|
|
log_info("Saved kernel build options to file: %s\n", filename.c_str());
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
static int save_kernel_source_to_disk(const std::string &path,
|
|
const std::string &prefix,
|
|
const std::string &source)
|
|
{
|
|
std::string filename = get_cl_source_filename_with_path(path, prefix);
|
|
std::ofstream ofs(filename.c_str(), std::ios::binary);
|
|
if (!ofs.good())
|
|
{
|
|
log_info("Can't save kernel source: %s\n", filename.c_str());
|
|
return -1;
|
|
}
|
|
ofs.write(source.c_str(), source.size());
|
|
ofs.close();
|
|
log_info("Saved kernel source to file: %s\n", filename.c_str());
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
static int
|
|
save_kernel_source_and_options_to_disk(unsigned int numKernelLines,
|
|
const char *const *kernelProgram,
|
|
const char *buildOptions)
|
|
{
|
|
int error;
|
|
|
|
std::string kernel = get_kernel_content(numKernelLines, kernelProgram);
|
|
std::string kernelNamePrefix =
|
|
get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
|
|
|
|
// save kernel source to disk
|
|
error = save_kernel_source_to_disk(gCompilationCachePath, kernelNamePrefix,
|
|
kernel);
|
|
|
|
// save kernel build options to disk if exists
|
|
if (buildOptions != NULL)
|
|
error |= save_kernel_build_options_to_disk(
|
|
gCompilationCachePath, kernelNamePrefix, buildOptions);
|
|
|
|
return error;
|
|
}
|
|
|
|
static std::string
|
|
get_compilation_mode_str(const CompilationMode compilationMode)
|
|
{
|
|
switch (compilationMode)
|
|
{
|
|
default: assert(0 && "Invalid compilation mode"); abort();
|
|
case kOnline: return "online";
|
|
case kBinary: return "binary";
|
|
case kSpir_v: return "spir-v";
|
|
}
|
|
}
|
|
|
|
// Fills `clDeviceInfo` with the text of the CL device info file handed to
// the offline compiler: a comment line followed by KEY=VALUE lines for the
// address bits, extensions, (SPIR-V mode only) IL version, device version,
// image support and device name. Always returns CL_SUCCESS.
static cl_int get_cl_device_info_str(const cl_device_id device,
                                     const cl_uint device_address_space_size,
                                     const CompilationMode compilationMode,
                                     std::string &clDeviceInfo)
{
    const std::string extensions = get_device_extensions_string(device);
    const std::string version = get_device_version_string(device);
    const std::string fileType =
        get_offline_compilation_file_type_str(compilationMode);

    std::ostringstream info;
    info << "# OpenCL device info affecting " << fileType
         << " offline compilation:" << '\n';
    info << "CL_DEVICE_ADDRESS_BITS=" << device_address_space_size << '\n';
    info << "CL_DEVICE_EXTENSIONS=\"" << extensions << "\"" << '\n';

    /* The device's supported IL version(s) are only needed when compiling IL
     * that will be loaded with clCreateProgramWithIL() */
    if (compilationMode == kSpir_v)
    {
        const std::string ilVersion = get_device_il_version_string(device);
        info << "CL_DEVICE_IL_VERSION=\"" << ilVersion << "\"" << '\n';
    }

    info << "CL_DEVICE_VERSION=\"" << version << "\"" << '\n';
    // Streams the bool result of the comparison, i.e. "1" or "0".
    info << "CL_DEVICE_IMAGE_SUPPORT=" << (0 == checkForImageSupport(device))
         << '\n';
    info << "CL_DEVICE_NAME=\"" << get_device_name(device).c_str() << "\""
         << '\n';

    clDeviceInfo = info.str();
    return CL_SUCCESS;
}
|
|
|
|
static int write_cl_device_info(const cl_device_id device,
|
|
const cl_uint device_address_space_size,
|
|
const CompilationMode compilationMode,
|
|
std::string &clDeviceInfoFilename)
|
|
{
|
|
std::string clDeviceInfo;
|
|
int error = get_cl_device_info_str(device, device_address_space_size,
|
|
compilationMode, clDeviceInfo);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
return error;
|
|
}
|
|
|
|
cl_uint crc = crc32(clDeviceInfo.data(), clDeviceInfo.size());
|
|
|
|
/* Get the filename for the clDeviceInfo file.
|
|
* Note: the file includes the hash on its content, so it is usually
|
|
* unnecessary to delete it. */
|
|
std::ostringstream clDeviceInfoFilenameStream;
|
|
clDeviceInfoFilenameStream << gCompilationCachePath << slash
|
|
<< "clDeviceInfo-";
|
|
clDeviceInfoFilenameStream << std::hex << std::setfill('0') << std::setw(8)
|
|
<< crc << ".txt";
|
|
|
|
clDeviceInfoFilename = clDeviceInfoFilenameStream.str();
|
|
|
|
if ((size_t)get_file_size(clDeviceInfoFilename) == clDeviceInfo.size())
|
|
{
|
|
/* The CL device info file has already been created.
|
|
* Nothing to do. */
|
|
return 0;
|
|
}
|
|
|
|
/* The file does not exist or its length is not as expected.
|
|
* Create/overwrite it. */
|
|
std::ofstream ofs(clDeviceInfoFilename);
|
|
if (!ofs.good())
|
|
{
|
|
log_info("OfflineCompiler: can't create CL device info file: %s\n",
|
|
clDeviceInfoFilename.c_str());
|
|
return -1;
|
|
}
|
|
ofs << clDeviceInfo;
|
|
ofs.close();
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
static std::string get_offline_compilation_command(
|
|
const cl_uint device_address_space_size,
|
|
const CompilationMode compilationMode, const std::string &bOptions,
|
|
const std::string &sourceFilename, const std::string &outputFilename,
|
|
const std::string &clDeviceInfoFilename)
|
|
{
|
|
std::ostringstream wrapperOptions;
|
|
|
|
wrapperOptions << gCompilationProgram
|
|
<< " --mode=" << get_compilation_mode_str(compilationMode)
|
|
<< " --source=" << sourceFilename
|
|
<< " --output=" << outputFilename
|
|
<< " --cl-device-info=" << clDeviceInfoFilename;
|
|
|
|
if (bOptions != "")
|
|
{
|
|
// Add build options passed to this function
|
|
wrapperOptions << " -- " << bOptions;
|
|
}
|
|
|
|
return wrapperOptions.str();
|
|
}
|
|
|
|
static int invoke_offline_compiler(const cl_device_id device,
|
|
const cl_uint device_address_space_size,
|
|
const CompilationMode compilationMode,
|
|
const std::string &bOptions,
|
|
const std::string &sourceFilename,
|
|
const std::string &outputFilename)
|
|
{
|
|
std::string runString;
|
|
std::string clDeviceInfoFilename;
|
|
|
|
// See cl_offline_compiler-interface.txt for a description of the
|
|
// format of the CL device information file generated below, and
|
|
// the internal command line interface for invoking the offline
|
|
// compiler.
|
|
|
|
cl_int err = write_cl_device_info(device, device_address_space_size,
|
|
compilationMode, clDeviceInfoFilename);
|
|
if (err != CL_SUCCESS)
|
|
{
|
|
log_error("Failed writing CL device info file\n");
|
|
return err;
|
|
}
|
|
|
|
runString = get_offline_compilation_command(
|
|
device_address_space_size, compilationMode, bOptions, sourceFilename,
|
|
outputFilename, clDeviceInfoFilename);
|
|
|
|
// execute script
|
|
log_info("Executing command: %s\n", runString.c_str());
|
|
fflush(stdout);
|
|
int returnCode = system(runString.c_str());
|
|
if (returnCode != 0)
|
|
{
|
|
log_error("ERROR: Command finished with error: 0x%x\n", returnCode);
|
|
return CL_COMPILE_PROGRAM_FAILURE;
|
|
}
|
|
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
// Stores the first device of `context` in `device`.
//
// Returns CL_SUCCESS on success, -1 when the context reports no devices;
// a failing clGetContextInfo call is handled by the test_error macro.
static cl_int get_first_device_id(const cl_context context,
                                  cl_device_id &device)
{
    cl_uint deviceCount = 0;
    cl_int error = clGetContextInfo(context, CL_CONTEXT_NUM_DEVICES,
                                    sizeof(cl_uint), &deviceCount, NULL);
    test_error(error, "clGetContextInfo failed getting CL_CONTEXT_NUM_DEVICES");

    if (deviceCount == 0)
    {
        log_error("ERROR: No CL devices found\n");
        return -1;
    }

    // Query the full device list; only its first entry is handed back.
    std::vector<cl_device_id> contextDevices(deviceCount);
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             deviceCount * sizeof(cl_device_id),
                             contextDevices.data(), NULL);
    test_error(error, "clGetContextInfo failed getting CL_CONTEXT_DEVICES");

    device = contextDevices.front();
    return CL_SUCCESS;
}
|
|
|
|
// Queries CL_DEVICE_ADDRESS_BITS of `device` into
// `device_address_space_size`.
//
// Returns CL_SUCCESS when the value is 32 or 64 and -1 for any other value;
// a failing clGetDeviceInfo call is handled by the test_error macro.
static cl_int get_device_address_bits(const cl_device_id device,
                                      cl_uint &device_address_space_size)
{
    const cl_int error =
        clGetDeviceInfo(device, CL_DEVICE_ADDRESS_BITS, sizeof(cl_uint),
                        &device_address_space_size, NULL);
    test_error(error, "Unable to obtain device address bits");

    const bool supported =
        (device_address_space_size == 32 || device_address_space_size == 64);
    if (supported)
    {
        return CL_SUCCESS;
    }

    log_error("ERROR: Unexpected number of device address bits: %u\n",
              device_address_space_size);
    return -1;
}
|
|
|
|
static int get_offline_compiler_output(
|
|
std::ifstream &ifs, const cl_device_id device, cl_uint deviceAddrSpaceSize,
|
|
const CompilationMode compilationMode, const std::string &bOptions,
|
|
const std::string &kernelPath, const std::string &kernelNamePrefix)
|
|
{
|
|
std::string sourceFilename =
|
|
get_cl_source_filename_with_path(kernelPath, kernelNamePrefix);
|
|
std::string outputFilename = get_binary_filename_with_path(
|
|
compilationMode, deviceAddrSpaceSize, kernelPath, kernelNamePrefix);
|
|
|
|
ifs.open(outputFilename.c_str(), std::ios::binary);
|
|
if (!ifs.good())
|
|
{
|
|
std::string file_type =
|
|
get_offline_compilation_file_type_str(compilationMode);
|
|
if (gCompilationCacheMode == kCacheModeForceRead)
|
|
{
|
|
log_info("OfflineCompiler: can't open cached %s file: %s\n",
|
|
file_type.c_str(), outputFilename.c_str());
|
|
return -1;
|
|
}
|
|
else
|
|
{
|
|
int error = invoke_offline_compiler(device, deviceAddrSpaceSize,
|
|
compilationMode, bOptions,
|
|
sourceFilename, outputFilename);
|
|
if (error != CL_SUCCESS) return error;
|
|
|
|
// read output file
|
|
ifs.open(outputFilename.c_str(), std::ios::binary);
|
|
if (!ifs.good())
|
|
{
|
|
log_info("OfflineCompiler: can't read generated %s file: %s\n",
|
|
file_type.c_str(), outputFilename.c_str());
|
|
return -1;
|
|
}
|
|
}
|
|
}
|
|
return CL_SUCCESS;
|
|
}
|
|
|
|
// Creates `*outProgram` from offline-compiled output instead of from source.
//
// The compiled file is looked up in (or generated into) the compilation
// cache, read into memory and handed to clCreateProgramWithBinary (kBinary)
// or to clCreateProgramWithIL / clCreateProgramWithILKHR (kSpir_v, chosen
// by gCoreILProgram). When `device` is NULL the first device of `context`
// is used.
//
// Returns CL_SUCCESS on success. Returns -1 in dump-CL cache mode, when the
// compiled file is empty or cannot be read, or when the ILKHR entry point
// is unavailable; otherwise the failing OpenCL / helper error code.
static int create_single_kernel_helper_create_program_offline(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char *const *kernelProgram,
    const char *buildOptions, CompilationMode compilationMode)
{
    if (kCacheModeDumpCl == gCompilationCacheMode)
    {
        return -1;
    }

    // Get device CL_DEVICE_ADDRESS_BITS
    int error;
    cl_uint device_address_space_size = 0;
    if (device == NULL)
    {
        error = get_first_device_id(context, device);
        test_error(error, "Failed to get device ID for first device");
    }
    error = get_device_address_bits(device, device_address_space_size);
    if (error != CL_SUCCESS) return error;

    // set build options
    const std::string bOptions(buildOptions ? buildOptions : "");

    const std::string kernelName =
        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);

    std::ifstream ifs;
    error = get_offline_compiler_output(ifs, device, device_address_space_size,
                                        compilationMode, bOptions,
                                        gCompilationCachePath, kernelName);
    if (error != CL_SUCCESS) return error;

    // Determine the file size. tellg() yields -1 on failure, and an empty
    // file cannot be a valid program, so both are rejected before the size
    // is used for the allocation below.
    ifs.seekg(0, ifs.end);
    const std::streamoff fileSize = ifs.tellg();
    ifs.seekg(0, ifs.beg);
    if (fileSize <= 0)
    {
        log_error("ERROR: offline compiler output for %s is empty or its "
                  "size cannot be determined\n",
                  kernelName.c_str());
        return -1;
    }

    // Read the whole compiled program into memory.
    std::vector<unsigned char> programData(static_cast<size_t>(fileSize));
    ifs.read(reinterpret_cast<char *>(programData.data()),
             static_cast<std::streamsize>(fileSize));
    const bool readComplete =
        ifs.good() && ifs.gcount() == static_cast<std::streamsize>(fileSize);
    ifs.close();
    if (!readComplete)
    {
        log_error("ERROR: failed reading offline compiler output for %s\n",
                  kernelName.c_str());
        return -1;
    }

    // treat the file content as input for clCreateProgramWithBinary
    if (compilationMode == kBinary)
    {
        size_t lengths = programData.size();
        const unsigned char *binaries = programData.data();
        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
                 "clCreateProgramWithBinary\n");
        *outProgram = clCreateProgramWithBinary(context, 1, &device, &lengths,
                                                &binaries, NULL, &error);
        if (*outProgram == NULL || error != CL_SUCCESS)
        {
            print_error(error, "clCreateProgramWithBinary failed");
            return error;
        }
    }
    // treat the file content as input for clCreateProgramWithIL
    else if (compilationMode == kSpir_v)
    {
        const size_t ilLength = programData.size();
        log_info("offlineCompiler: clCreateProgramWithSource replaced with "
                 "clCreateProgramWithIL\n");
        if (gCoreILProgram)
        {
            *outProgram = clCreateProgramWithIL(context, programData.data(),
                                                ilLength, &error);
        }
        else
        {
            // Fall back to the KHR extension entry point, looked up on the
            // device's platform.
            cl_platform_id platform;
            error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM,
                                    sizeof(cl_platform_id), &platform, NULL);
            test_error(error, "clGetDeviceInfo for CL_DEVICE_PLATFORM failed");

            clCreateProgramWithILKHR_fn clCreateProgramWithILKHR = NULL;
            clCreateProgramWithILKHR = (clCreateProgramWithILKHR_fn)
                clGetExtensionFunctionAddressForPlatform(
                    platform, "clCreateProgramWithILKHR");
            if (clCreateProgramWithILKHR == NULL)
            {
                log_error(
                    "ERROR: clGetExtensionFunctionAddressForPlatform failed\n");
                return -1;
            }
            *outProgram = clCreateProgramWithILKHR(
                context, programData.data(), ilLength, &error);
        }

        if (*outProgram == NULL || error != CL_SUCCESS)
        {
            if (gCoreILProgram)
            {
                print_error(error, "clCreateProgramWithIL failed");
            }
            else
            {
                print_error(error, "clCreateProgramWithILKHR failed");
            }
            return error;
        }
    }

    return CL_SUCCESS;
}
|
|
|
|
// Creates `*outProgram` for the given kernel source according to
// `compilationMode`, holding gCompilerMutex for the whole call.
//
// Depending on the cache mode the source (and build options) are first
// dumped into the compilation cache. In kOnline mode the program is created
// with clCreateProgramWithSource; in every other mode the work is delegated
// to create_single_kernel_helper_create_program_offline().
//
// Returns CL_SUCCESS on success, -1 when dumping the source fails, or the
// error of the failing program creation call.
static int create_single_kernel_helper_create_program(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *buildOptions, CompilationMode compilationMode)
{
    std::lock_guard<std::mutex> compiler_lock(gCompilerMutex);

    const std::string filePrefix =
        get_unique_filename_prefix(numKernelLines, kernelProgram, buildOptions);
    const bool shouldSaveToDisk = should_save_kernel_source_to_disk(
        compilationMode, gCompilationCacheMode, gCompilationCachePath,
        filePrefix);

    if (shouldSaveToDisk
        && CL_SUCCESS
            != save_kernel_source_and_options_to_disk(
                numKernelLines, kernelProgram, buildOptions))
    {
        // Message now ends with a newline like every other log line here.
        log_error("Unable to dump kernel source to disk\n");
        return -1;
    }

    if (compilationMode != kOnline)
    {
        return create_single_kernel_helper_create_program_offline(
            context, device, outProgram, numKernelLines, kernelProgram,
            buildOptions, compilationMode);
    }

    /* Create the program object from source */
    int error = CL_SUCCESS;
    *outProgram = clCreateProgramWithSource(context, numKernelLines,
                                            kernelProgram, NULL, &error);
    if (*outProgram == NULL || error != CL_SUCCESS)
    {
        print_error(error, "clCreateProgramWithSource failed");
        return error;
    }
    return CL_SUCCESS;
}
|
|
|
|
// Creates `*outProgram` from the given kernel source using the globally
// configured compilation mode (gCompilationMode). No device is specified:
// a NULL device is forwarded to the internal implementation.
int create_single_kernel_helper_create_program(cl_context context,
                                               cl_program *outProgram,
                                               unsigned int numKernelLines,
                                               const char **kernelProgram,
                                               const char *buildOptions)
{
    cl_device_id unspecifiedDevice = NULL;
    return create_single_kernel_helper_create_program(
        context, unspecifiedDevice, outProgram, numKernelLines, kernelProgram,
        buildOptions, gCompilationMode);
}
|
|
|
|
// Same as create_single_kernel_helper_create_program(), but forwards an
// explicit `device` to the internal implementation. The globally configured
// compilation mode (gCompilationMode) is used.
int create_single_kernel_helper_create_program_for_device(
    cl_context context, cl_device_id device, cl_program *outProgram,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *buildOptions)
{
    const int status = create_single_kernel_helper_create_program(
        context, device, outProgram, numKernelLines, kernelProgram,
        buildOptions, gCompilationMode);
    return status;
}
|
|
|
|
// Thin wrapper: forwards all arguments unchanged to
// create_single_kernel_helper() and returns its result.
int create_single_kernel_helper_with_build_options(
    cl_context context, cl_program *outProgram, cl_kernel *outKernel,
    unsigned int numKernelLines, const char **kernelProgram,
    const char *kernelName, const char *buildOptions)
{
    const int status = create_single_kernel_helper(
        context, outProgram, outKernel, numKernelLines, kernelProgram,
        kernelName, buildOptions);
    return status;
}
|
|
|
|
// Creates and builds OpenCL C/C++ program, and creates a kernel
|
|
int create_single_kernel_helper(cl_context context, cl_program *outProgram,
|
|
cl_kernel *outKernel,
|
|
unsigned int numKernelLines,
|
|
const char **kernelProgram,
|
|
const char *kernelName,
|
|
const char *buildOptions)
|
|
{
|
|
// For the logic that automatically adds -cl-std it is much cleaner if the
|
|
// build options have RAII. This buffer will store the potentially updated
|
|
// build options, in which case buildOptions will point at the string owned
|
|
// by this buffer.
|
|
std::string build_options_internal{ buildOptions ? buildOptions : "" };
|
|
|
|
// Check the build options for the -cl-std option.
|
|
if (!buildOptions || !strstr(buildOptions, "-cl-std"))
|
|
{
|
|
// If the build option isn't present add it using the latest OpenCL-C
|
|
// version supported by the device. This allows calling code to force a
|
|
// particular CL C version if it is required, but also means that
|
|
// callers need not specify a version if they want to assume the most
|
|
// recent CL C.
|
|
|
|
auto version = get_max_OpenCL_C_for_context(context);
|
|
|
|
std::string cl_std{};
|
|
if (version >= Version(3, 0))
|
|
{
|
|
cl_std = "-cl-std=CL3.0";
|
|
}
|
|
else if (version >= Version(2, 0) && version < Version(3, 0))
|
|
{
|
|
cl_std = "-cl-std=CL2.0";
|
|
}
|
|
else
|
|
{
|
|
// If the -cl-std build option is not specified, the highest OpenCL
|
|
// C 1.x language version supported by each device is used when
|
|
// compiling the program for each device.
|
|
cl_std = "";
|
|
}
|
|
build_options_internal += ' ';
|
|
build_options_internal += cl_std;
|
|
buildOptions = build_options_internal.c_str();
|
|
}
|
|
int error = create_single_kernel_helper_create_program(
|
|
context, outProgram, numKernelLines, kernelProgram, buildOptions);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
log_error("Create program failed: %d, line: %d\n", error, __LINE__);
|
|
return error;
|
|
}
|
|
|
|
// Remove offline-compiler-only build options
|
|
std::string newBuildOptions;
|
|
if (buildOptions != NULL)
|
|
{
|
|
newBuildOptions = buildOptions;
|
|
std::string offlineCompierOptions[] = {
|
|
"-cl-fp16-enable", "-cl-fp64-enable", "-cl-zero-init-local-mem-vars"
|
|
};
|
|
for (auto &s : offlineCompierOptions)
|
|
{
|
|
std::string::size_type i = newBuildOptions.find(s);
|
|
if (i != std::string::npos) newBuildOptions.erase(i, s.length());
|
|
}
|
|
}
|
|
// Build program and create kernel
|
|
return build_program_create_kernel_helper(
|
|
context, outProgram, outKernel, numKernelLines, kernelProgram,
|
|
kernelName, newBuildOptions.c_str());
|
|
}
|
|
|
|
// Builds OpenCL C/C++ program and creates
|
|
int build_program_create_kernel_helper(
|
|
cl_context context, cl_program *outProgram, cl_kernel *outKernel,
|
|
unsigned int numKernelLines, const char **kernelProgram,
|
|
const char *kernelName, const char *buildOptions)
|
|
{
|
|
int error;
|
|
/* Compile the program */
|
|
int buildProgramFailed = 0;
|
|
int printedSource = 0;
|
|
error = clBuildProgram(*outProgram, 0, NULL, buildOptions, NULL, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
unsigned int i;
|
|
print_error(error, "clBuildProgram failed");
|
|
buildProgramFailed = 1;
|
|
printedSource = 1;
|
|
log_error("Build options: %s\n", buildOptions);
|
|
log_error("Original source is: ------------\n");
|
|
for (i = 0; i < numKernelLines; i++) log_error("%s", kernelProgram[i]);
|
|
}
|
|
|
|
// Verify the build status on all devices
|
|
cl_uint deviceCount = 0;
|
|
error = clGetProgramInfo(*outProgram, CL_PROGRAM_NUM_DEVICES,
|
|
sizeof(deviceCount), &deviceCount, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetProgramInfo CL_PROGRAM_NUM_DEVICES failed");
|
|
return error;
|
|
}
|
|
|
|
if (deviceCount == 0)
|
|
{
|
|
log_error("No devices found for program.\n");
|
|
return -1;
|
|
}
|
|
|
|
cl_device_id *devices =
|
|
(cl_device_id *)malloc(deviceCount * sizeof(cl_device_id));
|
|
if (NULL == devices) return -1;
|
|
BufferOwningPtr<cl_device_id> devicesBuf(devices);
|
|
|
|
memset(devices, 0, deviceCount * sizeof(cl_device_id));
|
|
error = clGetProgramInfo(*outProgram, CL_PROGRAM_DEVICES,
|
|
sizeof(cl_device_id) * deviceCount, devices, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "clGetProgramInfo CL_PROGRAM_DEVICES failed");
|
|
return error;
|
|
}
|
|
|
|
cl_uint z;
|
|
bool buildFailed = false;
|
|
for (z = 0; z < deviceCount; z++)
|
|
{
|
|
char deviceName[4096] = "";
|
|
error = clGetDeviceInfo(devices[z], CL_DEVICE_NAME, sizeof(deviceName),
|
|
deviceName, NULL);
|
|
if (error != CL_SUCCESS || deviceName[0] == '\0')
|
|
{
|
|
log_error("Device \"%d\" failed to return a name\n", z);
|
|
print_error(error, "clGetDeviceInfo CL_DEVICE_NAME failed");
|
|
}
|
|
|
|
cl_build_status buildStatus;
|
|
error = clGetProgramBuildInfo(*outProgram, devices[z],
|
|
CL_PROGRAM_BUILD_STATUS,
|
|
sizeof(buildStatus), &buildStatus, NULL);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
print_error(error,
|
|
"clGetProgramBuildInfo CL_PROGRAM_BUILD_STATUS failed");
|
|
return error;
|
|
}
|
|
|
|
if (buildStatus == CL_BUILD_SUCCESS && buildProgramFailed
|
|
&& deviceCount == 1)
|
|
{
|
|
buildFailed = true;
|
|
log_error("clBuildProgram returned an error, but buildStatus is "
|
|
"marked as CL_BUILD_SUCCESS.\n");
|
|
}
|
|
|
|
if (buildStatus != CL_BUILD_SUCCESS)
|
|
{
|
|
|
|
char statusString[64] = "";
|
|
if (buildStatus == (cl_build_status)CL_BUILD_SUCCESS)
|
|
sprintf(statusString, "CL_BUILD_SUCCESS");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_NONE)
|
|
sprintf(statusString, "CL_BUILD_NONE");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_ERROR)
|
|
sprintf(statusString, "CL_BUILD_ERROR");
|
|
else if (buildStatus == (cl_build_status)CL_BUILD_IN_PROGRESS)
|
|
sprintf(statusString, "CL_BUILD_IN_PROGRESS");
|
|
else
|
|
sprintf(statusString, "UNKNOWN (%d)", buildStatus);
|
|
|
|
if (buildStatus != CL_BUILD_SUCCESS)
|
|
log_error(
|
|
"Build not successful for device \"%s\", status: %s\n",
|
|
deviceName, statusString);
|
|
size_t paramSize = 0;
|
|
error = clGetProgramBuildInfo(*outProgram, devices[z],
|
|
CL_PROGRAM_BUILD_LOG, 0, NULL,
|
|
¶mSize);
|
|
if (error != CL_SUCCESS)
|
|
{
|
|
|
|
print_error(
|
|
error, "clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
|
return error;
|
|
}
|
|
|
|
std::string log;
|
|
log.resize(paramSize / sizeof(char));
|
|
error = clGetProgramBuildInfo(*outProgram, devices[z],
|
|
CL_PROGRAM_BUILD_LOG, paramSize,
|
|
&log[0], NULL);
|
|
if (error != CL_SUCCESS || log[0] == '\0')
|
|
{
|
|
log_error("Device %d (%s) failed to return a build log\n", z,
|
|
deviceName);
|
|
if (error)
|
|
{
|
|
print_error(
|
|
error,
|
|
"clGetProgramBuildInfo CL_PROGRAM_BUILD_LOG failed");
|
|
return error;
|
|
}
|
|
else
|
|
{
|
|
log_error("clGetProgramBuildInfo returned an empty log.\n");
|
|
return -1;
|
|
}
|
|
}
|
|
// In this case we've already printed out the code above.
|
|
if (!printedSource)
|
|
{
|
|
unsigned int i;
|
|
log_error("Original source is: ------------\n");
|
|
for (i = 0; i < numKernelLines; i++)
|
|
log_error("%s", kernelProgram[i]);
|
|
printedSource = 1;
|
|
}
|
|
log_error("Build log for device \"%s\" is: ------------\n",
|
|
deviceName);
|
|
log_error("%s\n", log.c_str());
|
|
log_error("\n----------\n");
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
if (buildFailed)
|
|
{
|
|
return -1;
|
|
}
|
|
|
|
/* And create a kernel from it */
|
|
if (kernelName != NULL)
|
|
{
|
|
*outKernel = clCreateKernel(*outProgram, kernelName, &error);
|
|
if (*outKernel == NULL || error != CL_SUCCESS)
|
|
{
|
|
print_error(error, "Unable to create kernel");
|
|
return error;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Finds the largest work-group size that every device in `context` accepts
 * for `kernel`.
 *
 * For each device the running result is narrowed by both
 * CL_DEVICE_MAX_WORK_GROUP_SIZE and CL_KERNEL_WORK_GROUP_SIZE.
 *
 * outMaxSize receives the common maximum work-group size.
 * outLimits  may be NULL; otherwise it must point to three elements and
 *            receives the per-dimension CL_DEVICE_MAX_WORK_ITEM_SIZES values,
 *            reduced to the minimum across all devices.
 *
 * Returns 0 on success, or the OpenCL error code of the failing query.
 */
int get_max_allowed_work_group_size(cl_context context, cl_kernel kernel,
                                    size_t *outMaxSize, size_t *outLimits)
{
    // Callers hand in a three-element array for outLimits.
    const size_t kLimitDims = 3;

    size_t devicesBytes = 0;
    int error =
        clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &devicesBytes);
    test_error(error, "Unable to obtain list of devices size for context");

    std::vector<cl_device_id> devices(devicesBytes / sizeof(cl_device_id));
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             devices.size() * sizeof(cl_device_id),
                             devices.data(), NULL);
    test_error(error, "Unable to obtain list of devices for context");

    size_t maxCommonSize = 0;
    for (size_t i = 0; i < devices.size(); i++)
    {
        size_t size = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE,
                                sizeof(size), &size, NULL);
        test_error(error, "Unable to obtain max work group size for device");
        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;

        error = clGetKernelWorkGroupInfo(kernel, devices[i],
                                         CL_KERNEL_WORK_GROUP_SIZE,
                                         sizeof(size), &size, NULL);
        test_error(
            error,
            "Unable to obtain max work group size for device and kernel combo");
        if (size < maxCommonSize || maxCommonSize == 0) maxCommonSize = size;

        cl_uint numDims = 0;
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                                sizeof(numDims), &numDims, NULL);
        test_error(
            error,
            "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");

        // The query below writes numDims entries, so the buffer is sized from
        // the device's own dimension count (never fewer than three entries,
        // each defaulting to 1) instead of a fixed three-element array.
        std::vector<size_t> sizeLimit(
            numDims > kLimitDims ? numDims : kLimitDims, 1);
        error = clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES,
                                numDims * sizeof(size_t), sizeLimit.data(),
                                NULL);
        test_error(error,
                   "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_SIZES");

        if (outLimits != NULL)
        {
            if (i == 0)
            {
                for (size_t j = 0; j < kLimitDims; j++)
                    outLimits[j] = sizeLimit[j];
            }
            else
            {
                // Only the dimensions this device reported are compared, and
                // never more than the three entries outLimits can hold.
                const size_t comparedDims =
                    numDims < kLimitDims ? numDims : kLimitDims;
                for (size_t j = 0; j < comparedDims; j++)
                {
                    if (sizeLimit[j] < outLimits[j])
                        outLimits[j] = sizeLimit[j];
                }
            }
        }
    }

    *outMaxSize = maxCommonSize;
    return 0;
}
|
|
|
|
|
|
/*
 * Reports the largest one-dimensional work-group size usable for `kernel` on
 * `device`: the kernel's CL_KERNEL_WORK_GROUP_SIZE, capped by the first entry
 * of the device's CL_DEVICE_MAX_WORK_ITEM_SIZES.
 *
 * Returns 0 on success, -1 if the temporary buffer cannot be allocated, or the
 * OpenCL error code of the failing query.
 */
extern int get_max_allowed_1d_work_group_size_on_device(cl_device_id device,
                                                        cl_kernel kernel,
                                                        size_t *outSize)
{
    size_t kernelLimit;
    int error =
        clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE,
                                 sizeof(size_t), &kernelLimit, NULL);
    test_error(error,
               "clGetKernelWorkGroupInfo CL_KERNEL_WORK_GROUP_SIZE failed");

    cl_uint dimCount;
    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                            sizeof(cl_uint), &dimCount, NULL);
    test_error(error,
               "clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS failed");

    size_t *perDimLimits = (size_t *)malloc(dimCount * sizeof(size_t));
    if (perDimLimits == NULL)
    {
        log_error("Unable to allocate maxWgSizePerDim\n");
        return -1;
    }

    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_SIZES,
                            dimCount * sizeof(size_t), perDimLimits, NULL);
    if (error != CL_SUCCESS)
    {
        log_error("clGetDeviceInfo CL_DEVICE_MAX_WORK_ITEM_SIZES failed\n");
        free(perDimLimits);
        return error;
    }

    // Only the first dimension matters for a 1D launch.
    const size_t firstDimLimit = perDimLimits[0];
    free(perDimLimits);

    *outSize = (kernelLimit > firstDimLimit) ? firstDimLimit : kernelLimit;
    return 0;
}
|
|
|
|
|
|
/*
 * Chooses a 1D work-group size for a launch of `globalThreadSize` work-items.
 *
 * Starts from the value produced by get_max_allowed_work_group_size() and
 * walks downwards until the candidate both divides `globalThreadSize` evenly
 * and does not exceed the first-dimension work-item limit. The result is
 * written to *outMaxSize.
 *
 * Returns 0 on success, or the error from get_max_allowed_work_group_size().
 */
int get_max_common_work_group_size(cl_context context, cl_kernel kernel,
                                   size_t globalThreadSize, size_t *outMaxSize)
{
    size_t perDimLimit[3];
    const int status = get_max_allowed_work_group_size(context, kernel,
                                                       outMaxSize, perDimLimit);
    if (status != 0) return status;

    // No explicit lower bound is tested: the search relies on a candidate of
    // 1 dividing any global size.
    while ((globalThreadSize % *outMaxSize) != 0
           || (*outMaxSize > perDimLimit[0]))
    {
        (*outMaxSize)--;
    }
    return 0;
}
|
|
|
|
/*
 * Chooses a 2D work-group shape for the global sizes in globalThreadSizes[0..1].
 *
 * Each chosen dimension evenly divides the matching global size and stays
 * within that dimension's work-item limit; dimensions are picked in order
 * against a budget derived from the maximum allowed work-group size. The
 * result is written to outMaxSizes[0..1].
 *
 * Returns 0 on success, or the error from get_max_allowed_work_group_size().
 */
int get_max_common_2D_work_group_size(cl_context context, cl_kernel kernel,
                                      size_t *globalThreadSizes,
                                      size_t *outMaxSizes)
{
    size_t perDimLimit[3];
    size_t totalLimit;
    const int status = get_max_allowed_work_group_size(
        context, kernel, &totalLimit, perDimLimit);
    if (status != 0) return status;

    // Fast path: the whole global range already fits in a single work-group.
    if (globalThreadSizes[0] * globalThreadSizes[1] <= totalLimit
        && globalThreadSizes[0] <= perDimLimit[0]
        && globalThreadSizes[1] <= perDimLimit[1])
    {
        outMaxSizes[0] = globalThreadSizes[0];
        outMaxSizes[1] = globalThreadSizes[1];
        return 0;
    }

    // Otherwise pick one dimension at a time; "budget" is how many work-items
    // remain available once the earlier dimensions have been chosen.
    size_t budget = totalLimit;
    for (int dim = 0; dim < 2; ++dim)
    {
        const size_t globalSize = globalThreadSizes[dim];
        size_t candidate = (globalSize > budget) ? budget : globalSize;

        // Step down to the nearest value that divides the global size and
        // respects the per-dimension limit.
        while ((globalSize % candidate) != 0 || (candidate > perDimLimit[dim]))
        {
            --candidate;
        }

        outMaxSizes[dim] = candidate;
        budget /= candidate;
    }

    return 0;
}
|
|
|
|
/*
 * Chooses a 3D work-group shape for the global sizes in globalThreadSizes[0..2].
 *
 * Each chosen dimension evenly divides the matching global size and stays
 * within that dimension's work-item limit; dimensions are picked in order
 * against a budget derived from the maximum allowed work-group size. The
 * result is written to outMaxSizes[0..2].
 *
 * Returns 0 on success, or the error from get_max_allowed_work_group_size().
 */
int get_max_common_3D_work_group_size(cl_context context, cl_kernel kernel,
                                      size_t *globalThreadSizes,
                                      size_t *outMaxSizes)
{
    size_t perDimLimit[3];
    size_t totalLimit;
    const int status = get_max_allowed_work_group_size(
        context, kernel, &totalLimit, perDimLimit);
    if (status != 0) return status;

    // Fast path: the whole global range already fits in a single work-group.
    if (globalThreadSizes[0] * globalThreadSizes[1] * globalThreadSizes[2]
            <= totalLimit
        && globalThreadSizes[0] <= perDimLimit[0]
        && globalThreadSizes[1] <= perDimLimit[1]
        && globalThreadSizes[2] <= perDimLimit[2])
    {
        outMaxSizes[0] = globalThreadSizes[0];
        outMaxSizes[1] = globalThreadSizes[1];
        outMaxSizes[2] = globalThreadSizes[2];
        return 0;
    }

    // Otherwise pick one dimension at a time; "budget" is how many work-items
    // remain available once the earlier dimensions have been chosen.
    size_t budget = totalLimit;
    for (int dim = 0; dim < 3; ++dim)
    {
        const size_t globalSize = globalThreadSizes[dim];
        size_t candidate = (globalSize > budget) ? budget : globalSize;

        // Step down to the nearest value that divides the global size and
        // respects the per-dimension limit.
        while ((globalSize % candidate) != 0 || (candidate > perDimLimit[dim]))
        {
            --candidate;
        }

        outMaxSizes[dim] = candidate;
        budget /= candidate;
    }

    return 0;
}
|
|
|
|
/* Helper to determine if a device supports an image format */
|
|
int is_image_format_supported(cl_context context, cl_mem_flags flags,
|
|
cl_mem_object_type image_type,
|
|
const cl_image_format *fmt)
|
|
{
|
|
cl_image_format *list;
|
|
cl_uint count = 0;
|
|
cl_int err = clGetSupportedImageFormats(context, flags, image_type, 128,
|
|
NULL, &count);
|
|
if (count == 0) return 0;
|
|
|
|
list = (cl_image_format *)malloc(count * sizeof(cl_image_format));
|
|
if (NULL == list)
|
|
{
|
|
log_error("Error: unable to allocate %ld byte buffer for image format "
|
|
"list at %s:%d (err = %d)\n",
|
|
count * sizeof(cl_image_format), __FILE__, __LINE__, err);
|
|
return 0;
|
|
}
|
|
BufferOwningPtr<cl_image_format> listBuf(list);
|
|
|
|
|
|
cl_int error = clGetSupportedImageFormats(context, flags, image_type, count,
|
|
list, NULL);
|
|
if (error)
|
|
{
|
|
log_error("Error: failed to obtain supported image type list at %s:%d "
|
|
"(err = %d)\n",
|
|
__FILE__, __LINE__, err);
|
|
return 0;
|
|
}
|
|
|
|
// iterate looking for a match.
|
|
cl_uint i;
|
|
for (i = 0; i < count; i++)
|
|
{
|
|
if (fmt->image_channel_data_type == list[i].image_channel_data_type
|
|
&& fmt->image_channel_order == list[i].image_channel_order)
|
|
break;
|
|
}
|
|
|
|
return (i < count) ? 1 : 0;
|
|
}
|
|
|
|
size_t get_pixel_bytes(const cl_image_format *fmt);
|
|
size_t get_pixel_bytes(const cl_image_format *fmt)
|
|
{
|
|
size_t chanCount;
|
|
switch (fmt->image_channel_order)
|
|
{
|
|
case CL_R:
|
|
case CL_A:
|
|
case CL_Rx:
|
|
case CL_INTENSITY:
|
|
case CL_LUMINANCE:
|
|
case CL_DEPTH: chanCount = 1; break;
|
|
case CL_RG:
|
|
case CL_RA:
|
|
case CL_RGx: chanCount = 2; break;
|
|
case CL_RGB:
|
|
case CL_RGBx:
|
|
case CL_sRGB:
|
|
case CL_sRGBx: chanCount = 3; break;
|
|
case CL_RGBA:
|
|
case CL_ARGB:
|
|
case CL_BGRA:
|
|
case CL_sBGRA:
|
|
case CL_sRGBA:
|
|
#ifdef CL_1RGB_APPLE
|
|
case CL_1RGB_APPLE:
|
|
#endif
|
|
#ifdef CL_BGR1_APPLE
|
|
case CL_BGR1_APPLE:
|
|
#endif
|
|
chanCount = 4;
|
|
break;
|
|
default:
|
|
log_error("Unknown channel order at %s:%d!\n", __FILE__, __LINE__);
|
|
abort();
|
|
break;
|
|
}
|
|
|
|
switch (fmt->image_channel_data_type)
|
|
{
|
|
case CL_UNORM_SHORT_565:
|
|
case CL_UNORM_SHORT_555: return 2;
|
|
|
|
case CL_UNORM_INT_101010: return 4;
|
|
|
|
case CL_SNORM_INT8:
|
|
case CL_UNORM_INT8:
|
|
case CL_SIGNED_INT8:
|
|
case CL_UNSIGNED_INT8: return chanCount;
|
|
|
|
case CL_SNORM_INT16:
|
|
case CL_UNORM_INT16:
|
|
case CL_HALF_FLOAT:
|
|
case CL_SIGNED_INT16:
|
|
case CL_UNSIGNED_INT16:
|
|
#ifdef CL_SFIXED14_APPLE
|
|
case CL_SFIXED14_APPLE:
|
|
#endif
|
|
return chanCount * 2;
|
|
|
|
case CL_SIGNED_INT32:
|
|
case CL_UNSIGNED_INT32:
|
|
case CL_FLOAT: return chanCount * 4;
|
|
|
|
default:
|
|
log_error("Unknown channel data type at %s:%d!\n", __FILE__,
|
|
__LINE__);
|
|
abort();
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Maps the result of checkForImageSupport() onto a test status:
 * 0 -> TEST_PASS, CL_IMAGE_FORMAT_NOT_SUPPORTED -> TEST_SKIP (with a log
 * message), anything else -> TEST_FAIL.
 */
test_status verifyImageSupport(cl_device_id device)
{
    switch (checkForImageSupport(device))
    {
        case 0: return TEST_PASS;

        case CL_IMAGE_FORMAT_NOT_SUPPORTED:
            log_error("SKIPPED: Device does not supported images as required "
                      "by this test!\n");
            return TEST_SKIP;

        default: return TEST_FAIL;
    }
}
|
|
|
|
/*
 * Checks CL_DEVICE_IMAGE_SUPPORT for `device`.
 *
 * Returns 0 when the device reports image support,
 * CL_IMAGE_FORMAT_NOT_SUPPORTED when it reports none, or the OpenCL error code
 * if the query itself fails.
 */
int checkForImageSupport(cl_device_id device)
{
    cl_uint imageSupport;
    int error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                                sizeof(imageSupport), &imageSupport, NULL);
    test_error(error, "Unable to query device for image support");

    return (imageSupport == 0) ? CL_IMAGE_FORMAT_NOT_SUPPORTED : 0;
}
|
|
|
|
/*
 * Checks whether `device` supports 3D images.
 *
 * The device must report CL_DEVICE_IMAGE_SUPPORT. For an EMBEDDED_PROFILE
 * device the three CL_DEVICE_IMAGE3D_MAX_* dimensions are also queried, and
 * 3D images are treated as unsupported when all three are zero.
 *
 * Returns 0 when supported, CL_IMAGE_FORMAT_NOT_SUPPORTED when not, or the
 * OpenCL error code of a failing query.
 */
int checkFor3DImageSupport(cl_device_id device)
{
    /* Images have to be supported at all before 3D images can be. */
    cl_uint imageSupport;
    int error = clGetDeviceInfo(device, CL_DEVICE_IMAGE_SUPPORT,
                                sizeof(imageSupport), &imageSupport, NULL);
    test_error(error, "Unable to query device for image support");
    if (imageSupport == 0) return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    char profileName[128];
    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileName),
                            profileName, NULL);
    test_error(error, "Unable to query device for CL_DEVICE_PROFILE");

    /* Only embedded-profile devices get the extra dimension check. */
    if (strcmp(profileName, "EMBEDDED_PROFILE") != 0) return 0;

    size_t maxWidth = static_cast<size_t>(-1L);
    size_t maxHeight = static_cast<size_t>(-1L);
    size_t maxDepth = static_cast<size_t>(-1L);

    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_WIDTH,
                            sizeof(maxWidth), &maxWidth, NULL);
    test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_WIDTH");

    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_HEIGHT,
                            sizeof(maxHeight), &maxHeight, NULL);
    test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_HEIGHT");

    error = clGetDeviceInfo(device, CL_DEVICE_IMAGE3D_MAX_DEPTH,
                            sizeof(maxDepth), &maxDepth, NULL);
    test_error(error, "Unable to get CL_DEVICE_IMAGE3D_MAX_DEPTH");

    const bool allDimsZero = (maxHeight == 0) && (maxWidth == 0) && (maxDepth == 0);
    if (allDimsZero) return CL_IMAGE_FORMAT_NOT_SUPPORTED;

    /* So our support is good */
    return 0;
}
|
|
|
|
/*
 * Checks whether `device` supports read-write image kernel arguments.
 *
 * - Any non-zero result from checkForImageSupport() is reported as
 *   CL_IMAGE_FORMAT_NOT_SUPPORTED.
 * - On OpenCL 3.0 and later the feature is optional, so
 *   CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS is queried and must be non-zero.
 * - Devices older than OpenCL 2.0 are reported as unsupported.
 * - Remaining (2.x) devices are accepted once image support is confirmed.
 *
 * Returns 0 when supported, CL_IMAGE_FORMAT_NOT_SUPPORTED when not, or the
 * OpenCL error code if the CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS query fails.
 */
int checkForReadWriteImageSupport(cl_device_id device)
{
    if (checkForImageSupport(device))
    {
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
    }

    auto device_cl_version = get_device_cl_version(device);
    if (device_cl_version >= Version(3, 0))
    {
        // In OpenCL 3.0, Read-Write images are optional.
        // Check if they are supported.
        cl_uint are_rw_images_supported{};
        test_error(
            clGetDeviceInfo(device, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS,
                            sizeof(are_rw_images_supported),
                            &are_rw_images_supported, nullptr),
            "clGetDeviceInfo failed for CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS\n");
        if (0 == are_rw_images_supported)
        {
            log_info("READ_WRITE_IMAGE tests skipped, not supported.\n");
            return CL_IMAGE_FORMAT_NOT_SUPPORTED;
        }
    }
    // READ_WRITE images are not supported on 1.X devices.
    else if (device_cl_version < Version(2, 0))
    {
        // The message ends with a newline so that the next log line does not
        // run on from it.
        log_info("READ_WRITE_IMAGE tests skipped, OpenCL 2.0+ is required.\n");
        return CL_IMAGE_FORMAT_NOT_SUPPORTED;
    }
    // Support for read-write image arguments is required
    // for a 2.X device if the device supports images.

    /* So our support is good */
    return 0;
}
|
|
|
|
/*
 * Returns the largest CL_DEVICE_MEM_BASE_ADDR_ALIGN, converted from bits to
 * bytes, across the devices in `context`.
 *
 * The value is computed on the first call that yields a non-zero result and is
 * then kept in a function-local static, so later calls return the cached value
 * whatever context they pass. Devices whose query fails are logged and
 * skipped. Returns 0 if the device list cannot be obtained.
 */
size_t get_min_alignment(cl_context context)
{
    static cl_uint align_size = 0;

    if (0 == align_size)
    {
        size_t devices_size = 0;
        cl_int error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL,
                                        &devices_size);
        test_error_ret(error, "clGetContextInfo failed", 0);

        // The vector owns the device list, so the early return below cannot
        // leak it.
        std::vector<cl_device_id> devices(devices_size / sizeof(cl_device_id));
        error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                                 devices.size() * sizeof(cl_device_id),
                                 devices.data(), NULL);
        test_error_ret(error, "clGetContextInfo failed", 0);

        cl_uint result = 0;
        for (cl_device_id device : devices)
        {
            cl_uint alignment = 0;
            error = clGetDeviceInfo(device, CL_DEVICE_MEM_BASE_ADDR_ALIGN,
                                    sizeof(cl_uint), &alignment, NULL);
            if (error != CL_SUCCESS)
            {
                print_error(error, "clGetDeviceInfo failed");
                continue;
            }

            alignment >>= 3; // convert bits to bytes
            if (alignment > result) result = alignment;
        }

        align_size = result;
    }

    return align_size;
}
|
|
|
|
/*
 * Determines the default single-precision rounding mode of `device`.
 *
 * Returns CL_FP_ROUND_TO_NEAREST when CL_DEVICE_SINGLE_FP_CONFIG includes it.
 * Otherwise CL_FP_ROUND_TO_ZERO is returned, but only if the device both
 * advertises it and reports the EMBEDDED_PROFILE profile.
 *
 * Returns 0 (after logging) when a query fails or the device satisfies
 * neither case.
 */
cl_device_fp_config get_default_rounding_mode(cl_device_id device)
{
    cl_device_fp_config single = 0;
    int error = clGetDeviceInfo(device, CL_DEVICE_SINGLE_FP_CONFIG,
                                sizeof(single), &single, NULL);
    test_error_ret(error, "Unable to get device CL_DEVICE_SINGLE_FP_CONFIG", 0);

    if (single & CL_FP_ROUND_TO_NEAREST) return CL_FP_ROUND_TO_NEAREST;

    if (0 == (single & CL_FP_ROUND_TO_ZERO))
    {
        test_error_ret(-1,
                       "FAILURE: device must support either "
                       "CL_FP_ROUND_TO_ZERO or CL_FP_ROUND_TO_NEAREST",
                       0);
    }

    // Make sure we are an embedded device before allowing a pass
    char profileStr[128] = "";
    error = clGetDeviceInfo(device, CL_DEVICE_PROFILE, sizeof(profileStr),
                            profileStr, NULL);
    test_error_ret(error, "FAILURE: Unable to get CL_DEVICE_PROFILE", 0);

    if (strcmp(profileStr, "EMBEDDED_PROFILE") != 0)
    {
        // `error` is CL_SUCCESS here (the profile query succeeded), so an
        // explicit failure code is passed to make this check take effect.
        test_error_ret(-1,
                       "FAILURE: non-EMBEDDED_PROFILE devices must support "
                       "CL_FP_ROUND_TO_NEAREST",
                       0);
    }

    return CL_FP_ROUND_TO_ZERO;
}
|
|
|
|
/*
 * Tests whether any bit of `prop` is set in the device's
 * CL_DEVICE_QUEUE_ON_HOST_PROPERTIES.
 *
 * Returns 1 if so, 0 otherwise; a failed query is logged and also yields 0.
 */
int checkDeviceForQueueSupport(cl_device_id device,
                               cl_command_queue_properties prop)
{
    cl_command_queue_properties supportedProps;
    cl_int error =
        clGetDeviceInfo(device, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES,
                        sizeof(supportedProps), &supportedProps, NULL);
    test_error_ret(error, "FAILURE: Unable to get device queue properties", 0);

    if (supportedProps & prop)
    {
        return 1;
    }
    return 0;
}
|
|
|
|
/*
 * Logs a one-line summary of `device`: name, vendor, OpenCL version and
 * OpenCL C version. For devices reporting OpenCL 3.0 or later, the latest
 * conformance version passed is logged as well.
 *
 * Returns CL_SUCCESS, or the OpenCL error code of the first failing query.
 */
int printDeviceHeader(cl_device_id device)
{
    char nameBuf[512];
    char vendorBuf[512];
    char versionBuf[512];
    char cVersionBuf[512];

    int error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(nameBuf),
                                nameBuf, NULL);
    test_error(error, "Unable to get CL_DEVICE_NAME for device");

    error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(vendorBuf),
                            vendorBuf, NULL);
    test_error(error, "Unable to get CL_DEVICE_VENDOR for device");

    error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(versionBuf),
                            versionBuf, NULL);
    test_error(error, "Unable to get CL_DEVICE_VERSION for device");

    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
                            sizeof(cVersionBuf), cVersionBuf, NULL);
    test_error(error, "Unable to get CL_DEVICE_OPENCL_C_VERSION for device");

    // The OpenCL C part of the line is only emitted when its query succeeded.
    const bool haveCVersion = (error == CL_SUCCESS);
    const char *cVersionLabel = haveCVersion ? ", CL C Version = " : "";
    const char *cVersionText = haveCVersion ? cVersionBuf : "";

    log_info("Compute Device Name = %s, Compute Device Vendor = %s, "
             "Compute Device Version = %s%s%s\n",
             nameBuf, vendorBuf, versionBuf, cVersionLabel, cVersionText);

    auto version = get_device_cl_version(device);
    if (version >= Version(3, 0))
    {
        auto ctsVersion = get_device_info_string(
            device, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED);
        log_info("Device latest conformance version passed: %s\n",
                 ctsVersion.c_str());
    }

    return CL_SUCCESS;
}
|
|
|
|
/*
 * Returns the OpenCL C version reported by CL_DEVICE_OPENCL_C_VERSION.
 *
 * The version is taken from the single digits on either side of the first '.'
 * in the returned string. A device whose OpenCL version is exactly 1.0 is
 * reported as OpenCL C 1.0 without issuing the query.
 *
 * Returns Version{ -1, 0 } (after logging) if the query fails or the string
 * does not contain a "<digit>.<digit>" pair at its first '.'.
 */
Version get_device_cl_c_version(cl_device_id device)
{
    auto device_cl_version = get_device_cl_version(device);

    // Special case: OpenCL 1.0, where CL_DEVICE_OPENCL_C_VERSION did not
    // exist. Since this is just the first version we can return 1.0.
    if (device_cl_version == Version{ 1, 0 })
    {
        return Version{ 1, 0 };
    }

    // Otherwise query CL_DEVICE_OPENCL_C_VERSION: first for the string size,
    // then for the string itself.
    size_t opencl_c_version_size_in_bytes{};
    auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, 0, nullptr,
                                 &opencl_c_version_size_in_bytes);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
                   (Version{ -1, 0 }));

    std::string opencl_c_version(opencl_c_version_size_in_bytes, '\0');
    error =
        clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION,
                        opencl_c_version.size(), &opencl_c_version[0], nullptr);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_VERSION\n",
                   (Version{ -1, 0 }));

    // Scrape out the major, minor pair from the string, refusing to index
    // outside it when the '.' is missing or sits at either end.
    const size_t dot = opencl_c_version.find('.');
    const bool dot_has_neighbours = dot != std::string::npos && dot > 0
        && dot + 1 < opencl_c_version.size();
    const char major = dot_has_neighbours ? opencl_c_version[dot - 1] : '\0';
    const char minor = dot_has_neighbours ? opencl_c_version[dot + 1] : '\0';
    const bool both_are_digits =
        major >= '0' && major <= '9' && minor >= '0' && minor <= '9';
    if (!both_are_digits)
    {
        log_error("Unable to parse CL_DEVICE_OPENCL_C_VERSION string \"%s\"\n",
                  opencl_c_version.c_str());
        return Version{ -1, 0 };
    }

    return Version{ major - '0', minor - '0' };
}
|
|
|
|
/*
 * Returns the most recent OpenCL C version supported by `device`.
 *
 * Devices reporting OpenCL 3.0 or later are asked for
 * CL_DEVICE_OPENCL_C_ALL_VERSIONS and the highest listed version is returned;
 * older devices defer to get_device_cl_c_version().
 *
 * Returns Version{ -1, 0 } (after logging) if a query fails.
 */
Version get_device_latest_cl_c_version(cl_device_id device)
{
    auto device_cl_version = get_device_cl_version(device);

    // Pre-3.0 devices do not take the CL_DEVICE_OPENCL_C_ALL_VERSIONS path.
    if (!(device_cl_version >= Version{ 3, 0 }))
    {
        return get_device_cl_c_version(device);
    }

    size_t all_versions_bytes{};
    auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0,
                                 nullptr, &all_versions_bytes);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
                   (Version{ -1, 0 }));

    std::vector<cl_name_version> supported(all_versions_bytes
                                           / sizeof(cl_name_version));
    error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                            all_versions_bytes, supported.data(), nullptr);
    test_error_ret(error,
                   "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
                   (Version{ -1, 0 }));

    // Keep the highest version seen in the list.
    Version newest{};
    for (size_t idx = 0; idx < supported.size(); ++idx)
    {
        Version candidate{ CL_VERSION_MAJOR(supported[idx].version),
                           CL_VERSION_MINOR(supported[idx].version) };
        if (candidate > newest)
        {
            newest = candidate;
        }
    }
    return newest;
}
|
|
|
|
/*
 * Returns the highest OpenCL C version usable on every device in `context`.
 *
 * The result is the minimum of each device's latest OpenCL C version, except
 * that mixing a 2.x device with a 3.0+ device yields 1.2.
 *
 * Returns Version{ -1, 0 } (after logging) if the device list cannot be
 * obtained or is empty.
 */
Version get_max_OpenCL_C_for_context(cl_context context)
{
    // Get all the devices in the context and find the maximum
    // universally supported OpenCL C version.
    size_t devices_size_in_bytes{};
    auto error = clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr,
                                  &devices_size_in_bytes);
    test_error_ret(error, "clGetContextInfo failed for CL_CONTEXT_DEVICES",
                   (Version{ -1, 0 }));

    std::vector<cl_device_id> devices(devices_size_in_bytes
                                      / sizeof(cl_device_id));
    error = clGetContextInfo(context, CL_CONTEXT_DEVICES,
                             devices.size() * sizeof(cl_device_id),
                             devices.data(), nullptr);
    test_error_ret(error, "clGetContextInfo failed for CL_CONTEXT_DEVICES",
                   (Version{ -1, 0 }));

    // Indexing devices[0] below requires at least one device.
    if (devices.empty())
    {
        log_error("No devices returned for CL_CONTEXT_DEVICES\n");
        return Version{ -1, 0 };
    }

    auto current_version = get_device_latest_cl_c_version(devices[0]);
    for (size_t i = 1; i < devices.size(); ++i)
    {
        auto device_version = get_device_latest_cl_c_version(devices[i]);

        const bool device_is_2x =
            device_version >= Version(2, 0) && device_version < Version(3, 0);
        const bool current_is_2x =
            current_version >= Version(2, 0) && current_version < Version(3, 0);

        // OpenCL 3.0 is not backwards compatible with 2.0.
        // If we have 3.0 and 2.0 in the same driver we use 1.2.
        if ((device_is_2x && current_version >= Version(3, 0))
            || (device_version >= Version(3, 0) && current_is_2x))
        {
            current_version = Version(1, 2);
        }
        else
        {
            current_version = (std::min)(device_version, current_version);
        }
    }
    return current_version;
}
|
|
|
|
/*
 * Reports whether `device` supports OpenCL C `version`.
 *
 * For devices reporting OpenCL 3.0 or later, CL_DEVICE_OPENCL_C_ALL_VERSIONS
 * is queried and an exact match in that list returns true. In every other
 * case the answer is whether `version` is <= get_device_cl_c_version().
 *
 * Returns false (after logging) if a CL_DEVICE_OPENCL_C_ALL_VERSIONS query
 * fails.
 */
bool device_supports_cl_c_version(cl_device_id device, Version version)
{
    auto device_cl_version = get_device_cl_version(device);

    if (device_cl_version >= Version{ 3, 0 })
    {
        size_t all_versions_bytes{};
        auto error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                                     0, nullptr, &all_versions_bytes);
        test_error_ret(
            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
            (false));

        std::vector<cl_name_version> listed(all_versions_bytes
                                            / sizeof(cl_name_version));
        error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_ALL_VERSIONS,
                                all_versions_bytes, listed.data(), nullptr);
        test_error_ret(
            error, "clGetDeviceInfo failed for CL_DEVICE_OPENCL_C_ALL_VERSIONS",
            (false));

        for (size_t idx = 0; idx < listed.size(); ++idx)
        {
            Version candidate{ CL_VERSION_MAJOR(listed[idx].version),
                               CL_VERSION_MINOR(listed[idx].version) };
            if (candidate == version)
            {
                return true;
            }
        }
        // NOTE(review): a 3.0+ device whose list lacks `version` still falls
        // through to the comparison below - confirm that this is intended.
    }

    return version <= get_device_cl_c_version(device);
}
|
|
|
|
/*
 * Calls `fn` repeatedly until it returns true or the accumulated sleep time
 * reaches `timeout_ms`, sleeping `interval_ms` milliseconds after each
 * unsuccessful call.
 *
 * Only the sleep intervals are counted towards the timeout; time spent inside
 * `fn` is not. `fn` is never called when `timeout_ms` is 0.
 *
 * Returns true if `fn` returned true, false if the timeout was reached first.
 */
bool poll_until(unsigned timeout_ms, unsigned interval_ms,
                std::function<bool()> fn)
{
    for (unsigned elapsed_ms = 0; elapsed_ms < timeout_ms;
         elapsed_ms += interval_ms)
    {
        if (fn())
        {
            return true;
        }
        usleep(interval_ms * 1000);
    }

    return false;
}
|
|
|
|
/*
 * Reports whether `device` supports double precision: either the cl_khr_fp64
 * extension is available or CL_DEVICE_DOUBLE_FP_CONFIG is non-zero.
 *
 * Returns false (after logging) if the CL_DEVICE_DOUBLE_FP_CONFIG query fails.
 */
bool device_supports_double(cl_device_id device)
{
    if (is_extension_available(device, "cl_khr_fp64"))
    {
        return true;
    }

    cl_device_fp_config double_fp_config = 0;
    cl_int err =
        clGetDeviceInfo(device, CL_DEVICE_DOUBLE_FP_CONFIG,
                        sizeof(double_fp_config), &double_fp_config, nullptr);
    // Return an explicit false on failure; returning the non-zero error code
    // from this bool function would convert to true.
    test_error_ret(err, "clGetDeviceInfo for CL_DEVICE_DOUBLE_FP_CONFIG failed",
                   false);
    return double_fp_config != 0;
}
|
|
|
|
/*
 * Reports whether `device` supports half precision, i.e. whether the
 * cl_khr_fp16 extension is available on it.
 */
bool device_supports_half(cl_device_id device)
{
    const bool has_fp16_extension =
        is_extension_available(device, "cl_khr_fp16");
    return has_fp16_extension;
}
|