You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
212 lines
7.6 KiB
212 lines
7.6 KiB
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "procs.h"
|
|
#include "subhelpers.h"
|
|
|
|
typedef struct
|
|
{
|
|
cl_uint maxSubGroupSize;
|
|
cl_uint numSubGroups;
|
|
} result_data;
|
|
|
|
|
|
/**
 * Cross-checks the host-side sub-group queries against the values a kernel
 * observes on the device.
 *
 * The test builds a small kernel that records get_max_sub_group_size() and
 * get_num_sub_groups() per work-item, asks the runtime (through the
 * core/extension entry point selected by `useCoreSubgroups`) for
 * CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE and
 * CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the chosen local size, runs the
 * kernel, and verifies every recorded value matches the host query.
 *
 * @param device           Device under test.
 * @param context          Context used to build the program and buffer.
 * @param queue            Command queue used to run the kernel and read back.
 * @param num_elements     Unused by this test.
 * @param useCoreSubgroups Forwarded to `subgroupsAPI` to select the query
 *                         entry point; also controls the pragma emitted
 *                         into the kernel source.
 * @return 0 when all values match; TEST_FAIL when the query entry point is
 *         missing, a query fails or reports an unexpected size; -1 when a
 *         device value disagrees with the host query. Failing OpenCL calls
 *         guarded by test_error() are reported through that macro
 *         (presumably returning the error code - see the harness).
 */
int test_sub_group_info(cl_device_id device, cl_context context,
                        cl_command_queue queue, int num_elements,
                        bool useCoreSubgroups)
{
    static const size_t gsize0 = 80;
    int error;
    size_t realSize;
    size_t kernel_max_subgroup_size, kernel_subgroup_count;
    size_t global[] = { gsize0, 14, 10 };
    size_t local[] = { 0, 0, 0 };
    // One entry per work-item index along dimension 0 (the kernel indexes
    // its output with get_global_id(0) only).
    result_data result[gsize0];

    cl_uint max_dimensions;

    error = clGetDeviceInfo(device, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS,
                            sizeof(max_dimensions), &max_dimensions, NULL);
    test_error(error,
               "clGetDeviceInfo failed for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS");

    // `global` and `local` only describe three dimensions. A device may
    // report more than three, so clamp the dimensionality that is handed to
    // the runtime; otherwise it would read past the end of both arrays.
    const cl_uint max_test_dimensions =
        static_cast<cl_uint>(sizeof(global) / sizeof(global[0]));
    const cl_uint work_dim = (max_dimensions < max_test_dimensions)
        ? max_dimensions
        : max_test_dimensions;

    cl_platform_id platform;
    clProgramWrapper program;
    clKernelWrapper kernel;
    clMemWrapper out;
    std::stringstream kernel_sstr;
    // NOTE(review): the extension pragma is emitted on the *core* path and
    // omitted on the extension path - confirm this is intentional.
    if (useCoreSubgroups)
    {
        kernel_sstr << "#pragma OPENCL EXTENSION cl_khr_subgroups : enable\n";
    }
    kernel_sstr
        << "\n"
           "typedef struct {\n"
           "    uint maxSubGroupSize;\n"
           "    uint numSubGroups;\n"
           "} result_data;\n"
           "\n"
           "__kernel void query_kernel( __global result_data *outData )\n"
           "{\n"
           "    int gid = get_global_id( 0 );\n"
           "    outData[gid].maxSubGroupSize = get_max_sub_group_size();\n"
           "    outData[gid].numSubGroups = get_num_sub_groups();\n"
           "}";

    // Keep the std::string alive for as long as the raw pointer is used.
    const std::string kernel_str = kernel_sstr.str();
    const char *kernel_src = kernel_str.c_str();
    error = create_single_kernel_helper(context, &program, &kernel, 1,
                                        &kernel_src, "query_kernel");
    if (error != 0) return error;

    // Determine some local dimensions to use for the test.
    if (work_dim == 1)
    {
        error = get_max_common_work_group_size(context, kernel, global[0],
                                               &local[0]);
        test_error(error, "get_max_common_work_group_size failed");
    }
    else if (work_dim == 2)
    {
        error =
            get_max_common_2D_work_group_size(context, kernel, global, local);
        test_error(error, "get_max_common_2D_work_group_size failed");
    }
    else
    {
        error =
            get_max_common_3D_work_group_size(context, kernel, global, local);
        test_error(error, "get_max_common_3D_work_group_size failed");
    }

    error = clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform),
                            &platform, NULL);
    test_error(error, "clGetDeviceInfo failed for CL_DEVICE_PLATFORM");

    subgroupsAPI subgroupsApiSet(platform, useCoreSubgroups);
    clGetKernelSubGroupInfoKHR_fn clGetKernelSubGroupInfo_ptr =
        subgroupsApiSet.clGetKernelSubGroupInfo_ptr();
    if (clGetKernelSubGroupInfo_ptr == NULL)
    {
        log_error("ERROR: %s function not available\n",
                  subgroupsApiSet.clGetKernelSubGroupInfo_name);
        return TEST_FAIL;
    }

    // The size of the input value tells the runtime how many dimensions the
    // local size has, so describe exactly the dimensions that are launched.
    const size_t local_query_size = work_dim * sizeof(local[0]);

    error = clGetKernelSubGroupInfo_ptr(
        kernel, device, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE,
        local_query_size, local, sizeof(kernel_max_subgroup_size),
        &kernel_max_subgroup_size, &realSize);
    if (error != CL_SUCCESS)
    {
        log_error("ERROR: %s function error for "
                  "CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE\n",
                  subgroupsApiSet.clGetKernelSubGroupInfo_name);
        return TEST_FAIL;
    }
    log_info(
        "The CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE for the kernel is %d.\n",
        (int)kernel_max_subgroup_size);
    if (realSize != sizeof(kernel_max_subgroup_size))
    {
        log_error("ERROR: Returned size of max sub group size not valid! "
                  "(Expected %d, got %d)\n",
                  (int)sizeof(kernel_max_subgroup_size), (int)realSize);
        return TEST_FAIL;
    }

    error = clGetKernelSubGroupInfo_ptr(
        kernel, device, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE,
        local_query_size, local, sizeof(kernel_subgroup_count),
        &kernel_subgroup_count, &realSize);
    if (error != CL_SUCCESS)
    {
        log_error("ERROR: %s function error "
                  "for CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE\n",
                  subgroupsApiSet.clGetKernelSubGroupInfo_name);
        return TEST_FAIL;
    }
    log_info(
        "The CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE for the kernel is %d.\n",
        (int)kernel_subgroup_count);

    if (realSize != sizeof(kernel_subgroup_count))
    {
        log_error("ERROR: Returned size of sub group count not valid! "
                  "(Expected %d, got %d)\n",
                  (int)sizeof(kernel_subgroup_count), (int)realSize);
        return TEST_FAIL;
    }

    // Verify that the kernel gets the same max_subgroup_size and subgroup_count
    out = clCreateBuffer(context, CL_MEM_WRITE_ONLY, sizeof(result), NULL,
                         &error);
    test_error(error, "clCreateBuffer failed");

    error = clSetKernelArg(kernel, 0, sizeof(out), &out);
    test_error(error, "clSetKernelArg failed");

    error = clEnqueueNDRangeKernel(queue, kernel, work_dim, NULL, global,
                                   local, 0, NULL, NULL);
    test_error(error, "clEnqueueNDRangeKernel failed");

    // Non-blocking read; the clFinish below guarantees `result` is filled
    // before it is inspected.
    error = clEnqueueReadBuffer(queue, out, CL_FALSE, 0, sizeof(result),
                                result, 0, NULL, NULL);
    test_error(error, "clEnqueueReadBuffer failed");

    error = clFinish(queue);
    test_error(error, "clFinish failed");

    for (size_t i = 0; i < gsize0; ++i)
    {
        if (result[i].maxSubGroupSize != (cl_uint)kernel_max_subgroup_size)
        {
            log_error("ERROR: get_max_subgroup_size() doesn't match result "
                      "from clGetKernelSubGroupInfoKHR, %u vs %u\n",
                      result[i].maxSubGroupSize,
                      (cl_uint)kernel_max_subgroup_size);
            return -1;
        }
        if (result[i].numSubGroups != (cl_uint)kernel_subgroup_count)
        {
            log_error("ERROR: get_num_sub_groups() doesn't match result from "
                      "clGetKernelSubGroupInfoKHR, %u vs %u\n",
                      result[i].numSubGroups, (cl_uint)kernel_subgroup_count);
            return -1;
        }
    }

    return 0;
}
|
|
|
|
// Entry point for the core sub-groups variant of the query test: runs
// test_sub_group_info() with the core flag set and returns its result
// unchanged.
int test_sub_group_info_core(cl_device_id device, cl_context context,
                             cl_command_queue queue, int num_elements)
{
    const bool useCoreSubgroups = true;
    return test_sub_group_info(device, context, queue, num_elements,
                               useCoreSubgroups);
}
|
|
|
|
// Entry point for the cl_khr_subgroups extension variant of the query test.
// Runs test_sub_group_info() with the core flag cleared when the device
// advertises the extension; otherwise logs a notice and reports the test as
// having skipped itself.
int test_sub_group_info_ext(cl_device_id device, cl_context context,
                            cl_command_queue queue, int num_elements)
{
    if (is_extension_available(device, "cl_khr_subgroups"))
    {
        return test_sub_group_info(device, context, queue, num_elements,
                                   false);
    }

    log_info(
        "Device does not support 'cl_khr_subgroups'. Skipping the test.\n");
    return TEST_SKIPPED_ITSELF;
}