You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
315 lines
11 KiB
315 lines
11 KiB
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "harness/compat.h"
|
|
|
|
#include <stdio.h>
|
|
#include <vector>
|
|
#include <sstream>
|
|
#include "harness/testHarness.h"
|
|
#include "harness/kernelHelpers.h"
|
|
|
|
#include "common.h"
|
|
|
|
// SVM Atomic wrappers.
|
|
// Platforms that support SVM atomics (atomics that work across the host and devices) need to implement these host side functions correctly.
|
|
// Platforms that do not support SVM atomics can simpy implement these functions as empty stubs since the functions will not be called.
|
|
// For now only Windows x86 is implemented, add support for other platforms as needed.
|
|
cl_int AtomicLoadExplicit(volatile cl_int * pValue, cl_memory_order order)
|
|
{
|
|
#if (defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64))) || (defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)))
|
|
return *pValue; // provided the value is aligned x86 doesn't need anything more than this for seq_cst.
|
|
#elif defined(__GNUC__)
|
|
return __sync_add_and_fetch(pValue, 0);
|
|
#else
|
|
log_error("ERROR: AtomicLoadExplicit function not implemented\n");
|
|
return -1;
|
|
#endif
|
|
}
|
|
// all the x86 atomics are seq_cst, so don't need to do anything with the memory order parameter.
|
|
cl_int AtomicFetchAddExplicit(volatile cl_int *object, cl_int operand, cl_memory_order o)
|
|
{
|
|
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
|
|
return InterlockedExchangeAdd( (volatile LONG*) object, operand);
|
|
#elif defined(__GNUC__)
|
|
return __sync_fetch_and_add(object, operand);
|
|
#else
|
|
log_error("ERROR: AtomicFetchAddExplicit function not implemented\n");
|
|
return -1;
|
|
#endif
|
|
}
|
|
|
|
cl_int AtomicExchangeExplicit(volatile cl_int *object, cl_int desired, cl_memory_order mo)
|
|
{
|
|
#if (defined(_WIN32) || defined(_WIN64)) && defined(_MSC_VER)
|
|
return InterlockedExchange( (volatile LONG*) object, desired);
|
|
#elif defined(__GNUC__)
|
|
return __sync_lock_test_and_set(object, desired);
|
|
#else
|
|
log_error("ERROR: AtomicExchangeExplicit function not implemented\n");
|
|
return -1;
|
|
#endif
|
|
}
|
|
|
|
|
|
const char *linked_list_create_and_verify_kernels[] = {
|
|
"typedef struct Node {\n"
|
|
" int global_id;\n"
|
|
" int position_in_list;\n"
|
|
" __global struct Node* pNext;\n"
|
|
"} Node;\n"
|
|
"\n"
|
|
// The allocation_index parameter must be initialized on the host to N work-items
|
|
// The first N nodes in pNodes will be the heads of the lists.
|
|
"__kernel void create_linked_lists(__global Node* pNodes, volatile __attribute__((nosvm)) __global int* allocation_index, int list_length)\n"
|
|
"{\n"
|
|
" size_t i = get_global_id(0);\n"
|
|
" __global Node *pNode = &pNodes[i];\n"
|
|
"\n"
|
|
" pNode->global_id = i;\n"
|
|
" pNode->position_in_list = 0;\n"
|
|
"\n"
|
|
" __global Node *pNew;\n"
|
|
" for(int j=1; j < list_length; j++)\n"
|
|
" {\n"
|
|
" pNew = &pNodes[ atomic_inc(allocation_index) ];// allocate a new node\n"
|
|
" pNew->global_id = i;\n"
|
|
" pNew->position_in_list = j;\n"
|
|
" pNode->pNext = pNew; // link new node onto end of list\n"
|
|
" pNode = pNew; // move to end of list\n"
|
|
" }\n"
|
|
"}\n"
|
|
|
|
"__kernel void verify_linked_lists(__global Node* pNodes, volatile __global uint* num_correct, int list_length)\n"
|
|
"{\n"
|
|
" size_t i = get_global_id(0);\n"
|
|
" __global Node *pNode = &pNodes[i];\n"
|
|
"\n"
|
|
" for(int j=0; j < list_length; j++)\n"
|
|
" {\n"
|
|
" if( pNode->global_id == i && pNode->position_in_list == j)\n"
|
|
" {\n"
|
|
" atomic_inc(num_correct);\n"
|
|
" } \n"
|
|
" else {\n"
|
|
" break;\n"
|
|
" }\n"
|
|
" pNode = pNode->pNext;\n"
|
|
" }\n"
|
|
"}\n"
|
|
};
|
|
|
|
|
|
// The first N nodes in pNodes will be the heads of the lists.
|
|
void create_linked_lists(Node* pNodes, size_t num_lists, int list_length)
|
|
{
|
|
size_t allocation_index = num_lists; // heads of lists are in first num_lists nodes.
|
|
|
|
for(cl_uint i = 0; i < num_lists; i++)
|
|
{
|
|
Node *pNode = &pNodes[i];
|
|
pNode->global_id = i;
|
|
pNode->position_in_list = 0;
|
|
Node *pNew;
|
|
for(int j=1; j < list_length; j++)
|
|
{
|
|
pNew = &pNodes[ allocation_index++ ];// allocate a new node
|
|
pNew->global_id = i;
|
|
pNew->position_in_list = j;
|
|
pNode->pNext = pNew; // link new node onto end of list
|
|
pNode = pNew; // move to end of list
|
|
}
|
|
}
|
|
}
|
|
|
|
cl_int verify_linked_lists(Node* pNodes, size_t num_lists, int list_length)
|
|
{
|
|
cl_int error = CL_SUCCESS;
|
|
int numCorrect = 0;
|
|
|
|
log_info(" and verifying on host ");
|
|
for(cl_uint i=0; i < num_lists; i++)
|
|
{
|
|
Node *pNode = &pNodes[i];
|
|
for(int j=0; j < list_length; j++)
|
|
{
|
|
if( pNode->global_id == i && pNode->position_in_list == j)
|
|
{
|
|
numCorrect++;
|
|
}
|
|
else {
|
|
break;
|
|
}
|
|
pNode = pNode->pNext;
|
|
}
|
|
}
|
|
if(numCorrect != list_length * (cl_uint)num_lists)
|
|
{
|
|
error = -1;
|
|
log_info("Failed\n");
|
|
}
|
|
else
|
|
log_info("Passed\n");
|
|
|
|
return error;
|
|
}
|
|
|
|
// Note that we don't use the context provided by the test harness since it doesn't support multiple devices,
|
|
// so we create are own context here that has all devices, we use the same platform that the harness used.
|
|
cl_int create_cl_objects(cl_device_id device_from_harness, const char** ppCodeString, cl_context* context, cl_program *program, cl_command_queue *queues, cl_uint *num_devices, cl_device_svm_capabilities required_svm_caps, std::vector<std::string> extensions_list)
|
|
{
|
|
cl_int error;
|
|
|
|
cl_platform_id platform_id;
|
|
// find out what platform the harness is using.
|
|
error = clGetDeviceInfo(device_from_harness, CL_DEVICE_PLATFORM,sizeof(cl_platform_id),&platform_id,NULL);
|
|
test_error(error,"clGetDeviceInfo failed");
|
|
|
|
error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, 0, NULL, num_devices );
|
|
test_error(error, "clGetDeviceIDs failed");
|
|
|
|
std::vector<cl_device_id> devicesTmp(*num_devices), devices, capable_devices;
|
|
|
|
error = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_ALL, *num_devices, &devicesTmp[0], NULL );
|
|
test_error(error, "clGetDeviceIDs failed");
|
|
|
|
devices.push_back(device_from_harness);
|
|
for (size_t i = 0; i < devicesTmp.size(); ++i)
|
|
{
|
|
if (device_from_harness != devicesTmp[i])
|
|
devices.push_back(devicesTmp[i]);
|
|
}
|
|
|
|
// Select only the devices that support the SVM level needed for the test.
|
|
// Note that if requested SVM capabilities are not supported by any device then the test still passes (even though it does not execute).
|
|
cl_device_svm_capabilities caps;
|
|
cl_uint num_capable_devices = 0;
|
|
for(cl_uint i = 0; i < *num_devices; i++)
|
|
{
|
|
Version version = get_device_cl_version(devices[i]);
|
|
|
|
if(device_from_harness != devices[i] && version < Version(2,0))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
error = clGetDeviceInfo(devices[i], CL_DEVICE_SVM_CAPABILITIES, sizeof(cl_device_svm_capabilities), &caps, NULL);
|
|
test_error(error,"clGetDeviceInfo failed for CL_DEVICE_SVM_CAPABILITIES");
|
|
if(caps & (~(CL_DEVICE_SVM_COARSE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_BUFFER | CL_DEVICE_SVM_FINE_GRAIN_SYSTEM | CL_DEVICE_SVM_ATOMICS)))
|
|
{
|
|
log_error("clGetDeviceInfo returned an invalid cl_device_svm_capabilities value");
|
|
return -1;
|
|
}
|
|
bool extensions_supported = true;
|
|
for (auto extension : extensions_list)
|
|
{
|
|
if (!is_extension_available(devices[i], extension.c_str()))
|
|
{
|
|
log_error("Required extension not found - device id %d - %s\n", i,
|
|
extension.c_str());
|
|
extensions_supported = false;
|
|
break;
|
|
}
|
|
}
|
|
if((caps & required_svm_caps) == required_svm_caps && extensions_supported)
|
|
{
|
|
capable_devices.push_back(devices[i]);
|
|
++num_capable_devices;
|
|
}
|
|
}
|
|
devices = capable_devices; // the only devices we care about from here on are the ones capable of supporting the requested SVM level.
|
|
*num_devices = num_capable_devices;
|
|
if(num_capable_devices == 0)
|
|
// if(svm_level > CL_DEVICE_COARSE_SVM && 0 == num_capable_devices)
|
|
{
|
|
log_info("Requested SVM level or required extensions not supported by any device on this platform, test not executed.\n");
|
|
return 1; // 1 indicates do not execute, but counts as passing.
|
|
}
|
|
|
|
cl_context_properties context_properties[3] = {CL_CONTEXT_PLATFORM, (cl_context_properties)platform_id, 0 };
|
|
*context = clCreateContext(context_properties, *num_devices, &devices[0], NULL, NULL, &error);
|
|
test_error(error, "Unable to create context" );
|
|
|
|
// *queues = (cl_command_queue *) malloc( *num_devices * sizeof( cl_command_queue ) );
|
|
|
|
for(cl_uint i = 0; i < *num_devices; i++)
|
|
{
|
|
queues[i] = clCreateCommandQueueWithProperties(*context, devices[i], 0, &error);
|
|
test_error(error, "clCreateCommandQueue failed");
|
|
}
|
|
|
|
if (ppCodeString)
|
|
{
|
|
error =
|
|
create_single_kernel_helper(*context, program, 0, 1, ppCodeString, 0);
|
|
test_error(error, "failed to create program");
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
test_definition test_list[] = {
|
|
ADD_TEST( svm_byte_granularity),
|
|
ADD_TEST( svm_set_kernel_exec_info_svm_ptrs ),
|
|
ADD_TEST( svm_fine_grain_memory_consistency ),
|
|
ADD_TEST( svm_fine_grain_sync_buffers ),
|
|
ADD_TEST( svm_shared_address_space_fine_grain ),
|
|
ADD_TEST( svm_shared_sub_buffers ),
|
|
ADD_TEST( svm_shared_address_space_fine_grain_buffers ),
|
|
ADD_TEST( svm_allocate_shared_buffer ),
|
|
ADD_TEST( svm_shared_address_space_coarse_grain_old_api ),
|
|
ADD_TEST( svm_shared_address_space_coarse_grain_new_api ),
|
|
ADD_TEST( svm_cross_buffer_pointers_coarse_grain ),
|
|
ADD_TEST( svm_pointer_passing ),
|
|
ADD_TEST( svm_enqueue_api ),
|
|
ADD_TEST_VERSION( svm_migrate, Version(2, 1)),
|
|
};
|
|
|
|
const int test_num = ARRAY_SIZE( test_list );
|
|
|
|
test_status InitCL(cl_device_id device) {
|
|
auto version = get_device_cl_version(device);
|
|
auto expected_min_version = Version(2, 0);
|
|
if (version < expected_min_version)
|
|
{
|
|
version_expected_info("Test", "OpenCL",
|
|
expected_min_version.to_string().c_str(),
|
|
version.to_string().c_str());
|
|
return TEST_SKIP;
|
|
}
|
|
|
|
int error;
|
|
cl_device_svm_capabilities svm_caps;
|
|
error = clGetDeviceInfo(device, CL_DEVICE_SVM_CAPABILITIES,
|
|
sizeof(svm_caps), &svm_caps, NULL);
|
|
if (error != CL_SUCCESS) {
|
|
print_error(error, "Unable to get svm capabilities");
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
if ((svm_caps == 0) && (version >= Version(3, 0)))
|
|
{
|
|
return TEST_SKIP;
|
|
}
|
|
|
|
return TEST_PASS;
|
|
}
|
|
|
|
int main(int argc, const char *argv[])
|
|
{
|
|
return runTestHarnessWithCheck(argc, argv, test_num, test_list, true, 0, InitCL);
|
|
}
|
|
|