You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
442 lines
14 KiB
442 lines
14 KiB
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "cl_utils.h"
|
|
#include <stdlib.h>
|
|
|
|
#if !defined (_WIN32)
|
|
#include <sys/mman.h>
|
|
#endif
|
|
|
|
#include "test_config.h"
|
|
#include "string.h"
|
|
#include "harness/kernelHelpers.h"
|
|
|
|
#include "harness/testHarness.h"
|
|
|
|
#define HALF_MIN 1.0p-14
|
|
|
|
|
|
const char *vector_size_name_extensions[kVectorSizeCount+kStrangeVectorSizeCount] = { "", "2", "4", "8", "16", "3" };
|
|
const char *vector_size_strings[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "3" };
|
|
const char *align_divisors[kVectorSizeCount+kStrangeVectorSizeCount] = { "1", "2", "4", "8", "16", "4" };
|
|
const char *align_types[kVectorSizeCount+kStrangeVectorSizeCount] = { "half", "int", "int2", "int4", "int8", "int2" };
|
|
|
|
|
|
void *gIn_half = NULL;
|
|
void *gOut_half = NULL;
|
|
void *gOut_half_reference = NULL;
|
|
void *gOut_half_reference_double = NULL;
|
|
void *gIn_single = NULL;
|
|
void *gOut_single = NULL;
|
|
void *gOut_single_reference = NULL;
|
|
void *gIn_double = NULL;
|
|
// void *gOut_double = NULL;
|
|
// void *gOut_double_reference = NULL;
|
|
cl_mem gInBuffer_half = NULL;
|
|
cl_mem gOutBuffer_half = NULL;
|
|
cl_mem gInBuffer_single = NULL;
|
|
cl_mem gOutBuffer_single = NULL;
|
|
cl_mem gInBuffer_double = NULL;
|
|
// cl_mem gOutBuffer_double = NULL;
|
|
|
|
cl_context gContext = NULL;
|
|
cl_command_queue gQueue = NULL;
|
|
uint32_t gDeviceFrequency = 0;
|
|
uint32_t gComputeDevices = 0;
|
|
size_t gMaxThreadGroupSize = 0;
|
|
size_t gWorkGroupSize = 0;
|
|
bool gWimpyMode = false;
|
|
int gWimpyReductionFactor = 512;
|
|
int gTestDouble = 0;
|
|
|
|
#if defined( __APPLE__ )
|
|
int gReportTimes = 1;
|
|
#else
|
|
int gReportTimes = 0;
|
|
#endif
|
|
|
|
#pragma mark -
|
|
|
|
test_status InitCL( cl_device_id device )
|
|
{
|
|
size_t configSize = sizeof( gComputeDevices );
|
|
int error;
|
|
|
|
#if MULTITHREAD
|
|
if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_COMPUTE_UNITS, configSize, &gComputeDevices, NULL )) )
|
|
#endif
|
|
gComputeDevices = 1;
|
|
|
|
configSize = sizeof( gMaxThreadGroupSize );
|
|
if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_WORK_GROUP_SIZE, configSize, &gMaxThreadGroupSize, NULL )) )
|
|
gMaxThreadGroupSize = 1;
|
|
|
|
// Use only one-eighth the work group size
|
|
if (gMaxThreadGroupSize > 8)
|
|
gWorkGroupSize = gMaxThreadGroupSize / 8;
|
|
else
|
|
gWorkGroupSize = gMaxThreadGroupSize;
|
|
|
|
configSize = sizeof( gDeviceFrequency );
|
|
if( (error = clGetDeviceInfo( device, CL_DEVICE_MAX_CLOCK_FREQUENCY, configSize, &gDeviceFrequency, NULL )) )
|
|
gDeviceFrequency = 1;
|
|
|
|
// Check extensions
|
|
int hasDouble = is_extension_available(device, "cl_khr_fp64");
|
|
gTestDouble ^= hasDouble;
|
|
|
|
//detect whether profile of the device is embedded
|
|
char profile[64] = "";
|
|
if( (error = clGetDeviceInfo( device, CL_DEVICE_PROFILE, sizeof(profile), profile, NULL ) ) )
|
|
{
|
|
vlog_error( "Unable to get device CL DEVICE PROFILE string. (%d) \n", error );
|
|
}
|
|
else if( strstr(profile, "EMBEDDED_PROFILE" ) )
|
|
{
|
|
gIsEmbedded = 1;
|
|
}
|
|
|
|
vlog( "%d compute devices at %f GHz\n", gComputeDevices, (double) gDeviceFrequency / 1000. );
|
|
vlog( "Max thread group size is %lld.\n", (uint64_t) gMaxThreadGroupSize );
|
|
|
|
gContext = clCreateContext( NULL, 1, &device, notify_callback, NULL, &error );
|
|
if( NULL == gContext )
|
|
{
|
|
vlog_error( "clCreateDeviceGroup failed. (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gQueue = clCreateCommandQueue(gContext, device, 0, &error);
|
|
if( NULL == gQueue )
|
|
{
|
|
vlog_error( "clCreateCommandQueue failed. (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
#if defined( __APPLE__ )
|
|
// FIXME: use clProtectedArray
|
|
#endif
|
|
//Allocate buffers
|
|
gIn_half = malloc( getBufferSize(device)/2 );
|
|
gOut_half = malloc( BUFFER_SIZE/2 );
|
|
gOut_half_reference = malloc( BUFFER_SIZE/2 );
|
|
gOut_half_reference_double = malloc( BUFFER_SIZE/2 );
|
|
gIn_single = malloc( BUFFER_SIZE );
|
|
gOut_single = malloc( getBufferSize(device) );
|
|
gOut_single_reference = malloc( getBufferSize(device) );
|
|
gIn_double = malloc( 2*BUFFER_SIZE );
|
|
// gOut_double = malloc( (2*getBufferSize(device)) );
|
|
// gOut_double_reference = malloc( (2*getBufferSize(device)) );
|
|
|
|
if ( NULL == gIn_half ||
|
|
NULL == gOut_half ||
|
|
NULL == gOut_half_reference ||
|
|
NULL == gOut_half_reference_double ||
|
|
NULL == gIn_single ||
|
|
NULL == gOut_single ||
|
|
NULL == gOut_single_reference ||
|
|
NULL == gIn_double // || NULL == gOut_double || NULL == gOut_double_reference
|
|
)
|
|
return TEST_FAIL;
|
|
|
|
gInBuffer_half = clCreateBuffer(gContext, CL_MEM_READ_ONLY, getBufferSize(device) / 2, NULL, &error);
|
|
if( gInBuffer_half == NULL )
|
|
{
|
|
vlog_error( "clCreateArray failed for input (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gInBuffer_single = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE, NULL, &error );
|
|
if( gInBuffer_single == NULL )
|
|
{
|
|
vlog_error( "clCreateArray failed for input (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gInBuffer_double = clCreateBuffer(gContext, CL_MEM_READ_ONLY, BUFFER_SIZE*2, NULL, &error );
|
|
if( gInBuffer_double == NULL )
|
|
{
|
|
vlog_error( "clCreateArray failed for input (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gOutBuffer_half = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, BUFFER_SIZE/2, NULL, &error );
|
|
if( gOutBuffer_half == NULL )
|
|
{
|
|
vlog_error( "clCreateArray failed for output (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
gOutBuffer_single = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, getBufferSize(device), NULL, &error );
|
|
if( gOutBuffer_single == NULL )
|
|
{
|
|
vlog_error( "clCreateArray failed for output (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
|
|
#if 0
|
|
gOutBuffer_double = clCreateBuffer(gContext, CL_MEM_WRITE_ONLY, (size_t)(2*getBufferSize(device)), NULL, &error );
|
|
if( gOutBuffer_double == NULL )
|
|
{
|
|
vlog_error( "clCreateArray failed for output (%d)\n", error );
|
|
return TEST_FAIL;
|
|
}
|
|
#endif
|
|
|
|
char string[16384];
|
|
vlog( "\nCompute Device info:\n" );
|
|
error = clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(string), string, NULL);
|
|
vlog( "\tDevice Name: %s\n", string );
|
|
error = clGetDeviceInfo(device, CL_DEVICE_VENDOR, sizeof(string), string, NULL);
|
|
vlog( "\tVendor: %s\n", string );
|
|
error = clGetDeviceInfo(device, CL_DEVICE_VERSION, sizeof(string), string, NULL);
|
|
vlog( "\tDevice Version: %s\n", string );
|
|
error = clGetDeviceInfo(device, CL_DEVICE_OPENCL_C_VERSION, sizeof(string), string, NULL);
|
|
vlog( "\tOpenCL C Version: %s\n", string );
|
|
error = clGetDeviceInfo(device, CL_DRIVER_VERSION, sizeof(string), string, NULL);
|
|
vlog( "\tDriver Version: %s\n", string );
|
|
vlog( "\tProcessing with %d devices\n", gComputeDevices );
|
|
vlog( "\tDevice Frequency: %d MHz\n", gDeviceFrequency );
|
|
vlog( "\tHas double? %s\n", hasDouble ? "YES" : "NO" );
|
|
vlog( "\tTest double? %s\n", gTestDouble ? "YES" : "NO" );
|
|
|
|
return TEST_PASS;
|
|
}
|
|
|
|
cl_program MakeProgram( cl_device_id device, const char *source[], int count )
|
|
{
|
|
int error;
|
|
int i;
|
|
|
|
//create the program
|
|
cl_program program;
|
|
error = create_single_kernel_helper_create_program(gContext, &program, (cl_uint)count, source);
|
|
if( NULL == program )
|
|
{
|
|
vlog_error( "\t\tFAILED -- Failed to create program. (%d)\n", error );
|
|
return NULL;
|
|
}
|
|
|
|
// build it
|
|
if( (error = clBuildProgram( program, 1, &device, NULL, NULL, NULL )) )
|
|
{
|
|
size_t len;
|
|
char buffer[16384];
|
|
|
|
vlog_error("\t\tFAILED -- clBuildProgramExecutable() failed:\n");
|
|
clGetProgramBuildInfo(program, device, CL_PROGRAM_BUILD_LOG, sizeof(buffer), buffer, &len);
|
|
vlog_error("Log: %s\n", buffer);
|
|
vlog_error("Source :\n");
|
|
for(i = 0; i < count; ++i) {
|
|
vlog_error("%s", source[i]);
|
|
}
|
|
vlog_error("\n");
|
|
|
|
clReleaseProgram( program );
|
|
return NULL;
|
|
}
|
|
|
|
return program;
|
|
}
|
|
|
|
void ReleaseCL(void)
|
|
{
|
|
clReleaseMemObject(gInBuffer_half);
|
|
clReleaseMemObject(gOutBuffer_half);
|
|
clReleaseMemObject(gInBuffer_single);
|
|
clReleaseMemObject(gOutBuffer_single);
|
|
clReleaseMemObject(gInBuffer_double);
|
|
// clReleaseMemObject(gOutBuffer_double);
|
|
clReleaseCommandQueue(gQueue);
|
|
clReleaseContext(gContext);
|
|
|
|
free(gIn_half);
|
|
free(gOut_half);
|
|
free(gOut_half_reference);
|
|
free(gOut_half_reference_double);
|
|
free(gIn_single);
|
|
free(gOut_single);
|
|
free(gOut_single_reference);
|
|
free(gIn_double);
|
|
}
|
|
|
|
cl_uint numVecs(cl_uint count, int vectorSizeIdx, bool aligned) {
|
|
if(aligned && g_arrVecSizes[vectorSizeIdx] == 3) {
|
|
return count/4;
|
|
}
|
|
return (count + g_arrVecSizes[vectorSizeIdx] - 1)/
|
|
( (g_arrVecSizes[vectorSizeIdx]) );
|
|
}
|
|
|
|
cl_uint runsOverBy(cl_uint count, int vectorSizeIdx, bool aligned) {
|
|
if(aligned || g_arrVecSizes[vectorSizeIdx] != 3) { return -1; }
|
|
return count% (g_arrVecSizes[vectorSizeIdx]);
|
|
}
|
|
|
|
void printSource(const char * src[], int len) {
|
|
int i;
|
|
for(i = 0; i < len; ++i) {
|
|
vlog("%s", src[i]);
|
|
}
|
|
}
|
|
|
|
int RunKernel( cl_device_id device, cl_kernel kernel, void *inBuf, void *outBuf, uint32_t blockCount , int extraArg)
|
|
{
|
|
size_t localCount = blockCount;
|
|
size_t wg_size;
|
|
int error;
|
|
|
|
error = clSetKernelArg(kernel, 0, sizeof inBuf, &inBuf);
|
|
error |= clSetKernelArg(kernel, 1, sizeof outBuf, &outBuf);
|
|
|
|
if(extraArg >= 0) {
|
|
error |= clSetKernelArg(kernel, 2, sizeof(cl_uint), &extraArg);
|
|
}
|
|
|
|
if( error )
|
|
{
|
|
vlog_error( "FAILED -- could not set kernel args\n" );
|
|
return -3;
|
|
}
|
|
|
|
error = clGetKernelWorkGroupInfo(kernel, device, CL_KERNEL_WORK_GROUP_SIZE, sizeof( wg_size ), &wg_size, NULL);
|
|
if (error)
|
|
{
|
|
vlog_error( "FAILED -- could not get kernel work group info\n" );
|
|
return -4;
|
|
}
|
|
|
|
wg_size = (wg_size > gWorkGroupSize) ? gWorkGroupSize : wg_size;
|
|
while( localCount % wg_size )
|
|
wg_size--;
|
|
|
|
if( (error = clEnqueueNDRangeKernel( gQueue, kernel, 1, NULL, &localCount, &wg_size, 0, NULL, NULL )) )
|
|
{
|
|
vlog_error( "FAILED -- could not execute kernel\n" );
|
|
return -5;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
#if defined (__APPLE__ )
|
|
|
|
#include <mach/mach_time.h>
|
|
|
|
uint64_t ReadTime( void )
|
|
{
|
|
return mach_absolute_time(); // returns time since boot. Ticks have better than microsecond precsion.
|
|
}
|
|
|
|
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
|
{
|
|
static double conversion = 0.0;
|
|
|
|
if( 0.0 == conversion )
|
|
{
|
|
mach_timebase_info_data_t info;
|
|
kern_return_t err = mach_timebase_info( &info );
|
|
if( 0 == err )
|
|
conversion = 1e-9 * (double) info.numer / (double) info.denom;
|
|
}
|
|
|
|
return (double) (endTime - startTime) * conversion;
|
|
}
|
|
|
|
#elif defined( _WIN32 ) && defined (_MSC_VER)
|
|
|
|
// functions are defined in compat.h
|
|
|
|
#else
|
|
|
|
//
|
|
// Please feel free to substitute your own timing facility here.
|
|
//
|
|
|
|
#warning Times are meaningless. No timing facility in place for this platform.
|
|
uint64_t ReadTime( void )
|
|
{
|
|
return 0ULL;
|
|
}
|
|
|
|
// return the difference between two times obtained from ReadTime in seconds
|
|
double SubtractTime( uint64_t endTime, uint64_t startTime )
|
|
{
|
|
return INFINITY;
|
|
}
|
|
|
|
#endif
|
|
|
|
size_t getBufferSize(cl_device_id device_id)
|
|
{
|
|
static int s_initialized = 0;
|
|
static cl_device_id s_device_id;
|
|
static cl_ulong s_result = 64*1024;
|
|
|
|
if(s_initialized == 0 || s_device_id != device_id)
|
|
{
|
|
cl_ulong result, maxGlobalSize;
|
|
cl_int err = clGetDeviceInfo (device_id,
|
|
CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE,
|
|
sizeof(result), (void *)&result,
|
|
NULL);
|
|
if(err)
|
|
{
|
|
vlog_error("clGetDeviceInfo(CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE) failed\n");
|
|
s_result = 64*1024;
|
|
goto exit;
|
|
}
|
|
if (result > BUFFER_SIZE)
|
|
result = BUFFER_SIZE;
|
|
log_info("Using const buffer size 0x%lx (%lu)\n", (unsigned long)result, (unsigned long)result);
|
|
err = clGetDeviceInfo (device_id,
|
|
CL_DEVICE_GLOBAL_MEM_SIZE,
|
|
sizeof(maxGlobalSize), (void *)&maxGlobalSize,
|
|
NULL);
|
|
if(err)
|
|
{
|
|
vlog_error("clGetDeviceInfo(CL_DEVICE_GLOBAL_MEM_SIZE) failed\n");
|
|
goto exit;
|
|
}
|
|
result = result / 2;
|
|
if(maxGlobalSize < result * 10)
|
|
result = result / 10;
|
|
s_initialized = 1;
|
|
s_device_id = device_id;
|
|
s_result = result;
|
|
}
|
|
|
|
exit:
|
|
if( s_result > SIZE_MAX )
|
|
{
|
|
vlog_error( "ERROR: clGetDeviceInfo is reporting a CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE larger than addressable memory on the host.\n It seems highly unlikely that this is usable, due to the API design.\n" );
|
|
fflush(stdout);
|
|
abort();
|
|
}
|
|
|
|
return (size_t) s_result;
|
|
}
|
|
|
|
cl_ulong getBufferCount(cl_device_id device_id, size_t vecSize, size_t typeSize)
|
|
{
|
|
cl_ulong tmp = getBufferSize(device_id);
|
|
if(vecSize == 3)
|
|
{
|
|
return tmp/(cl_ulong)(4*typeSize);
|
|
}
|
|
return tmp/(cl_ulong)(vecSize*typeSize);
|
|
}
|