//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "testBase.h"
#include "harness/conversions.h"

#ifndef _WIN32
#include <unistd.h>
#endif

#define INT_TEST_VALUE 402258822
#define LONG_TEST_VALUE 515154531254381446LL

const char *atomic_global_pattern[] = {
    "__kernel void test_atomic_fn(volatile __global %s *destMemory, __global %s *oldValues)\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    "\n"
    ,
    "\n"
    "}\n" };

const char *atomic_local_pattern[] = {
    "__kernel void test_atomic_fn(__global %s *finalDest, __global %s *oldValues, volatile __local %s *destMemory, int numDestItems )\n"
    "{\n"
    " int tid = get_global_id(0);\n"
    " int dstItemIdx;\n"
    "\n"
    " // Everybody does the following line(s), but it all has the same result. We still need to ensure we sync before the atomic op, though\n"
    " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
    " destMemory[ dstItemIdx ] = finalDest[ dstItemIdx ];\n"
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    "\n"
    ,
    " barrier( CLK_LOCAL_MEM_FENCE );\n"
    " // Finally, write out the last value. Again, we're synced, so everyone will be writing the same value\n"
    " for( dstItemIdx = 0; dstItemIdx < numDestItems; dstItemIdx++ )\n"
    " finalDest[ dstItemIdx ] = destMemory[ dstItemIdx ];\n"
    "}\n" };

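/* For illustration only: with dataType == kInt, the pragma, the global pattern, and a
   core fragment assemble into roughly the following kernel (the pragma line varies with
   the type and address space under test):

       #pragma OPENCL EXTENSION cl_khr_global_int32_base_atomics : enable
       __kernel void test_atomic_fn(volatile __global int *destMemory, __global int *oldValues)
       {
           int tid = get_global_id(0);
           <programCore, e.g. " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );">
       }
*/
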
#define TEST_COUNT (128 * 1024)

struct TestFns
{
    cl_int  mIntStartValue;
    cl_long mLongStartValue;

    size_t (*NumResultsFn)( size_t threadSize, ExplicitType dataType );

    // Integer versions
    cl_int (*ExpectedValueIntFn)( size_t size, cl_int *startRefValues, size_t whichDestValue );
    void (*GenerateRefsIntFn)( size_t size, cl_int *startRefValues, MTdata d );
    bool (*VerifyRefsIntFn)( size_t size, cl_int *refValues, cl_int finalValue );

    // Long versions
    cl_long (*ExpectedValueLongFn)( size_t size, cl_long *startRefValues, size_t whichDestValue );
    void (*GenerateRefsLongFn)( size_t size, cl_long *startRefValues, MTdata d );
    bool (*VerifyRefsLongFn)( size_t size, cl_long *refValues, cl_long finalValue );

    // Float versions
    cl_float (*ExpectedValueFloatFn)( size_t size, cl_float *startRefValues, size_t whichDestValue );
    void (*GenerateRefsFloatFn)( size_t size, cl_float *startRefValues, MTdata d );
    bool (*VerifyRefsFloatFn)( size_t size, cl_float *refValues, cl_float finalValue );
};

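/* Convention used throughout: a test supplies either the ExpectedValue*Fn callbacks
   (compare each destination item against a computed final value) or the VerifyRefs*Fn
   callbacks (validate the per-thread "old value" array), plus an optional
   GenerateRefs*Fn to seed per-thread start values and an optional NumResultsFn for
   tests with more than one destination item. The harness dispatches to the Int or Long
   variant based on the type size (4 bytes vs 8), and to the Float variant for kFloat. */
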
bool check_atomic_support( cl_device_id device, bool extended, bool isLocal, ExplicitType dataType )
{
    const char *extensionNames[8] = {
        "cl_khr_global_int32_base_atomics", "cl_khr_global_int32_extended_atomics",
        "cl_khr_local_int32_base_atomics",  "cl_khr_local_int32_extended_atomics",
        "cl_khr_int64_base_atomics",        "cl_khr_int64_extended_atomics",
        "cl_khr_int64_base_atomics",        "cl_khr_int64_extended_atomics" // 64-bit atomics have no separate local/global extensions, so this row deliberately repeats the previous one
    };

    size_t index = 0;
    if( extended )
        index += 1;
    if( isLocal )
        index += 2;

    Version version = get_device_cl_version(device);

    switch (dataType)
    {
        case kInt:
        case kUInt:
            if( version >= Version(1,1) )
                return true;
            break;
        case kLong:
        case kULong:
            index += 4;
            break;
        case kFloat: // has to stay separate, since the float atomics aren't in the 1.0 extensions
            return version >= Version(1,1);
        default:
            log_error( "ERROR: Unsupported data type (%d) in check_atomic_support\n", dataType );
            return false;
    }

    return is_extension_available( device, extensionNames[index] );
}

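/* Worked example: extended = true, isLocal = false, dataType = kLong gives
   index = 1 + 4 = 5, i.e. cl_khr_int64_extended_atomics. */
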
int test_atomic_function(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
                         TestFns testFns,
                         bool extended, bool isLocal, ExplicitType dataType, bool matchGroupSize )
{
    clProgramWrapper program;
    clKernelWrapper kernel;
    int error;
    size_t threads[1];
    clMemWrapper streams[2];
    void *refValues, *startRefValues;
    size_t threadSize, groupSize;
    const char *programLines[4];
    char pragma[ 512 ];
    char programHeader[ 512 ];
    MTdata d;
    size_t typeSize = get_explicit_type_size( dataType );


    // Verify we can run first
    bool isUnsigned = ( dataType == kULong ) || ( dataType == kUInt );
    if( !check_atomic_support( deviceID, extended, isLocal, dataType ) )
    {
        // Only print for the signed version (unsigned comes right after, and if signed isn't supported, unsigned isn't either)
        if( dataType == kFloat )
            log_info( "\t%s float not supported\n", isLocal ? "Local" : "Global" );
        else if( !isUnsigned )
            log_info( "\t%s %sint%d not supported\n", isLocal ? "Local" : "Global", isUnsigned ? "u" : "", (int)typeSize * 8 );
        // Since we don't support the operation, it implicitly passes
        return 0;
    }
    else
    {
        if( dataType == kFloat )
            log_info( "\t%s float%s...", isLocal ? "local" : "global", isLocal ? " " : "" );
        else
            log_info( "\t%s %sint%d%s%s...", isLocal ? "local" : "global", isUnsigned ? "u" : "",
                      (int)typeSize * 8, isUnsigned ? "" : " ", isLocal ? " " : "" );
    }

    //// Set up the kernel code

    // Create the pragma line for this kernel
    bool isLong = ( dataType == kLong || dataType == kULong );
    sprintf( pragma, "#pragma OPENCL EXTENSION cl_khr%s_int%s_%s_atomics : enable\n",
             isLong ? "" : (isLocal ? "_local" : "_global"), isLong ? "64" : "32",
             extended ? "extended" : "base" );

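    /* For example, isLocal = true, isLong = false, extended = true produces:
       #pragma OPENCL EXTENSION cl_khr_local_int32_extended_atomics : enable */
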
    // Now create the program header
    const char *typeName = get_explicit_type_name( dataType );
    if( isLocal )
        sprintf( programHeader, atomic_local_pattern[ 0 ], typeName, typeName, typeName );
    else
        sprintf( programHeader, atomic_global_pattern[ 0 ], typeName, typeName );

    // Set up our entire program now
    programLines[ 0 ] = pragma;
    programLines[ 1 ] = programHeader;
    programLines[ 2 ] = programCore;
    programLines[ 3 ] = ( isLocal ) ? atomic_local_pattern[ 1 ] : atomic_global_pattern[ 1 ];

    if( create_single_kernel_helper( context, &program, &kernel, 4, programLines, "test_atomic_fn" ) )
    {
        return -1;
    }

    //// Set up to actually run
    threadSize = num_elements;

    error = get_max_common_work_group_size( context, kernel, threadSize, &groupSize );
    test_error( error, "Unable to get thread group max size" );

    if( matchGroupSize )
        // HACK because xchg and cmpxchg apparently are limited by hardware
        threadSize = groupSize;

    if( isLocal )
    {
        size_t maxSizes[3] = {0, 0, 0};
        error = clGetDeviceInfo(deviceID, CL_DEVICE_MAX_WORK_ITEM_SIZES, 3 * sizeof(size_t), maxSizes, 0);
        test_error( error, "Unable to obtain max work item sizes for the device" );

        size_t workSize;
        error = clGetKernelWorkGroupInfo( kernel, deviceID, CL_KERNEL_WORK_GROUP_SIZE, sizeof( workSize ), &workSize, NULL );
        test_error( error, "Unable to obtain max work group size for device and kernel combo" );

        // "workSize" is limited to that of the first dimension, as only a 1D range is executed
        if( maxSizes[0] < workSize )
        {
            workSize = maxSizes[0];
        }

        threadSize = groupSize = workSize;
    }


    log_info( "\t(thread count %d, group size %d)\n", (int)threadSize, (int)groupSize );

    refValues = malloc( typeSize * threadSize );

    if( testFns.GenerateRefsIntFn != NULL )
    {
        // We have a ref generator provided
        d = init_genrand( gRandomSeed );
        startRefValues = malloc( typeSize * threadSize );
        if( typeSize == 4 )
            testFns.GenerateRefsIntFn( threadSize, (cl_int *)startRefValues, d );
        else
            testFns.GenerateRefsLongFn( threadSize, (cl_long *)startRefValues, d );
        free_mtdata(d);
        d = NULL;
    }
    else
        startRefValues = NULL;

    // If we're given a num_results function, we use it to determine how many result
    // objects we need; otherwise we assume it's just 1
    size_t numDestItems = ( testFns.NumResultsFn != NULL ) ? testFns.NumResultsFn( threadSize, dataType ) : 1;

    char * destItems = new char[ typeSize * numDestItems ];
    if( destItems == NULL )
    {
        log_error( "ERROR: Unable to allocate memory!\n" );
        return -1;
    }
    void * startValue = ( typeSize == 4 ) ? (void *)&testFns.mIntStartValue : (void *)&testFns.mLongStartValue;
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );

    streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR,
                                typeSize * numDestItems, destItems, NULL);
    if (!streams[0])
    {
        log_error("ERROR: Creating output array failed!\n");
        return -1;
    }
    streams[1] = clCreateBuffer(
        context,
        ((startRefValues != NULL ? CL_MEM_COPY_HOST_PTR : CL_MEM_READ_WRITE)),
        typeSize * threadSize, startRefValues, NULL);
    if (!streams[1])
    {
        log_error("ERROR: Creating reference array failed!\n");
        return -1;
    }

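    /* streams[0] holds the destination value(s) the atomics operate on, preloaded with
       the start value; streams[1] holds each thread's returned "old" value (and, for
       tests with a generator, is preloaded with the generated per-thread inputs). */
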
    /* Set the arguments */
    error = clSetKernelArg( kernel, 0, sizeof( streams[0] ), &streams[0] );
    test_error( error, "Unable to set indexed kernel arguments" );
    error = clSetKernelArg( kernel, 1, sizeof( streams[1] ), &streams[1] );
    test_error( error, "Unable to set indexed kernel arguments" );

    if( isLocal )
    {
        error = clSetKernelArg( kernel, 2, typeSize * numDestItems, NULL );
        test_error( error, "Unable to set indexed local kernel argument" );

        cl_int numDestItemsInt = (cl_int)numDestItems;
        error = clSetKernelArg( kernel, 3, sizeof( cl_int ), &numDestItemsInt );
        test_error( error, "Unable to set indexed kernel argument" );
    }

    /* Run the kernel */
    threads[0] = threadSize;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, &groupSize, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to read result value!" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize * threadSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    // If we have an expectedFn, we need to generate a final value to compare against;
    // if we don't have one, it's because we're comparing ref values only
    if( testFns.ExpectedValueIntFn != NULL )
    {
        for( size_t i = 0; i < numDestItems; i++ )
        {
            char expected[ 8 ];
            cl_int intVal;
            cl_long longVal;
            if( typeSize == 4 )
            {
                // Int version
                intVal = testFns.ExpectedValueIntFn( threadSize, (cl_int *)startRefValues, i );
                memcpy( expected, &intVal, sizeof( intVal ) );
            }
            else
            {
                // Long version
                longVal = testFns.ExpectedValueLongFn( threadSize, (cl_long *)startRefValues, i );
                memcpy( expected, &longVal, sizeof( longVal ) );
            }

            if( memcmp( expected, destItems + i * typeSize, typeSize ) != 0 )
            {
                if( typeSize == 4 )
                {
                    cl_int *outValue = (cl_int *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %d, was %d)\n", i, intVal, *outValue );
                    cl_int *startRefs = (cl_int *)startRefValues;
                    cl_int *refs = (cl_int *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %d --- %d\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %d\n", i, refs[i] );
                    }
                }
                else
                {
                    cl_long *outValue = (cl_long *)( destItems + i * typeSize );
                    log_error( "ERROR: Result %ld from kernel does not validate! (should be %lld, was %lld)\n", i, longVal, *outValue );
                    cl_long *startRefs = (cl_long *)startRefValues;
                    cl_long *refs = (cl_long *)refValues;
                    for( i = 0; i < threadSize; i++ )
                    {
                        if( startRefs != NULL )
                            log_info( " --- %ld - %lld --- %lld\n", i, startRefs[i], refs[i] );
                        else
                            log_info( " --- %ld --- %lld\n", i, refs[i] );
                    }
                }
                return -1;
            }
        }
    }

    if( testFns.VerifyRefsIntFn != NULL )
    {
        /* Use the verify function to also check the results */
        if( dataType == kFloat )
        {
            cl_float *outValue = (cl_float *)destItems;
            if( !testFns.VerifyRefsFloatFn( threadSize, (cl_float *)refValues, *outValue ) )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else if( typeSize == 4 )
        {
            cl_int *outValue = (cl_int *)destItems;
            if( !testFns.VerifyRefsIntFn( threadSize, (cl_int *)refValues, *outValue ) )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
        else
        {
            cl_long *outValue = (cl_long *)destItems;
            if( !testFns.VerifyRefsLongFn( threadSize, (cl_long *)refValues, *outValue ) )
            {
                log_error( "ERROR: Reference values did not validate!\n" );
                return -1;
            }
        }
    }
    else if( testFns.ExpectedValueIntFn == NULL )
    {
        log_error( "ERROR: Test doesn't check total or refs; no values are verified!\n" );
        return -1;
    }


    /* Re-write the starting value */
    for( size_t i = 0; i < numDestItems; i++ )
        memcpy( destItems + i * typeSize, startValue, typeSize );
    error = clEnqueueWriteBuffer( queue, streams[0], true, 0, typeSize * numDestItems, destItems, 0, NULL, NULL );
    test_error( error, "Unable to write starting values!" );

    /* Run the kernel once for a single thread, so we can verify that the returned value is the original one */
    threads[0] = 1;
    error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, threads, 0, NULL, NULL );
    test_error( error, "Unable to execute test kernel" );

    error = clEnqueueReadBuffer( queue, streams[1], true, 0, typeSize, refValues, 0, NULL, NULL );
    test_error( error, "Unable to read reference values!" );

    if( memcmp( refValues, destItems, typeSize ) != 0 )
    {
        if( typeSize == 4 )
        {
            cl_int *s = (cl_int *)destItems;
            cl_int *r = (cl_int *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                       "(should have been %d, returned %d)!\n", *s, *r );
        }
        else
        {
            cl_long *s = (cl_long *)destItems;
            cl_long *r = (cl_long *)refValues;
            log_error( "ERROR: atomic function operated correctly but did NOT return correct 'old' value "
                       "(should have been %lld, returned %lld)!\n", *s, *r );
        }
        return -1;
    }

    delete [] destItems;
    free( refValues );
    if( startRefValues != NULL )
        free( startRefValues );

    return 0;
}

int test_atomic_function_set(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements, const char *programCore,
                             TestFns testFns,
                             bool extended, bool matchGroupSize, bool usingAtomicPrefix )
{
    log_info(" Testing %s functions...\n", usingAtomicPrefix ? "atomic_" : "atom_");

    int errors = 0;
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kInt, matchGroupSize );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kUInt, matchGroupSize );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kInt, matchGroupSize );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kUInt, matchGroupSize );

    // Only the 32-bit atomic functions use the "atomic" prefix in 1.1; the 64-bit functions still use the "atom" prefix.
    // The argument usingAtomicPrefix is set to true if programCore was generated with the "atomic" prefix.
    if (!usingAtomicPrefix) {
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kLong, matchGroupSize );
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, false, kULong, matchGroupSize );
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kLong, matchGroupSize );
        errors |= test_atomic_function( deviceID, context, queue, num_elements, programCore, testFns, extended, true, kULong, matchGroupSize );
    }

    return errors;
}

#pragma mark ---- add

const char atom_add_core[] =
    " oldValues[tid] = atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n"
    " atom_add( &destMemory[0], tid + 3 );\n";

const char atomic_add_core[] =
    " oldValues[tid] = atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n"
    " atomic_add( &destMemory[0], tid + 3 );\n";

cl_int test_atomic_add_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = 0;
    for( size_t i = 0; i < size; i++ )
        total += ( (cl_int)i + 3 ) * 4;
    return total;
}

cl_long test_atomic_add_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = 0;
    for( size_t i = 0; i < size; i++ )
        total += ( ( i + 3 ) * 4 );
    return total;
}

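/* Each of the `size` threads adds (tid + 3) four times, so starting from 0 the
   expected final value is the sum over tid of 4 * (tid + 3); e.g. for size = 3:
   4 * (3 + 4 + 5) = 48. */
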
int test_atomic_add(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0LL, NULL, test_atomic_add_result_int, NULL, NULL, test_atomic_add_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_add_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- sub

const char atom_sub_core[] = " oldValues[tid] = atom_sub( &destMemory[0], tid + 3 );\n";

const char atomic_sub_core[] = " oldValues[tid] = atomic_sub( &destMemory[0], tid + 3 );\n";

cl_int test_atomic_sub_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = INT_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total -= (cl_int)i + 3;
    return total;
}

cl_long test_atomic_sub_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = LONG_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total -= i + 3;
    return total;
}

int test_atomic_sub(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_sub_result_int, NULL, NULL, test_atomic_sub_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_sub_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- xchg

const char atom_xchg_core[] = " oldValues[tid] = atom_xchg( &destMemory[0], tid );\n";

const char atomic_xchg_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";
const char atomic_xchg_float_core[] = " oldValues[tid] = atomic_xchg( &destMemory[0], tid );\n";

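/* Verification strategy for xchg: every thread swaps its tid into the single
   destination slot and records what it displaced. Collectively, the recorded "old"
   values must contain the start value exactly once and each tid exactly once,
   except for the one tid left behind as the final destination value. */
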
bool test_atomic_xchg_verify_int( size_t size, cl_int *refValues, cl_int finalValue )
{
    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
    char *valids;
    size_t i;
    char originalValidCount = 0;

    valids = (char *)malloc( sizeof( char ) * size );
    memset( valids, 0, sizeof( char ) * size );

    for( i = 0; i < size; i++ )
    {
        if( refValues[ i ] == INT_TEST_VALUE )
        {
            // Special initial value
            originalValidCount++;
            continue;
        }
        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
        {
            log_error( "ERROR: Reference value %ld outside of valid range! (%d)\n", i, refValues[ i ] );
            free( valids );
            return false;
        }
        valids[ refValues[ i ] ]++;
    }

    /* Note: ONE entry will have a zero count. It'll be the last one that executed,
       because that value should be the final value output */
    if( valids[ finalValue ] > 0 )
    {
        log_error( "ERROR: Final value %d was also in ref list!\n", finalValue );
        free( valids );
        return false;
    }
    else
        valids[ finalValue ] = 1; // So the following loop will be okay

    /* Now check that every entry has one and only one count */
    if( originalValidCount != 1 )
    {
        log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", INT_TEST_VALUE, originalValidCount );
        free( valids );
        return false;
    }
    for( i = 0; i < size; i++ )
    {
        if( valids[ i ] != 1 )
        {
            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
            for( size_t j = 0; j < size; j++ )
                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
            free( valids );
            return false;
        }
    }

    free( valids );
    return true;
}

bool test_atomic_xchg_verify_long( size_t size, cl_long *refValues, cl_long finalValue )
{
    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
    char *valids;
    size_t i;
    char originalValidCount = 0;

    valids = (char *)malloc( sizeof( char ) * size );
    memset( valids, 0, sizeof( char ) * size );

    for( i = 0; i < size; i++ )
    {
        if( refValues[ i ] == LONG_TEST_VALUE )
        {
            // Special initial value
            originalValidCount++;
            continue;
        }
        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
        {
            log_error( "ERROR: Reference value %ld outside of valid range! (%lld)\n", i, refValues[ i ] );
            free( valids );
            return false;
        }
        valids[ refValues[ i ] ]++;
    }

    /* Note: ONE entry will have a zero count. It'll be the last one that executed,
       because that value should be the final value output */
    if( valids[ finalValue ] > 0 )
    {
        log_error( "ERROR: Final value %lld was also in ref list!\n", finalValue );
        free( valids );
        return false;
    }
    else
        valids[ finalValue ] = 1; // So the following loop will be okay

    /* Now check that every entry has one and only one count */
    if( originalValidCount != 1 )
    {
        log_error( "ERROR: Starting reference value %lld did not occur once-and-only-once (occurred %d)\n", LONG_TEST_VALUE, originalValidCount );
        free( valids );
        return false;
    }
    for( i = 0; i < size; i++ )
    {
        if( valids[ i ] != 1 )
        {
            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
            for( size_t j = 0; j < size; j++ )
                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
            free( valids );
            return false;
        }
    }

    free( valids );
    return true;
}

bool test_atomic_xchg_verify_float( size_t size, cl_float *refValues, cl_float finalValue )
{
    /* For xchg, each value from 0 to size - 1 should have an entry in the ref array, and ONLY one entry */
    char *valids;
    size_t i;
    char originalValidCount = 0;

    valids = (char *)malloc( sizeof( char ) * size );
    memset( valids, 0, sizeof( char ) * size );

    for( i = 0; i < size; i++ )
    {
        cl_int *intRefValue = (cl_int *)( &refValues[ i ] );
        if( *intRefValue == INT_TEST_VALUE )
        {
            // Special initial value
            originalValidCount++;
            continue;
        }
        if( refValues[ i ] < 0 || (size_t)refValues[ i ] >= size )
        {
            log_error( "ERROR: Reference value %ld outside of valid range! (%a)\n", i, refValues[ i ] );
            free( valids );
            return false;
        }
        valids[ (int)refValues[ i ] ]++;
    }

    /* Note: ONE entry will have a zero count. It'll be the last one that executed,
       because that value should be the final value output */
    if( valids[ (int)finalValue ] > 0 )
    {
        log_error( "ERROR: Final value %a was also in ref list!\n", finalValue );
        free( valids );
        return false;
    }
    else
        valids[ (int)finalValue ] = 1; // So the following loop will be okay

    /* Now check that every entry has one and only one count */
    if( originalValidCount != 1 )
    {
        log_error( "ERROR: Starting reference value %d did not occur once-and-only-once (occurred %d)\n", INT_TEST_VALUE, originalValidCount );
        free( valids );
        return false;
    }
    for( i = 0; i < size; i++ )
    {
        if( valids[ i ] != 1 )
        {
            log_error( "ERROR: Reference value %ld did not occur once-and-only-once (occurred %d)\n", i, valids[ i ] );
            for( size_t j = 0; j < size; j++ )
                log_info( "%d: %d\n", (int)j, (int)valids[ j ] );
            free( valids );
            return false;
        }
    }

    free( valids );
    return true;
}

int test_atomic_xchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, NULL, NULL, test_atomic_xchg_verify_int, NULL, NULL, test_atomic_xchg_verify_long, NULL, NULL, test_atomic_xchg_verify_float };

    int errors = test_atomic_function_set( deviceID, context, queue, num_elements, atom_xchg_core, set, false, true, /*usingAtomicPrefix*/ false );
    errors |= test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xchg_core, set, false, true, /*usingAtomicPrefix*/ true );

    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, false, kFloat, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_xchg_float_core, set, false, true, kFloat, true );

    return errors;
}

#pragma mark ---- min

const char atom_min_core[] = " oldValues[tid] = atom_min( &destMemory[0], oldValues[tid] );\n";

const char atomic_min_core[] = " oldValues[tid] = atomic_min( &destMemory[0], oldValues[tid] );\n";

cl_int test_atomic_min_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = 0x7fffffffL;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] < total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_min_gen_int( size_t size, cl_int *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
}

cl_long test_atomic_min_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = 0x7fffffffffffffffLL;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] < total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_min_gen_long( size_t size, cl_long *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
}

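/* Rationale: the destination starts at the maximum representable value and every
   generated input is smaller and non-negative, so the expected final value is simply
   the minimum of the generated start refs. */
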
int test_atomic_min(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0x7fffffffL, 0x7fffffffffffffffLL, NULL, test_atomic_min_result_int, test_atomic_min_gen_int, NULL, test_atomic_min_result_long, test_atomic_min_gen_long, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_min_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- max

const char atom_max_core[] = " oldValues[tid] = atom_max( &destMemory[0], oldValues[tid] );\n";

const char atomic_max_core[] = " oldValues[tid] = atomic_max( &destMemory[0], oldValues[tid] );\n";

cl_int test_atomic_max_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = 0;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] > total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_max_gen_int( size_t size, cl_int *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_int)( genrand_int32(d) % 0x3fffffff ) + 0x3fffffff;
}

cl_long test_atomic_max_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = 0;
    for( size_t i = 0; i < size; i++ )
    {
        if( startRefValues[ i ] > total )
            total = startRefValues[ i ];
    }
    return total;
}

void test_atomic_max_gen_long( size_t size, cl_long *startRefValues, MTdata d )
{
    for( size_t i = 0; i < size; i++ )
        startRefValues[i] = (cl_long)( genrand_int32(d) | ( ( (cl_long)genrand_int32(d) & 0x7fffffffL ) << 16 ) );
}

int test_atomic_max(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0, NULL, test_atomic_max_result_int, test_atomic_max_gen_int, NULL, test_atomic_max_result_long, test_atomic_max_gen_long, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_max_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- inc

const char atom_inc_core[] = " oldValues[tid] = atom_inc( &destMemory[0] );\n";

const char atomic_inc_core[] = " oldValues[tid] = atomic_inc( &destMemory[0] );\n";

cl_int test_atomic_inc_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    return INT_TEST_VALUE + (cl_int)size;
}

cl_long test_atomic_inc_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    return LONG_TEST_VALUE + size;
}

int test_atomic_inc(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_inc_result_int, NULL, NULL, test_atomic_inc_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_inc_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- dec

const char atom_dec_core[] = " oldValues[tid] = atom_dec( &destMemory[0] );\n";

const char atomic_dec_core[] = " oldValues[tid] = atomic_dec( &destMemory[0] );\n";

cl_int test_atomic_dec_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    return INT_TEST_VALUE - (cl_int)size;
}

cl_long test_atomic_dec_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    return LONG_TEST_VALUE - size;
}

int test_atomic_dec(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_dec_result_int, NULL, NULL, test_atomic_dec_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_dec_core, set, false, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- cmpxchg

/* We test cmpxchg by implementing atom_add the long way */
const char atom_cmpxchg_core[] =
    " int oldValue, origValue, newValue;\n"
    " do {\n"
    " origValue = destMemory[0];\n"
    " newValue = origValue + tid + 2;\n"
    " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
    " } while( oldValue != origValue );\n"
    " oldValues[tid] = oldValue;\n"
    ;

const char atom_cmpxchg64_core[] =
    " long oldValue, origValue, newValue;\n"
    " do {\n"
    " origValue = destMemory[0];\n"
    " newValue = origValue + tid + 2;\n"
    " oldValue = atom_cmpxchg( &destMemory[0], origValue, newValue );\n"
    " } while( oldValue != origValue );\n"
    " oldValues[tid] = oldValue;\n"
    ;

const char atomic_cmpxchg_core[] =
    " int oldValue, origValue, newValue;\n"
    " do {\n"
    " origValue = destMemory[0];\n"
    " newValue = origValue + tid + 2;\n"
    " oldValue = atomic_cmpxchg( &destMemory[0], origValue, newValue );\n"
    " } while( oldValue != origValue );\n"
    " oldValues[tid] = oldValue;\n"
    ;

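/* Each core is the classic compare-and-swap retry loop: read the current value,
   compute current + (tid + 2), and attempt the swap; if another thread won the race,
   oldValue != origValue and we retry. The net effect is an atomic fetch-and-add of
   tid + 2, which is exactly what the expected-value functions below sum up. */
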
cl_int test_atomic_cmpxchg_result_int( size_t size, cl_int *startRefValues, size_t whichDestValue )
{
    cl_int total = INT_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total += (cl_int)i + 2;
    return total;
}

cl_long test_atomic_cmpxchg_result_long( size_t size, cl_long *startRefValues, size_t whichDestValue )
{
    cl_long total = LONG_TEST_VALUE;
    for( size_t i = 0; i < size; i++ )
        total += i + 2;
    return total;
}

int test_atomic_cmpxchg(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { INT_TEST_VALUE, LONG_TEST_VALUE, NULL, test_atomic_cmpxchg_result_int, NULL, NULL, test_atomic_cmpxchg_result_long, NULL, NULL };

    int errors = 0;

    log_info(" Testing atom_ functions...\n");
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, false, kUInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg_core, set, false, true, kUInt, true );

    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kLong, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, false, kULong, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kLong, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atom_cmpxchg64_core, set, false, true, kULong, true );

    log_info(" Testing atomic_ functions...\n");
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, false, kUInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kInt, true );
    errors |= test_atomic_function( deviceID, context, queue, num_elements, atomic_cmpxchg_core, set, false, true, kUInt, true );

    if( errors )
        return -1;

    return 0;
}

#pragma mark -------- Bitwise functions

size_t test_bitwise_num_results( size_t threadCount, ExplicitType dataType )
{
    size_t numBits = get_explicit_type_size( dataType ) * 8;

    return ( threadCount + numBits - 1 ) / numBits;
}

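/* One destination word per numBits threads, rounded up: e.g. 100 threads on a
   32-bit type need (100 + 31) / 32 = 4 destination words. */
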
#pragma mark ---- and

const char atom_and_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atom_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
    ;

const char atomic_and_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atomic_and( &destMemory[whichResult], ~( 1L << bitIndex ) );\n"
    ;


cl_int test_atomic_and_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0;

    // Last item doesn't get and'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = (cl_int)0xffffffffL;
    for( size_t i = 0; i < numBits; i++ )
        bits &= ~( 1 << i );

    return bits;
}

cl_long test_atomic_and_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return 0;

    // Last item doesn't get and'ed on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 64;
    cl_long bits = (cl_long)0xffffffffffffffffLL;
    for( size_t i = 0; i < numBits; i++ )
        bits &= ~( 1LL << i );

    return bits;
}

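/* Example for the int case: size = 70 threads gives numThreads = 3; the first two
   words end up fully cleared (0), while the last word only has bits 0..5 cleared
   (70 - 2*32 = 6 bits), leaving 0xffffffc0. */
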
int test_atomic_and(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0xffffffff, 0xffffffffffffffffLL, test_bitwise_num_results,
                    test_atomic_and_result_int, NULL, NULL, test_atomic_and_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_and_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- or

const char atom_or_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atom_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
    ;

const char atomic_or_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int whichResult = tid / numBits;\n"
    " int bitIndex = tid - ( whichResult * numBits );\n"
    "\n"
    " oldValues[tid] = atomic_or( &destMemory[whichResult], ( 1L << bitIndex ) );\n"
    ;

cl_int test_atomic_or_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 31 ) / 32;
    if( whichResult < numThreads - 1 )
        return 0xffffffff;

    // Last item doesn't get or'd on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 32;
    cl_int bits = 0;
    for( size_t i = 0; i < numBits; i++ )
        bits |= ( 1 << i );

    return bits;
}

cl_long test_atomic_or_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    size_t numThreads = ( (size_t)size + 63 ) / 64;
    if( whichResult < numThreads - 1 )
        return 0x0ffffffffffffffffLL;

    // Last item doesn't get or'd on every bit, so we have to mask away
    size_t numBits = (size_t)size - whichResult * 64;
    cl_long bits = 0;
    for( size_t i = 0; i < numBits; i++ )
        bits |= ( 1LL << i );

    return bits;
}

int test_atomic_or(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0, 0LL, test_bitwise_num_results, test_atomic_or_result_int, NULL, NULL, test_atomic_or_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_or_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}

#pragma mark ---- xor

const char atom_xor_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int bitIndex = tid & ( numBits - 1 );\n"
    "\n"
    " oldValues[tid] = atom_xor( &destMemory[0], 1L << bitIndex );\n";

const char atomic_xor_core[] =
    " size_t numBits = sizeof( destMemory[0] ) * 8;\n"
    " int bitIndex = tid & ( numBits - 1 );\n"
    "\n"
    " oldValues[tid] = atomic_xor( &destMemory[0], 1L << bitIndex );\n";

cl_int test_atomic_xor_result_int( size_t size, cl_int *startRefValues, size_t whichResult )
{
    cl_int total = 0x2f08ab41;
    for( size_t i = 0; i < size; i++ )
        total ^= ( 1 << ( i & 31 ) );
    return total;
}

cl_long test_atomic_xor_result_long( size_t size, cl_long *startRefValues, size_t whichResult )
{
    cl_long total = 0x2f08ab418ba0541LL;
    for( size_t i = 0; i < size; i++ )
        total ^= ( 1LL << ( i & 63 ) );
    return total;
}

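/* Every thread toggles bit (tid mod 32) of the single destination word (mod 64 for
   longs), so each bit's final state flips once per thread mapped to it; the loops
   above replay exactly that parity against the start value. */
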
int test_atomic_xor(cl_device_id deviceID, cl_context context, cl_command_queue queue, int num_elements)
{
    TestFns set = { 0x2f08ab41, 0x2f08ab418ba0541LL, NULL, test_atomic_xor_result_int, NULL, NULL, test_atomic_xor_result_long, NULL, NULL };

    if( test_atomic_function_set( deviceID, context, queue, num_elements, atom_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ false ) != 0 )
        return -1;
    if( test_atomic_function_set( deviceID, context, queue, num_elements, atomic_xor_core, set, true, /*matchGroupSize*/ false, /*usingAtomicPrefix*/ true ) != 0 )
        return -1;
    return 0;
}