// // Copyright (c) 2017 The Khronos Group Inc. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // #include "testBase.h" #include "harness/conversions.h" #include "harness/typeWrappers.h" #include "harness/testHarness.h" // #define USE_NEW_SYNTAX 1 // The number of shuffles to test per test #define NUM_TESTS 32 // The number of times to run each combination of shuffles #define NUM_ITERATIONS_PER_TEST 2 #define MAX_PROGRAM_SIZE NUM_TESTS*1024 #define PRINT_SHUFFLE_KERNEL_SOURCE 0 #define SPEW_ORDER_DETAILS 0 enum ShuffleMode { kNormalMode = 0, kFunctionCallMode, kArrayAccessMode, kBuiltInFnMode, kBuiltInDualInputFnMode }; static const char *shuffleKernelPattern[3] = { "__kernel void sample_test( __global %s%s *source, __global %s%s *dest )\n" "{\n" " if (get_global_id(0) != 0) return;\n" " //%s%s src1 %s, src2%s;\n",// Here's a comma... // Above code is commented out for now, but keeping around for testing local storage options "}\n" }; static const char *shuffleTempPattern = " %s%s tmp;\n"; static const char *clearTempPattern = " tmp = (%s%s)((%s)0);\n"; static const char *shuffleSinglePattern = " tmp%s%s = source[%d]%s%s;\n" " dest[%d] = tmp;\n" ; static const char * shuffleSinglePatternV3src = " tmp%s%s = vload3(%d, source)%s%s;\n" " dest[%d] = tmp;\n"; static const char * shuffleSinglePatternV3dst = " tmp%s%s = source[%d]%s%s;\n" " vstore3(tmp, %d, dest);\n"; static const char * shuffleSinglePatternV3srcV3dst = "tmp%s%s = vload3(%d, source)%s%s;\n" "vstore3(tmp, %d, dest);\n"; static const char *shuffleFnLinePattern = "%s%s shuffle_fn( %s%s source );\n%s%s shuffle_fn( %s%s source ) { return source; }\n\n"; static const char *shuffleFnPattern = " tmp%s%s = shuffle_fn( source[%d] )%s%s;\n" " dest[%d] = tmp;\n" ; static const char *shuffleFnPatternV3src = " tmp%s%s = shuffle_fn( vload3(%d, source) )%s%s;\n" " dest[%d] = tmp;\n" ; static const char *shuffleFnPatternV3dst = " tmp%s%s = shuffle_fn( source[%d] )%s%s;\n" " vstore3(tmp, %d, dest);\n" ; static const char *shuffleFnPatternV3srcV3dst = " tmp%s%s = shuffle_fn(vload3(%d, source) )%s%s;\n" " vstore3(tmp, %d, dest);\n" ; // shuffle() built-in function patterns static const char *shuffleBuiltInPattern = " {\n" " %s%s src1 = %s;\n" " %s%s%s mask = (%s%s%s)( %s );\n" " tmp = shuffle( src1, mask );\n" " %s;\n" " }\n" ; // shuffle() built-in dual-input function patterns static const char *shuffleBuiltInDualPattern = " {\n" " %s%s src1 = %s;\n" " %s%s src2 = %s;\n" " %s%s%s mask = (%s%s%s)( %s );\n" " tmp = shuffle2( src1, src2, mask );\n" " %s;\n" " }\n" ; typedef unsigned char ShuffleOrder[ 16 ]; void incrementShuffleOrder( ShuffleOrder &order, size_t orderSize, size_t orderRange ) { for( size_t i = 0; i < orderSize; i++ ) { order[ i ]++; if( order[ i ] < orderRange ) return; order[ i ] = 0; } } bool shuffleOrderContainsDuplicates( ShuffleOrder &order, size_t orderSize ) { bool flags[ 16 ] = { false, false, false, false, false, false, false, false, false, false, false, false, false, false, false, false }; for( size_t i = 0; i < orderSize; i++ ) { if( flags[ order[ i ] ] ) return true; flags[ order[ i ] ] = true; } return false; } static void shuffleVector( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse ) { for(size_t i = 0; i < lengthToUse; i++ ) { unsigned char *inPtr = inVector + typeSize *order[ i ]; memcpy( outVector, inPtr, typeSize ); outVector += typeSize; } } static void shuffleVector2( unsigned char *inVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse ) { for(size_t i = 0; i < lengthToUse; i++ ) { unsigned char *outPtr = outVector + typeSize *order[ i ]; memcpy( outPtr, inVector, typeSize ); inVector += typeSize; } } static void shuffleVectorDual( unsigned char *inVector, unsigned char *inSecondVector, unsigned char *outVector, ShuffleOrder order, size_t vecSize, size_t typeSize, cl_uint lengthToUse ) { // This is tricky: the indices of each shuffle are in a range (0-srcVecSize * 2-1), // where (srcVecSize-srcVecSize*2-1) refers to the second input. size_t uphalfMask = (size_t)vecSize; size_t lowerBits = (size_t)( vecSize - 1 ); for(size_t i = 0; i < lengthToUse; i++ ) { unsigned char *inPtr; #if SPEW_ORDER_DETAILS log_info("order[%d] is %d, or %d of %s\n", (int)i, (int)(order[i]), (int)(order[i] & lowerBits), ((order[i]&uphalfMask) == 0)?"lower num":"upper num"); #endif if( order[ i ] & uphalfMask ) inPtr = inSecondVector + typeSize * ( order[ i ] & lowerBits ); else inPtr = inVector + typeSize * ( order[ i ] & lowerBits ); memcpy( outVector, inPtr, typeSize ); outVector += typeSize; } } static ShuffleOrder sNaturalOrder = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; static int useNumbersFlip = 0; const char *get_order_string( ShuffleOrder &order, size_t vecSize, cl_uint lengthToUse, bool byNumber, MTdata d ) { // NOTE: names are only valid for hex characters (up to F) but for debugging, we use // this to print out orders for dual inputs, which actually can be valid up to position 31 (two 16-element vectors) // so we go ahead and fake the rest of the alphabet for those other 16 positions, so we have // some (indirectly) meaningful output char names[] = "0123456789abcdefghijklmnopqrstuv"; char namesUpperCase[] = "0123456789ABCDEFGHIJKLMNOPQRSTUV"; char names2[] = "xyzw!!!!!!!!!!!!"; static char orderString[ 18 ]; size_t j, idx; // Assume we don't have to use numbers byNumber = 0; // Check to see for( j = 0; j < lengthToUse; j++ ) { if (order[j] > 3) { // An index is > xyzw so we need to use numbers byNumber = 1; break; } } // If we can use numbers, do so half the time. if (!byNumber) { byNumber = (useNumbersFlip++)%2; } // Do not use xyzw for vectors whose length is not 2 or 4 per the spec. if (vecSize != 2 || vecSize != 4 || vecSize != 3) byNumber = 1; if( byNumber || vecSize > 4 ) { idx = 0; // Randomly chose upper and lower case S orderString[ idx++ ] = random_in_range(0, 1, d) ? 's' : 'S'; for( j = 0; j < vecSize && j < lengthToUse; j++ ) { // Randomly choose upper and lower case. orderString[ idx++ ] = random_in_range(0, 1, d) ? names[ (int)order[ j ] ] : namesUpperCase[ (int)order[ j ] ]; } orderString[ idx++ ] = 0; } else { for( j = 0; j < vecSize && j < lengthToUse; j++ ) { // Randomly choose upper and lower case. orderString[ j ] = names2[ (int)order[ j ] ]; } orderString[ j ] = 0; } return orderString; } char * get_order_name( ExplicitType vecType, size_t inVecSize, size_t outVecSize, ShuffleOrder &inOrder, ShuffleOrder &outOrder, cl_uint lengthToUse, MTdata d, bool inUseNumerics, bool outUseNumerics ) { static char orderName[ 512 ] = ""; char inOrderStr[ 512 ], outOrderStr[ 512 ]; if( inVecSize == 1 ) inOrderStr[ 0 ] = 0; else sprintf( inOrderStr, "%d.%s", (int)inVecSize, get_order_string( inOrder, outVecSize, lengthToUse, inUseNumerics, d ) ); if( outVecSize == 1 ) outOrderStr[ 0 ] = 0; else sprintf( outOrderStr, "%d.%s", (int)outVecSize, get_order_string( outOrder, outVecSize, lengthToUse, outUseNumerics, d ) ); sprintf( orderName, "order %s%s -> %s%s", get_explicit_type_name( vecType ), inOrderStr, get_explicit_type_name( vecType ), outOrderStr ); return orderName; } void print_hex_mem_dump( const unsigned char *inDataPtr, const unsigned char * inDataPtr2, const unsigned char *expected, const unsigned char *outDataPtr, size_t inVecSize, size_t outVecSize, size_t typeSize ) { char error [4096] = ""; strcat(error, " Source: "); for( unsigned int j = 0; j < inVecSize * typeSize; j++ ) { sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr[ j ] ); } if( inDataPtr2 != NULL ) { strcat(error, "\n Source 2: "); for( unsigned int j = 0; j < inVecSize * typeSize; j++ ) { sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)inDataPtr2[ j ] ); } } strcat(error, "\n Expected: " ); for( unsigned int j = 0; j < outVecSize * typeSize; j++ ) { sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)expected[ j ] ); } strcat(error, "\n Actual: " ); for( unsigned int j = 0; j < outVecSize * typeSize; j++ ) { sprintf(error, "%s%s%02x ",error, ( j % typeSize ) ? "" : " ", (cl_uchar)outDataPtr[ j ] ); } log_info("%s\n", error); } void generate_shuffle_mask( char *outMaskString, size_t maskSize, const ShuffleOrder *order ) { outMaskString[ 0 ] = 0; if( order != NULL ) { for( size_t jj = 0; jj < maskSize; jj++ ) { char thisMask[ 16 ]; sprintf( thisMask, "%s%d", ( jj == 0 ) ? "" : ", ", (*order)[ jj ] ); strcat( outMaskString, thisMask ); } } else { for( size_t jj = 0; jj < maskSize; jj++ ) { char thisMask[ 16 ]; sprintf( thisMask, "%s%ld", ( jj == 0 ) ? "" : ", ", jj ); strcat( outMaskString, thisMask ); } } } static int create_shuffle_kernel( cl_context context, cl_program *outProgram, cl_kernel *outKernel, size_t *outRealVecSize, ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, bool inUseNumerics, bool outUseNumerics, size_t numOrders, ShuffleOrder *inOrders, ShuffleOrder *outOrders, MTdata d, ShuffleMode shuffleMode = kNormalMode ) { char inOrder[18], shuffledOrder[18]; size_t typeSize; char kernelSource[MAX_PROGRAM_SIZE], progLine[ 10240 ]; char *programPtr; char inSizeName[4], outSizeName[4], outRealSizeName[4], inSizeArgName[4]; char outSizeNameTmpVar[4]; /* Create the source; note vec size is the vector length we are testing */ if( inVecSize == 1 ) //|| (inVecSize == 3)) // just have arrays if we go with size 3 inSizeName[ 0 ] = 0; else sprintf( inSizeName, "%ld", inVecSize ); if( inVecSize == 3 ) inSizeArgName[ 0 ] = 0; else strcpy( inSizeArgName, inSizeName ); typeSize = get_explicit_type_size( vecType ); *outRealVecSize = outVecSize; if( outVecSize == 1 || (outVecSize == 3)) outSizeName[ 0 ] = 0; else sprintf( outSizeName, "%d", (int)outVecSize ); if(outVecSize == 1) { outSizeNameTmpVar[0] = 0; } else { sprintf(outSizeNameTmpVar, "%d", (int)outVecSize); } if( *outRealVecSize == 1 || ( *outRealVecSize == 3)) outRealSizeName[ 0 ] = 0; else sprintf( outRealSizeName, "%d", (int)*outRealVecSize ); // Loop through and create the source for all order strings kernelSource[ 0 ] = 0; if (vecType == kDouble) { strcat(kernelSource, "#pragma OPENCL EXTENSION cl_khr_fp64 : enable\n"); } if( shuffleMode == kFunctionCallMode ) { sprintf( progLine, shuffleFnLinePattern, get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName, get_explicit_type_name( vecType ), inSizeName ); strcat(kernelSource, progLine); } // We're going to play a REALLY NASTY trick here. We're going to use the inSize insert point // to put in an entire third parameter if we need it char inParamSizeString[ 1024 ]; if( shuffleMode == kBuiltInDualInputFnMode ) sprintf( inParamSizeString, "%s *secondSource, __global %s%s", inSizeArgName, get_explicit_type_name( vecType ), inSizeArgName ); else strcpy( inParamSizeString, inSizeArgName ); // These two take care of unused variable warnings const char * src2EnableA = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "/*"; const char * src2EnableB = ( shuffleMode == kBuiltInDualInputFnMode ) ? "" : "*/"; sprintf( progLine, shuffleKernelPattern[ 0 ], get_explicit_type_name( vecType ), inParamSizeString, get_explicit_type_name( vecType ), outRealSizeName, get_explicit_type_name( vecType ), inSizeName, src2EnableA, src2EnableB ); strcat(kernelSource, progLine); if( inOrders == NULL ) strcpy( inOrder, get_order_string( sNaturalOrder, outVecSize, (cl_uint)outVecSize, inUseNumerics, d ) ); sprintf( progLine, shuffleTempPattern, get_explicit_type_name( vecType ), outSizeNameTmpVar); strcat(kernelSource, progLine); for( unsigned int i = 0; i < numOrders; i++ ) { if( inOrders != NULL ) strcpy( inOrder, get_order_string( inOrders[ i ], outVecSize, lengthToUse[i], inUseNumerics, d ) ); strcpy( shuffledOrder, get_order_string( outOrders[ i ], outVecSize, lengthToUse[i], outUseNumerics, d ) ); sprintf( progLine, clearTempPattern, get_explicit_type_name( vecType ), outSizeName,get_explicit_type_name( vecType )); strcat(kernelSource, progLine); if( shuffleMode == kNormalMode ) { if(outVecSize == 3 && inVecSize == 3) { // shuffleSinglePatternV3srcV3dst sprintf( progLine, shuffleSinglePatternV3srcV3dst, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); } else if(inVecSize == 3) { // shuffleSinglePatternV3src sprintf( progLine, shuffleSinglePatternV3src, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); } else if(outVecSize == 3) { // shuffleSinglePatternV3dst sprintf( progLine, shuffleSinglePatternV3dst, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); } else { sprintf( progLine, shuffleSinglePattern, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); } } else if( shuffleMode == kFunctionCallMode ) { // log_info("About to make a shuffle line\n"); // fflush(stdout); if(inVecSize == 3 && outVecSize == 3) { // swap last two sprintf( progLine, shuffleFnPatternV3srcV3dst, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); } else if(outVecSize == 3) { // swap last two // log_info("Here\n\n"); // fflush(stdout); sprintf( progLine, shuffleFnPatternV3dst, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); // log_info("\n%s\n", progLine); // fflush(stdout); } else if(inVecSize == 3) { sprintf( progLine, shuffleFnPatternV3src, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); } else { sprintf( progLine, shuffleFnPattern, outVecSize > 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", (int)i, inVecSize > 1 ? "." : "", inVecSize > 1 ? inOrder : "", (int)i ); } } else if( shuffleMode == kArrayAccessMode ) { // now we want to replace inSizeName with inSizeNameShuffleFn int vectorSizeToCastTo = 16; cl_uint item; for (item =0; item 1 ? "." : "", outVecSize > 1 ? shuffledOrder : "", get_explicit_type_name( vecType ), vectorSizeToCastTo, castVectorIndex, vectorSizeToCastTo > 1 ? "." : "", vectorSizeToCastTo > 1 ? inOrder : ""); strcat(kernelSource, progLine); } if(outVecSize == 3) { sprintf(progLine," vstore3(tmp, %d, (__global %s *)dest);\n", i, get_explicit_type_name( vecType )); // probably don't need that last // cast to (__global %s *) where %s is get_explicit_type_name( vecType) } else { sprintf(progLine," dest[%d] = tmp;\n", i ); } } else // shuffleMode == kBuiltInFnMode or kBuiltInDualInputFnMode { if(inVecSize == 3 || outVecSize == 3 || inVecSize == 1 || outVecSize == 1) { // log_info("Skipping test for size 3\n"); continue; } ExplicitType maskType = vecType; if( maskType == kFloat ) maskType = kUInt; if( maskType == kDouble) { maskType = kULong; } char maskString[ 1024 ] = ""; size_t maskSize = outVecSize;// ( shuffleMode == kBuiltInDualInputFnMode ) ? ( outVecSize << 1 ) : outVecSize; generate_shuffle_mask( maskString, maskSize, ( outOrders != NULL ) ? &outOrders[ i ] : NULL ); // Set up a quick prefix, so mask gets unsigned type regardless of the input/output type char maskPrefix[ 2 ] = "u"; if( get_explicit_type_name( maskType )[ 0 ] == 'u' ) maskPrefix[ 0 ] = 0; char progLine2[ 10240 ]; if( shuffleMode == kBuiltInDualInputFnMode ) { sprintf( progLine2, shuffleBuiltInDualPattern, get_explicit_type_name( vecType ), inSizeName, ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]", get_explicit_type_name( vecType ), inSizeName, ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)secondSource )" : "secondSource[ %ld ]", maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskString, ( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" ); if( outVecSize == 3 ) { if( inVecSize == 3 ) sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) ); else sprintf( progLine, progLine2, i, i, i, get_explicit_type_name( vecType ) ); } else { if( inVecSize == 3 ) sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ), i ); else sprintf( progLine, progLine2, i, i, i ); } } else { sprintf( progLine2, shuffleBuiltInPattern, get_explicit_type_name( vecType ), inSizeName, ( inVecSize == 3 ) ? "vload3( %ld, (__global %s *)source )" : "source[ %ld ]", maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskPrefix, get_explicit_type_name( maskType ), outSizeName, maskString, ( outVecSize == 3 ) ? "vstore3( tmp, %ld, (__global %s *)dest )" : "dest[ %ld ] = tmp" ); if( outVecSize == 3 ) { if( inVecSize == 3 ) sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i, get_explicit_type_name( vecType ) ); else sprintf( progLine, progLine2, i, i, get_explicit_type_name( vecType ) ); } else { if( inVecSize == 3 ) sprintf( progLine, progLine2, i, get_explicit_type_name( vecType ), i ); else sprintf( progLine, progLine2, i, i ); } } } strcat( kernelSource, progLine ); if (strlen(kernelSource) > 0.9*MAX_PROGRAM_SIZE) log_info("WARNING: Program has grown to 90%% (%d) of the defined max program size of %d\n", (int)strlen(kernelSource), (int)MAX_PROGRAM_SIZE); } strcat( kernelSource, shuffleKernelPattern[ 1 ] ); // Print the kernel source if (PRINT_SHUFFLE_KERNEL_SOURCE) log_info( "Kernel:%s\n", kernelSource ); /* Create kernel */ programPtr = kernelSource; if( create_single_kernel_helper( context, outProgram, outKernel, 1, (const char **)&programPtr, "sample_test" ) ) { return -1; } return 0; } int test_shuffle_dual_kernel(cl_context context, cl_command_queue queue, ExplicitType vecType, size_t inVecSize, size_t outVecSize, cl_uint *lengthToUse, size_t numOrders, ShuffleOrder *inOrderIdx, ShuffleOrder *outOrderIdx, bool inUseNumerics, bool outUseNumerics, MTdata d, ShuffleMode shuffleMode = kNormalMode ) { clProgramWrapper program; clKernelWrapper kernel; int error; size_t threads[1], localThreads[1]; size_t typeSize, outRealVecSize; clMemWrapper streams[ 3 ]; /* Create the source */ error = create_shuffle_kernel( context, &program, &kernel, &outRealVecSize, vecType, inVecSize, outVecSize, lengthToUse, inUseNumerics, outUseNumerics, numOrders, inOrderIdx, outOrderIdx, d, shuffleMode ); if( error != 0 ) return error; typeSize = get_explicit_type_size( vecType ); #if !(defined(_WIN32) && defined (_MSC_VER)) cl_long inData[ inVecSize * numOrders ]; cl_long inSecondData[ inVecSize * numOrders ]; cl_long outData[ outRealVecSize * numOrders ]; #else cl_long* inData = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long)); cl_long* inSecondData = (cl_long*)_malloca(inVecSize * numOrders * sizeof(cl_long)); cl_long* outData = (cl_long*)_malloca(outRealVecSize * numOrders * sizeof(cl_long)); #endif memset(outData, 0, outRealVecSize * numOrders * sizeof(cl_long) ); generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inData ); if( shuffleMode == kBuiltInDualInputFnMode ) generate_random_data( vecType, (unsigned int)( numOrders * inVecSize ), d, inSecondData ); streams[0] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, typeSize * inVecSize * numOrders, inData, &error); test_error( error, "Unable to create input stream" ); streams[1] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, typeSize * outRealVecSize * numOrders, outData, &error); test_error( error, "Unable to create output stream" ); int argIndex = 0; if( shuffleMode == kBuiltInDualInputFnMode ) { streams[2] = clCreateBuffer(context, CL_MEM_COPY_HOST_PTR, typeSize * inVecSize * numOrders, inSecondData, &error); test_error( error, "Unable to create second input stream" ); error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 2 ] ), &streams[ 2 ] ); test_error( error, "Unable to set kernel argument" ); } // Set kernel arguments error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 0 ] ), &streams[ 0 ] ); test_error( error, "Unable to set kernel argument" ); error = clSetKernelArg( kernel, argIndex++, sizeof( streams[ 1 ] ), &streams[ 1 ] ); test_error( error, "Unable to set kernel argument" ); /* Run the kernel */ threads[0] = numOrders; error = get_max_common_work_group_size( context, kernel, threads[0], &localThreads[0] ); test_error( error, "Unable to get work group size to use" ); error = clEnqueueNDRangeKernel( queue, kernel, 1, NULL, threads, localThreads, 0, NULL, NULL ); test_error( error, "Unable to execute test kernel" ); // Read the results back error = clEnqueueReadBuffer( queue, streams[ 1 ], CL_TRUE, 0, typeSize * numOrders * outRealVecSize, outData, 0, NULL, NULL ); test_error( error, "Unable to read results" ); unsigned char *inDataPtr = (unsigned char *)inData; unsigned char *inSecondDataPtr = (unsigned char *)inSecondData; unsigned char *outDataPtr = (unsigned char *)outData; int ret = 0; int errors_printed = 0; for( size_t i = 0; i < numOrders; i++ ) { unsigned char expected[ 1024 ]; unsigned char temp[ 1024 ]; memset(expected, 0, sizeof(expected)); memset(temp, 0, sizeof(temp)); if( shuffleMode == kBuiltInFnMode ) shuffleVector( inDataPtr, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] ); else if( shuffleMode == kBuiltInDualInputFnMode ) shuffleVectorDual( inDataPtr, inSecondDataPtr, expected, outOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] ); else { shuffleVector( inDataPtr, temp, inOrderIdx[ i ], inVecSize, typeSize, lengthToUse[i] ); shuffleVector2( temp, expected, outOrderIdx[ i ], outVecSize, typeSize, lengthToUse[i] ); } if( memcmp( expected, outDataPtr, outVecSize * typeSize ) != 0 ) { log_error( " ERROR: Shuffle test %d FAILED for %s (memory hex dump follows)\n", (int)i, get_order_name( vecType, inVecSize, outVecSize, inOrderIdx[ i ], outOrderIdx[ i ], lengthToUse[i], d, inUseNumerics, outUseNumerics ) ); print_hex_mem_dump( inDataPtr, ( shuffleMode == kBuiltInDualInputFnMode ) ? inSecondDataPtr : NULL, expected, outDataPtr, inVecSize, outVecSize, typeSize ); if( ( shuffleMode == kBuiltInFnMode ) || ( shuffleMode == kBuiltInDualInputFnMode ) ) { // Mask would've been different for every shuffle done, so we have to regen it to print it char maskString[ 1024 ]; generate_shuffle_mask( maskString, outVecSize, ( outOrderIdx != NULL ) ? &outOrderIdx[ i ] : NULL ); log_error( " Mask: %s\n", maskString ); } ret++; errors_printed++; if (errors_printed > MAX_ERRORS_TO_PRINT) { log_info("Further errors suppressed.\n"); return ret; } } inDataPtr += inVecSize * typeSize; inSecondDataPtr += inVecSize * typeSize; outDataPtr += outRealVecSize * typeSize; } return ret; } void build_random_shuffle_order( ShuffleOrder &outIndices, unsigned int length, unsigned int selectLength, bool allowRepeats, MTdata d ) { char flags[ 16 ]; memset( flags, 0, sizeof( flags ) ); for( unsigned int i = 0; i < length; i++ ) { char selector = (char)random_in_range( 0, selectLength - 1, d ); if( !allowRepeats ) { while( flags[ (int)selector ] ) selector = (char)random_in_range( 0, selectLength - 1, d ); flags[ (int)selector ] = true; } outIndices[ i ] = selector; } } class shuffleBuffer { public: shuffleBuffer( cl_context ctx, cl_command_queue queue, ExplicitType type, size_t inSize, size_t outSize, ShuffleMode mode ) { mContext = ctx; mQueue = queue; mVecType = type; mInVecSize = inSize; mOutVecSize = outSize; mShuffleMode = mode; mCount = 0; // Here's the deal with mLengthToUse[i]. // if you have, for instance // uchar4 dst; // uchar8 src; // you can do // src.s0213 = dst.s1045; // but you can also do // src.s02 = dst.s10; // which has a different effect // The intent with these "sub lengths" is to test all such // possibilities // Calculate a range of sub-lengths within the vector to copy. int i; size_t maxSize = (mInVecSize < mOutVecSize) ? mInVecSize : mOutVecSize; for(i=0; i 0 ) { err = test_shuffle_dual_kernel( mContext, mQueue, mVecType, mInVecSize, mOutVecSize, mLengthToUse, mCount, mInOrders, mOutOrders, true, true, d, mShuffleMode ); mCount = 0; } return err; } protected: cl_context mContext; cl_command_queue mQueue; ExplicitType mVecType; size_t mInVecSize, mOutVecSize, mCount; ShuffleMode mShuffleMode; cl_uint mLengthToUse[ NUM_TESTS ]; ShuffleOrder mInOrders[ NUM_TESTS ], mOutOrders[ NUM_TESTS ]; }; int test_shuffle_random(cl_device_id device, cl_context context, cl_command_queue queue, ShuffleMode shuffleMode, MTdata d ) { ExplicitType vecType[] = { kChar, kUChar, kShort, kUShort, kInt, kUInt, kLong, kULong, kFloat, kDouble }; unsigned int vecSizes[] = { 1, 2, 3, 4, 8, 16, 0 }; unsigned int srcIdx, dstIdx, typeIndex; int error = 0, totalError = 0, prevTotalError = 0; RandomSeed seed(gRandomSeed); for( typeIndex = 0; typeIndex < 10; typeIndex++ ) { //log_info( "\n\t%s... ", get_explicit_type_name( vecType[ typeIndex ] ) ); //fflush( stdout ); if (vecType[typeIndex] == kDouble) { if (!is_extension_available(device, "cl_khr_fp64")) { log_info("Extension cl_khr_fp64 not supported; skipping double tests.\n"); continue; } log_info("Testing doubles.\n"); } if ((vecType[typeIndex] == kLong || vecType[typeIndex] == kULong) && !gHasLong ) { log_info("Long types are unsupported, skipping."); continue; } error = 0; for( srcIdx = 0; vecSizes[ srcIdx ] != 0 /*&& error == 0*/; srcIdx++ ) { for( dstIdx = 0; vecSizes[ dstIdx ] != 0 /*&& error == 0*/; dstIdx++ ) { if( ( ( shuffleMode == kBuiltInDualInputFnMode ) || ( shuffleMode == kBuiltInFnMode ) ) && ( ( vecSizes[ dstIdx ] & 1 ) || ( vecSizes[ srcIdx ] & 1 ) ) ) { // Built-in shuffle functions don't work on size 1 (scalars) or size 3 (vec3s) continue; } log_info("Testing [%s%d to %s%d]... ", get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[srcIdx], get_explicit_type_name( vecType[ typeIndex ] ) , vecSizes[dstIdx]); shuffleBuffer buffer( context, queue, vecType[ typeIndex ], vecSizes[ srcIdx ], vecSizes[ dstIdx ], shuffleMode ); int numTests = NUM_TESTS*NUM_ITERATIONS_PER_TEST; for( int i = 0; i < numTests /*&& error == 0*/; i++ ) { ShuffleOrder src, dst; if( shuffleMode == kBuiltInFnMode ) { build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d ); } else if(shuffleMode == kBuiltInDualInputFnMode) { build_random_shuffle_order(dst, vecSizes[dstIdx], 2*vecSizes[srcIdx], true, d); } else { build_random_shuffle_order( src, vecSizes[ dstIdx ], vecSizes[ srcIdx ], true, d ); build_random_shuffle_order( dst, vecSizes[ dstIdx ], vecSizes[ dstIdx ], false, d ); } error = buffer.AddRun( src, dst, seed ); if (error) totalError++; } int test_error = buffer.Flush(seed); if (test_error) totalError++; if (totalError == prevTotalError) log_info("\tPassed.\n"); else { log_error("\tFAILED.\n"); prevTotalError = totalError; } } } } return totalError; } int test_shuffle_copy(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { RandomSeed seed(gRandomSeed); return test_shuffle_random( device, context, queue, kNormalMode, seed ); } int test_shuffle_function_call(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { RandomSeed seed(gRandomSeed); return test_shuffle_random( device, context, queue, kFunctionCallMode, seed ); } int test_shuffle_array_cast(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { RandomSeed seed(gRandomSeed); return test_shuffle_random( device, context, queue, kArrayAccessMode, seed ); } int test_shuffle_built_in(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { RandomSeed seed(gRandomSeed); return test_shuffle_random( device, context, queue, kBuiltInFnMode, seed ); } int test_shuffle_built_in_dual_input(cl_device_id device, cl_context context, cl_command_queue queue, int n_elems) { RandomSeed seed(gRandomSeed); return test_shuffle_random( device, context, queue, kBuiltInDualInputFnMode, seed ); }