You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
406 lines
11 KiB
406 lines
11 KiB
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "structs.h"
|
|
|
|
|
|
#include "defines.h"
|
|
|
|
#define DEBUG_MEM_ALLOC 0
|
|
|
|
/** typedef struct _bufferStruct
|
|
{
|
|
void * m_pIn;
|
|
void * m_pOut;
|
|
|
|
cl_mem m_outBuffer;
|
|
cl_mem m_inBuffer;
|
|
|
|
size_t m_bufSize;
|
|
} bufferStruct;
|
|
*/
|
|
|
|
|
|
clState *newClState(cl_device_id device, cl_context context,
|
|
cl_command_queue queue)
|
|
{
|
|
clState *pResult = (clState *)malloc(sizeof(clState));
|
|
#if DEBUG_MEM_ALLOC
|
|
log_info("malloc clState * %x\n", pResult);
|
|
#endif
|
|
|
|
pResult->m_device = device;
|
|
pResult->m_context = context;
|
|
pResult->m_queue = queue;
|
|
|
|
pResult->m_kernel = NULL;
|
|
pResult->m_program = NULL;
|
|
return pResult;
|
|
}
|
|
|
|
/**
 * Releases the program and kernel held by pState (if any) and then frees
 * the state itself.
 *
 * @param pState state to destroy; NULL is accepted and ignored
 * @return always NULL
 */
clState *destroyClState(clState *pState)
{
    if (pState == NULL)
    {
        // clStateDestroyProgramAndKernel() dereferences its argument, so a
        // NULL state must be filtered out here.
        return NULL;
    }

    clStateDestroyProgramAndKernel(pState);
#if DEBUG_MEM_ALLOC
    log_info("delete (free) clState * %p\n", (void *)pState);
#endif
    free(pState);
    return NULL;
}
|
|
|
|
|
|
/**
 * Hands a single source string to create_single_kernel_helper(), which
 * receives the addresses of the state's program and kernel handles to fill.
 *
 * @param pState     state providing the context and receiving the handles
 * @param prog       program source text
 * @param kernelName kernel name forwarded to the helper
 * @return whatever create_single_kernel_helper() returns
 */
int clStateMakeProgram(clState *pState, const char *prog,
                       const char *kernelName)
{
    // The helper takes an array of source strings; we only ever have one.
    const char *sources[1] = { NULL };
    int status;

    sources[0] = prog;
    status = create_single_kernel_helper(pState->m_context,
                                         &pState->m_program,
                                         &pState->m_kernel, 1, sources,
                                         kernelName);
#if DEBUG_MEM_ALLOC
    log_info("create program and kernel\n");
#endif
    return status;
}
|
|
|
|
/**
 * Records numThreads in the state and enqueues the state's kernel as a
 * one-dimensional range of that global size (no offset, no explicit local
 * size, no event dependencies).
 *
 * @param pState     state holding the queue and kernel
 * @param numThreads global work size; also stored in pState->m_numThreads
 * @return 0 on success, -1 if the enqueue call did not return CL_SUCCESS
 */
int runKernel(clState *pState, size_t numThreads)
{
    int status;

    // The stored copy is what gets passed (by address) as the global size.
    pState->m_numThreads = numThreads;

    status = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel, 1,
                                    NULL, &pState->m_numThreads, NULL, 0,
                                    NULL, NULL);
    if (status == CL_SUCCESS)
    {
        return 0;
    }

    log_error("clEnqueueNDRangeKernel returned %d (%x)\n", status, status);
    return -1;
}
|
|
|
|
|
|
/**
 * Releases the kernel and then the program stored in pState, resetting each
 * handle to NULL afterwards. Handles that are already NULL are skipped.
 *
 * @param pState state whose kernel/program are released; must not be NULL
 */
void clStateDestroyProgramAndKernel(clState *pState)
{
#if DEBUG_MEM_ALLOC
    log_info("destroy program and kernel\n");
#endif

    if (pState->m_kernel)
    {
        clReleaseKernel(pState->m_kernel);
        pState->m_kernel = NULL;
    }

    if (pState->m_program)
    {
        clReleaseProgram(pState->m_program);
        pState->m_program = NULL;
    }
}
|
|
|
|
/**
 * Allocates a bufferStruct together with its host-side input/output memory
 * and its device-side input/output buffers.
 *
 * The device input buffer is created CL_MEM_READ_ONLY with inSize bytes and
 * the device output buffer CL_MEM_WRITE_ONLY with outSize bytes, both in
 * pClState's context. The "uploaded" flag starts out false.
 *
 * @param inSize   size in bytes of the input host memory and device buffer
 * @param outSize  size in bytes of the output host memory and device buffer
 * @param pClState state providing the context for clCreateBuffer
 * @return the new struct, or NULL if any allocation failed (everything that
 *         had been allocated up to that point is released again)
 */
bufferStruct *newBufferStruct(size_t inSize, size_t outSize, clState *pClState)
{
    int error;
    bufferStruct *pResult = (bufferStruct *)malloc(sizeof(bufferStruct));
#if DEBUG_MEM_ALLOC
    log_info("malloc bufferStruct * %p\n", (void *)pResult);
#endif
    if (pResult == NULL)
    {
        vlog_error("malloc failed for bufferStruct\n");
        return NULL;
    }

    // Put every field into a known state before anything can fail:
    // destroyBufferStruct() inspects all four pointers/handles, so none of
    // them may be left uninitialised on the error paths below.
    pResult->m_bufSizeIn = inSize;
    pResult->m_bufSizeOut = outSize;
    pResult->m_pIn = NULL;
    pResult->m_pOut = NULL;
    pResult->m_inBuffer = NULL;
    pResult->m_outBuffer = NULL;
    pResult->m_bufferUploaded = false;

    pResult->m_pIn = malloc(inSize);
    pResult->m_pOut = malloc(outSize);
#if DEBUG_MEM_ALLOC
    log_info("malloc m_pIn %p\n", pResult->m_pIn);
    log_info("malloc m_pOut %p\n", pResult->m_pOut);
#endif
    if (pResult->m_pIn == NULL || pResult->m_pOut == NULL)
    {
        vlog_error("malloc failed for host buffers\n");
        return destroyBufferStruct(pResult, pClState);
    }

    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
                                         inSize, NULL, &error);
    if (pResult->m_inBuffer == NULL)
    {
        vlog_error("clCreateArray failed for input (%d)\n", error);
        return destroyBufferStruct(pResult, pClState);
    }
#if DEBUG_MEM_ALLOC
    log_info("clCreateBuffer %p\n", (void *)pResult->m_inBuffer);
#endif

    pResult->m_outBuffer = clCreateBuffer(
        pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error);
    if (pResult->m_outBuffer == NULL)
    {
        vlog_error("clCreateArray failed for output (%d)\n", error);
        return destroyBufferStruct(pResult, pClState);
    }
#if DEBUG_MEM_ALLOC
    log_info("clCreateBuffer %p\n", (void *)pResult->m_outBuffer);
#endif

    return pResult;
}
|
|
|
|
/**
 * Releases everything owned by a bufferStruct: the device output buffer, the
 * device input buffer, the host input memory, the host output memory and
 * finally the struct itself. Members that are NULL are skipped.
 *
 * @param destroyMe struct to destroy; NULL is accepted and ignored
 * @param pClState  not used by this function
 * @return always NULL
 */
bufferStruct *destroyBufferStruct(bufferStruct *destroyMe, clState *pClState)
{
    if (destroyMe == NULL)
    {
        return NULL;
    }

    if (destroyMe->m_outBuffer != NULL)
    {
#if DEBUG_MEM_ALLOC
        log_info("clReleaseMemObject %p\n", (void *)destroyMe->m_outBuffer);
#endif
        clReleaseMemObject(destroyMe->m_outBuffer);
        destroyMe->m_outBuffer = NULL;
    }

    if (destroyMe->m_inBuffer != NULL)
    {
#if DEBUG_MEM_ALLOC
        // Log the handle that is actually being released here (the input
        // buffer); the output handle has already been reset to NULL above.
        log_info("clReleaseMemObject %p\n", (void *)destroyMe->m_inBuffer);
#endif
        clReleaseMemObject(destroyMe->m_inBuffer);
        destroyMe->m_inBuffer = NULL;
    }

    if (destroyMe->m_pIn != NULL)
    {
#if DEBUG_MEM_ALLOC
        log_info("delete (free) m_pIn %p\n", destroyMe->m_pIn);
#endif
        free(destroyMe->m_pIn);
        destroyMe->m_pIn = NULL;
    }

    if (destroyMe->m_pOut != NULL)
    {
#if DEBUG_MEM_ALLOC
        log_info("delete (free) m_pOut %p\n", destroyMe->m_pOut);
#endif
        free(destroyMe->m_pOut);
        destroyMe->m_pOut = NULL;
    }

#if DEBUG_MEM_ALLOC
    log_info("delete (free) bufferStruct * %p\n", (void *)destroyMe);
#endif
    free((void *)destroyMe);
    return NULL;
}
|
|
|
|
/**
 * Fills the host input memory (pBufferStruct->m_pIn) with countIn elements
 * of width typeSize bytes, generated from a counter that starts at 0 and
 * increases by one per element.
 *
 *  - typeSize 1 / 2: the counter truncated to 8 / 16 bits.
 *  - typeSize 4:     the counter truncated to 32 bits, unless g_wimpyMode is
 *                    set, in which case the sign alternates (+, -, +, ...).
 *  - typeSize 8:     the counter with alternating sign (+, -, +, ...).
 *  - anything else:  an error is logged and nothing is written.
 *
 * @param pBufferStruct buffer whose m_pIn memory is filled
 * @param pClState      not used by this function
 * @param typeSize      element width in bytes (1, 2, 4 or 8)
 * @param countIn       number of elements to write
 * @param countOut      not used by this function
 */
void initContents(bufferStruct *pBufferStruct, clState *pClState,
                  size_t typeSize, size_t countIn, size_t countOut)
{
    size_t idx;
    uint64_t counter = 0;

    if (typeSize == 1)
    {
        uint8_t *bytes = (uint8_t *)(pBufferStruct->m_pIn);
        for (idx = 0; idx < countIn; ++idx, ++counter)
        {
            bytes[idx] = (uint8_t)counter;
        }
    }
    else if (typeSize == 2)
    {
        uint16_t *halves = (uint16_t *)(pBufferStruct->m_pIn);
        for (idx = 0; idx < countIn; ++idx, ++counter)
        {
            halves[idx] = (uint16_t)counter;
        }
    }
    else if (typeSize == 4)
    {
        if (g_wimpyMode)
        {
            // The short test doesn't iterate over the entire 32 bit space
            // so we alternate between positive and negative values
            int32_t *words = (int32_t *)(pBufferStruct->m_pIn);
            int32_t sign = 1;
            for (idx = 0; idx < countIn; ++idx, ++counter)
            {
                words[idx] = (int32_t)counter * sign;
                sign = -sign;
            }
        }
        else
        {
            uint32_t *words = (uint32_t *)(pBufferStruct->m_pIn);
            for (idx = 0; idx < countIn; ++idx, ++counter)
            {
                words[idx] = (uint32_t)counter;
            }
        }
    }
    else if (typeSize == 8)
    {
        // We don't iterate over the entire space of 64 bit so for the
        // selects, we want to test positive and negative values
        int64_t *quads = (int64_t *)(pBufferStruct->m_pIn);
        int64_t sign = 1;
        for (idx = 0; idx < countIn; ++idx, ++counter)
        {
            quads[idx] = counter * sign;
            sign = -sign;
        }
    }
    else
    {
        log_error("invalid type size %x\n", (int)typeSize);
    }
}
|
|
|
|
/**
 * Makes the kernel in pClState ready to run on pBufferStruct's buffers.
 *
 * If the input data has not been uploaded yet, it is written (blocking) from
 * m_pIn to the device input buffer and the struct is marked as uploaded.
 * Then the device input buffer is bound as kernel argument 0 and the device
 * output buffer as kernel argument 1.
 *
 * @param pBufferStruct buffers to upload and bind
 * @param pClState      state holding the queue and kernel
 * @return 0 on success, -1 if any OpenCL call did not return CL_SUCCESS
 */
int pushArgs(bufferStruct *pBufferStruct, clState *pClState)
{
    int status;

    if (!pBufferStruct->m_bufferUploaded)
    {
        status = clEnqueueWriteBuffer(
            pClState->m_queue, pBufferStruct->m_inBuffer, CL_TRUE, 0,
            pBufferStruct->m_bufSizeIn, pBufferStruct->m_pIn, 0, NULL, NULL);
#if DEBUG_MEM_ALLOC
        log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer);
#endif
        if (status != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            return -1;
        }
        // Remember the upload so later calls only rebind the arguments.
        pBufferStruct->m_bufferUploaded = true;
    }

    // The argument is the buffer handle itself, so its size is the size of
    // the handle, not the size of the buffer contents.
    status = clSetKernelArg(pClState->m_kernel, 0,
                            sizeof(pBufferStruct->m_inBuffer),
                            &pBufferStruct->m_inBuffer);
    if (status != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed, first arg (0)\n");
        return -1;
    }

    status = clSetKernelArg(pClState->m_kernel, 1,
                            sizeof(pBufferStruct->m_outBuffer),
                            &pBufferStruct->m_outBuffer);
    if (status != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed, second arg (1)\n");
        return -1;
    }

    return 0;
}
|
|
|
|
/**
 * Reads m_bufSizeOut bytes from the device output buffer into the host
 * memory at m_pOut, using a blocking read on pClState's queue.
 *
 * @param pBufferStruct buffers to read from / into
 * @param pClState      state holding the command queue
 * @return 0 on success, -1 if the read did not return CL_SUCCESS
 */
int retrieveResults(bufferStruct *pBufferStruct, clState *pClState)
{
    const int status = clEnqueueReadBuffer(
        pClState->m_queue, pBufferStruct->m_outBuffer, CL_TRUE, 0,
        pBufferStruct->m_bufSizeOut, pBufferStruct->m_pOut, 0, NULL, NULL);

    if (status == CL_SUCCESS)
    {
        return 0;
    }

    log_error("clEnqueueReadBuffer failed\n");
    return -1;
}
|
|
|
|
// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
|
|
// and g_arrVecSizes
|
|
int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
|
|
size_t minAlign)
|
|
{
|
|
size_t i;
|
|
cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
|
|
for (i = 0; i < pClState->m_numThreads; ++i)
|
|
{
|
|
if ((targetArr[i]) % minAlign != (cl_uint)0)
|
|
{
|
|
vlog_error("Error %d (of %d). Expected a multple of %x, got %x\n",
|
|
i, pClState->m_numThreads, minAlign, targetArr[i]);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* log_info("\n");
|
|
for(i = 0; i < 4; ++i) {
|
|
log_info("%lx, ", targetArr[i]);
|
|
}
|
|
log_info("\n");
|
|
fflush(stdout); */
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Verifies that each of the first m_numThreads cl_int values in the host
 * output memory (m_pOut) equals the expected step, which is vecWidth except
 * that a width of 3 is checked against 4.
 *
 * @param pBufferStruct buffers whose m_pOut holds the results to check
 * @param pClState      state providing m_numThreads, the number of results
 * @param typeSize      not used by this function
 * @param vecWidth      vector width the results are compared against
 * @return 0 if every value matches, -1 on the first mismatch (an error is
 *         logged)
 */
int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
                         size_t typeSize, size_t vecWidth)
{
    size_t i;
    cl_int targetSize = (cl_int)vecWidth;
    cl_int *targetArr = (cl_int *)(pBufferStruct->m_pOut);

    if (targetSize == 3)
    {
        targetSize = 4; // hack for 4-aligned vec3 types
    }

    for (i = 0; i < pClState->m_numThreads; ++i)
    {
        if (targetArr[i] != targetSize)
        {
            // i and m_numThreads are size_t: %zu is the matching
            // specifier (%ld is the wrong width where long is 32-bit).
            vlog_error("Error %zu (of %zu). Expected %d, got %d\n", i,
                       pClState->m_numThreads, targetSize, targetArr[i]);
            return -1;
        }
    }
    return 0;
}
|
|
|
|
// vecSizeIdx indexes into g_arrVecAlignMasks, g_arrVecSizeNames
|
|
// and g_arrVecSizes
|
|
int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
|
|
size_t totSize, size_t beforeSize)
|
|
{
|
|
size_t i;
|
|
cl_uint *targetArr = (cl_uint *)(pBufferStruct->m_pOut);
|
|
for (i = 0; i < pClState->m_numThreads; ++i)
|
|
{
|
|
if ((targetArr[i] - beforeSize) % totSize != (cl_uint)0)
|
|
{
|
|
vlog_error("Error %d (of %d). Expected %d more than a multple of "
|
|
"%d, got %d \n",
|
|
i, pClState->m_numThreads, beforeSize, totSize,
|
|
targetArr[i]);
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
/* log_info("\n");
|
|
for(i = 0; i < 4; ++i) {
|
|
log_info("%lx, ", targetArr[i]);
|
|
}
|
|
log_info("\n");
|
|
fflush(stdout); */
|
|
return 0;
|
|
}
|