You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

406 lines
11 KiB

//
// Copyright (c) 2017 The Khronos Group Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
#include "structs.h"
#include "defines.h"
// Set to 1 to compile in the log_info() tracing of the allocations and
// releases performed in this file.
#define DEBUG_MEM_ALLOC 0
/** Reference sketch of bufferStruct, listing the members this file touches.
    NOTE(review): the real definition is not in this file (presumably
    structs.h); the types of the last three members are inferred from how
    they are assigned here - confirm against the header.
typedef struct _bufferStruct
{
void * m_pIn;           // host input storage, malloc(inSize)
void * m_pOut;          // host output storage, malloc(outSize)
cl_mem m_outBuffer;     // device buffer created with CL_MEM_WRITE_ONLY
cl_mem m_inBuffer;      // device buffer created with CL_MEM_READ_ONLY
size_t m_bufSizeIn;     // byte size used for m_pIn and m_inBuffer
size_t m_bufSizeOut;    // byte size used for m_pOut and m_outBuffer
bool m_bufferUploaded;  // set once pushArgs() has written m_pIn to m_inBuffer
} bufferStruct;
*/
/**
 * Allocate a clState and bind it to the given device, context and queue.
 *
 * The kernel and program handles start out NULL and the thread count starts
 * at zero. Release the returned object with destroyClState().
 *
 * @param device   device handle stored in m_device
 * @param context  context handle stored in m_context
 * @param queue    command queue handle stored in m_queue
 * @return the new state, or NULL if the allocation failed.
 */
clState *newClState(cl_device_id device, cl_context context,
                    cl_command_queue queue)
{
    clState *pResult = (clState *)malloc(sizeof(*pResult));
#if DEBUG_MEM_ALLOC
    log_info("malloc clState * %p\n", (void *)pResult);
#endif
    if (pResult == NULL)
    {
        log_error("newClState: out of memory\n");
        return NULL;
    }

    pResult->m_device = device;
    pResult->m_context = context;
    pResult->m_queue = queue;
    pResult->m_kernel = NULL;
    pResult->m_program = NULL;
    // Keep the work size defined until runKernel() assigns the real value.
    pResult->m_numThreads = 0;
    return pResult;
}
/**
 * Release the kernel/program held by a clState and free the state itself.
 *
 * @param pState  state to destroy; NULL is accepted and ignored.
 * @return always NULL, so callers can write `p = destroyClState(p);`.
 */
clState *destroyClState(clState *pState)
{
    if (pState == NULL)
    {
        return NULL;
    }

    clStateDestroyProgramAndKernel(pState);
#if DEBUG_MEM_ALLOC
    log_info("delete (free) clState * %p\n", (void *)pState);
#endif
    free(pState);
    return NULL;
}
/**
 * Build a program and kernel from a single source string and store both
 * handles in the state (m_program / m_kernel).
 *
 * @param pState      state supplying the context and receiving the handles
 * @param prog        program source text
 * @param kernelName  name of the kernel to create
 * @return the value returned by create_single_kernel_helper().
 */
int clStateMakeProgram(clState *pState, const char *prog,
                       const char *kernelName)
{
    const char *sources[1];
    int status;

    sources[0] = prog;
    status = create_single_kernel_helper(pState->m_context,
                                         &pState->m_program,
                                         &pState->m_kernel, 1, sources,
                                         kernelName);
#if DEBUG_MEM_ALLOC
    log_info("create program and kernel\n");
#endif
    return status;
}
/**
 * Enqueue the state's kernel as a one-dimensional range of numThreads
 * work-items. The count is recorded in m_numThreads before the enqueue.
 *
 * @return 0 on success, -1 if clEnqueueNDRangeKernel reported an error.
 */
int runKernel(clState *pState, size_t numThreads)
{
    int status;

    pState->m_numThreads = numThreads;

    status = clEnqueueNDRangeKernel(pState->m_queue, pState->m_kernel,
                                    1 /* work_dim */, NULL,
                                    &pState->m_numThreads, NULL, 0, NULL,
                                    NULL);
    if (status == CL_SUCCESS)
    {
        return 0;
    }

    log_error("clEnqueueNDRangeKernel returned %d (%x)\n", status, status);
    return -1;
}
/**
 * Release the kernel and program handles held by the state, if any, and
 * leave both members NULL so the call can safely be repeated.
 */
void clStateDestroyProgramAndKernel(clState *pState)
{
#if DEBUG_MEM_ALLOC
    log_info("destroy program and kernel\n");
#endif

    // The kernel is released before the program it was created from.
    if (NULL != pState->m_kernel)
    {
        clReleaseKernel(pState->m_kernel);
    }
    pState->m_kernel = NULL;

    if (NULL != pState->m_program)
    {
        clReleaseProgram(pState->m_program);
    }
    pState->m_program = NULL;
}
/**
 * Allocate a bufferStruct with host storage and device buffers for one
 * input and one output array.
 *
 * @param inSize    size in bytes of the input storage / read-only buffer
 * @param outSize   size in bytes of the output storage / write-only buffer
 * @param pClState  state whose context is used to create the device buffers
 * @return the new bufferStruct, or NULL if any allocation failed. On failure
 *         everything acquired so far is released again.
 */
bufferStruct *newBufferStruct(size_t inSize, size_t outSize, clState *pClState)
{
    int error = 0;
    bufferStruct *pResult = (bufferStruct *)malloc(sizeof(*pResult));
#if DEBUG_MEM_ALLOC
    log_info("malloc bufferStruct * %p\n", (void *)pResult);
#endif
    if (pResult == NULL)
    {
        vlog_error("malloc failed for bufferStruct\n");
        return NULL;
    }

    // Put every member into a known state before anything can fail, so the
    // cleanup in destroyBufferStruct() never inspects an indeterminate
    // pointer or handle.
    pResult->m_bufSizeIn = inSize;
    pResult->m_bufSizeOut = outSize;
    pResult->m_pIn = NULL;
    pResult->m_pOut = NULL;
    pResult->m_inBuffer = NULL;
    pResult->m_outBuffer = NULL;
    pResult->m_bufferUploaded = false;

    pResult->m_pIn = malloc(inSize);
    pResult->m_pOut = malloc(outSize);
#if DEBUG_MEM_ALLOC
    log_info("malloc m_pIn %p\n", pResult->m_pIn);
    log_info("malloc m_pOut %p\n", pResult->m_pOut);
#endif
    // malloc(0) may legitimately return NULL, so only a NULL result for a
    // non-zero request counts as a failure.
    if ((pResult->m_pIn == NULL && inSize != 0)
        || (pResult->m_pOut == NULL && outSize != 0))
    {
        vlog_error("malloc failed for host buffers (in %zu, out %zu bytes)\n",
                   inSize, outSize);
        return destroyBufferStruct(pResult, pClState);
    }

    pResult->m_inBuffer = clCreateBuffer(pClState->m_context, CL_MEM_READ_ONLY,
                                         inSize, NULL, &error);
    if (pResult->m_inBuffer == NULL)
    {
        vlog_error("clCreateBuffer failed for input (%d)\n", error);
        return destroyBufferStruct(pResult, pClState);
    }
#if DEBUG_MEM_ALLOC
    log_info("clCreateBuffer %p\n", (void *)pResult->m_inBuffer);
#endif

    pResult->m_outBuffer = clCreateBuffer(
        pClState->m_context, CL_MEM_WRITE_ONLY, outSize, NULL, &error);
    if (pResult->m_outBuffer == NULL)
    {
        vlog_error("clCreateBuffer failed for output (%d)\n", error);
        return destroyBufferStruct(pResult, pClState);
    }
#if DEBUG_MEM_ALLOC
    log_info("clCreateBuffer %p\n", (void *)pResult->m_outBuffer);
#endif

    return pResult;
}
/**
 * Release the device buffers and host storage owned by a bufferStruct and
 * free the struct itself.
 *
 * @param destroyMe  struct to destroy; NULL is accepted and ignored.
 * @param pClState   not used by this function.
 * @return always NULL, so callers can write `p = destroyBufferStruct(p, s);`.
 */
bufferStruct *destroyBufferStruct(bufferStruct *destroyMe, clState *pClState)
{
    (void)pClState;

    if (destroyMe == NULL)
    {
        return NULL;
    }

    if (destroyMe->m_outBuffer != NULL)
    {
#if DEBUG_MEM_ALLOC
        log_info("clReleaseMemObject %p\n", (void *)destroyMe->m_outBuffer);
#endif
        clReleaseMemObject(destroyMe->m_outBuffer);
        destroyMe->m_outBuffer = NULL;
    }

    if (destroyMe->m_inBuffer != NULL)
    {
#if DEBUG_MEM_ALLOC
        log_info("clReleaseMemObject %p\n", (void *)destroyMe->m_inBuffer);
#endif
        clReleaseMemObject(destroyMe->m_inBuffer);
        destroyMe->m_inBuffer = NULL;
    }

#if DEBUG_MEM_ALLOC
    if (destroyMe->m_pIn != NULL)
    {
        log_info("delete (free) m_pIn %p\n", destroyMe->m_pIn);
    }
    if (destroyMe->m_pOut != NULL)
    {
        log_info("delete (free) m_pOut %p\n", destroyMe->m_pOut);
    }
#endif
    // free(NULL) is a no-op, so the host pointers need no guard.
    free(destroyMe->m_pIn);
    destroyMe->m_pIn = NULL;
    free(destroyMe->m_pOut);
    destroyMe->m_pOut = NULL;

#if DEBUG_MEM_ALLOC
    log_info("delete (free) bufferStruct * %p\n", (void *)destroyMe);
#endif
    free(destroyMe);
    return NULL;
}
/**
 * Fill the host input storage (m_pIn) with a counting pattern.
 *
 * Elements of 1 and 2 bytes, and 4 bytes outside wimpy mode, receive
 * 0, 1, 2, ... truncated to the element width. Elements of 8 bytes, and
 * 4 bytes in wimpy mode, receive 0, -1, 2, -3, ... so that both signs are
 * covered even though the whole value range is not iterated.
 *
 * @param pBufferStruct  buffers whose m_pIn is written
 * @param pClState       not used by this function
 * @param typeSize       element size in bytes; must be 1, 2, 4 or 8
 * @param countIn        number of elements to write; clamped (with an error
 *                       message) to what fits in m_bufSizeIn bytes
 * @param countOut       not used by this function
 */
void initContents(bufferStruct *pBufferStruct, clState *pClState,
                  size_t typeSize, size_t countIn, size_t countOut)
{
    size_t i;
    size_t capacity;
    uint64_t start = 0;

    (void)pClState;
    (void)countOut;

    if (typeSize != 1 && typeSize != 2 && typeSize != 4 && typeSize != 8)
    {
        log_error("invalid type size %x\n", (int)typeSize);
        return;
    }
    if (countIn == 0)
    {
        return;
    }
    if (pBufferStruct->m_pIn == NULL)
    {
        log_error("initContents: no host input storage to fill\n");
        return;
    }

    // m_pIn holds m_bufSizeIn bytes; never write past the end of it.
    capacity = (size_t)pBufferStruct->m_bufSizeIn / typeSize;
    if (countIn > capacity)
    {
        log_error("initContents: %zu elements of %zu bytes exceed the %zu "
                  "byte input storage; filling %zu elements\n",
                  countIn, typeSize, (size_t)pBufferStruct->m_bufSizeIn,
                  capacity);
        countIn = capacity;
    }

    switch (typeSize)
    {
        case 1: {
            uint8_t *ub = (uint8_t *)(pBufferStruct->m_pIn);
            for (i = 0; i < countIn; ++i)
            {
                ub[i] = (uint8_t)start++;
            }
            break;
        }
        case 2: {
            uint16_t *us = (uint16_t *)(pBufferStruct->m_pIn);
            for (i = 0; i < countIn; ++i)
            {
                us[i] = (uint16_t)start++;
            }
            break;
        }
        case 4: {
            if (!g_wimpyMode)
            {
                uint32_t *ui = (uint32_t *)(pBufferStruct->m_pIn);
                for (i = 0; i < countIn; ++i)
                {
                    ui[i] = (uint32_t)start++;
                }
            }
            else
            {
                // The short test doesn't iterate over the entire 32 bit
                // space, so alternate between positive and negative values.
                int32_t *si = (int32_t *)(pBufferStruct->m_pIn);
                int32_t sign = 1;
                for (i = 0; i < countIn; ++i, ++start)
                {
                    si[i] = (int32_t)start * sign;
                    sign = -sign;
                }
            }
            break;
        }
        case 8: {
            // The 64 bit space is never iterated in full, so alternate
            // between positive and negative values here as well.
            int64_t *ll = (int64_t *)(pBufferStruct->m_pIn);
            int64_t sign = 1;
            for (i = 0; i < countIn; ++i, ++start)
            {
                // Multiply in unsigned arithmetic (as the mixed-type
                // expression always did) and convert the result back.
                ll[i] = (int64_t)(start * (uint64_t)sign);
                sign = -sign;
            }
            break;
        }
        default: {
            // Unreachable: typeSize was validated above.
            break;
        }
    }
}
/**
 * Make the buffers available to the kernel.
 *
 * On the first call the host input storage is written to the device input
 * buffer with a blocking write, and m_bufferUploaded is set so later calls
 * skip the upload. Every call then binds the input buffer as kernel
 * argument 0 and the output buffer as kernel argument 1.
 *
 * @return 0 on success, -1 if the write or either clSetKernelArg failed.
 */
int pushArgs(bufferStruct *pBufferStruct, clState *pClState)
{
    int status;

    if (!pBufferStruct->m_bufferUploaded)
    {
        status = clEnqueueWriteBuffer(
            pClState->m_queue, pBufferStruct->m_inBuffer, CL_TRUE, 0,
            pBufferStruct->m_bufSizeIn, pBufferStruct->m_pIn, 0, NULL, NULL);
#if DEBUG_MEM_ALLOC
        log_info("clEnqueueWriteBuffer %x\n", pBufferStruct->m_inBuffer);
#endif
        if (status != CL_SUCCESS)
        {
            log_error("clEnqueueWriteBuffer failed\n");
            return -1;
        }
        pBufferStruct->m_bufferUploaded = true;
    }

    // The argument size is that of the cl_mem handle, not of the buffer.
    status = clSetKernelArg(pClState->m_kernel, 0,
                            sizeof(pBufferStruct->m_inBuffer),
                            &pBufferStruct->m_inBuffer);
    if (status != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed, first arg (0)\n");
        return -1;
    }

    status = clSetKernelArg(pClState->m_kernel, 1,
                            sizeof(pBufferStruct->m_outBuffer),
                            &pBufferStruct->m_outBuffer);
    if (status != CL_SUCCESS)
    {
        log_error("clSetKernelArgs failed, second arg (1)\n");
        return -1;
    }

    return 0;
}
/**
 * Copy m_bufSizeOut bytes from the device output buffer into the host
 * output storage (m_pOut) using a blocking read.
 *
 * @return 0 on success, -1 if clEnqueueReadBuffer reported an error.
 */
int retrieveResults(bufferStruct *pBufferStruct, clState *pClState)
{
    int status = clEnqueueReadBuffer(
        pClState->m_queue, pBufferStruct->m_outBuffer, CL_TRUE, 0,
        pBufferStruct->m_bufSizeOut, pBufferStruct->m_pOut, 0, NULL, NULL);

    if (status == CL_SUCCESS)
    {
        return 0;
    }

    log_error("clEnqueueReadBuffer failed\n");
    return -1;
}
/**
 * Check that each of the first m_numThreads cl_uint results in m_pOut is a
 * multiple of minAlign.
 *
 * @param pBufferStruct  buffers whose host output storage is inspected
 * @param pClState       state supplying the number of results to check
 * @param minAlign       required alignment; must be non-zero
 * @return 0 if every value is aligned, -1 on the first misaligned value or
 *         if minAlign is zero.
 */
int checkCorrectnessAlign(bufferStruct *pBufferStruct, clState *pClState,
                          size_t minAlign)
{
    size_t i;
    const cl_uint *targetArr = (const cl_uint *)(pBufferStruct->m_pOut);

    if (minAlign == 0)
    {
        // A zero divisor would make the modulo below undefined.
        vlog_error("Error: alignment to check against must be non-zero\n");
        return -1;
    }

    for (i = 0; i < pClState->m_numThreads; ++i)
    {
        if (targetArr[i] % minAlign != 0)
        {
            vlog_error("Error %zu (of %zu). Expected a multiple of %zx, "
                       "got %x\n",
                       i, (size_t)pClState->m_numThreads, minAlign,
                       targetArr[i]);
            return -1;
        }
    }
    return 0;
}
/**
 * Check that each of the first m_numThreads cl_int results in m_pOut equals
 * the expected step for the given vector width.
 *
 * A width of 3 is expected to report 4, because vec3 types are 4-aligned.
 *
 * @param pBufferStruct  buffers whose host output storage is inspected
 * @param pClState       state supplying the number of results to check
 * @param typeSize       not used by this function
 * @param vecWidth       vector width the kernel was built for
 * @return 0 if every value matches, -1 on the first mismatch.
 */
int checkCorrectnessStep(bufferStruct *pBufferStruct, clState *pClState,
                         size_t typeSize, size_t vecWidth)
{
    size_t i;
    cl_int targetSize = (cl_int)vecWidth;
    const cl_int *targetArr = (const cl_int *)(pBufferStruct->m_pOut);

    (void)typeSize;

    if (targetSize == 3)
    {
        targetSize = 4; // hack for 4-aligned vec3 types
    }

    for (i = 0; i < pClState->m_numThreads; ++i)
    {
        if (targetArr[i] != targetSize)
        {
            vlog_error("Error %zu (of %zu). Expected %d, got %d\n", i,
                       (size_t)pClState->m_numThreads, targetSize,
                       targetArr[i]);
            return -1;
        }
    }
    return 0;
}
/**
 * Check that each of the first m_numThreads cl_uint results in m_pOut is
 * exactly beforeSize more than a multiple of totSize.
 *
 * @param pBufferStruct  buffers whose host output storage is inspected
 * @param pClState       state supplying the number of results to check
 * @param totSize        modulus of the check; must be non-zero
 * @param beforeSize     expected remainder offset
 * @return 0 if every value satisfies the congruence, -1 on the first value
 *         that does not or if totSize is zero.
 */
int checkPackedCorrectness(bufferStruct *pBufferStruct, clState *pClState,
                           size_t totSize, size_t beforeSize)
{
    size_t i;
    size_t wantRemainder;
    const cl_uint *targetArr = (const cl_uint *)(pBufferStruct->m_pOut);

    if (totSize == 0)
    {
        // A zero divisor would make the modulo below undefined.
        vlog_error("Error: total size to check against must be non-zero\n");
        return -1;
    }

    // Compare remainders instead of subtracting first: the subtraction
    // wraps around when a value is smaller than beforeSize, and the wrapped
    // value can be a multiple of totSize by accident.
    wantRemainder = beforeSize % totSize;

    for (i = 0; i < pClState->m_numThreads; ++i)
    {
        if ((size_t)targetArr[i] % totSize != wantRemainder)
        {
            vlog_error("Error %zu (of %zu). Expected %zu more than a "
                       "multiple of %zu, got %u \n",
                       i, (size_t)pClState->m_numThreads, beforeSize, totSize,
                       targetArr[i]);
            return -1;
        }
    }
    return 0;
}