You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
426 lines
13 KiB
426 lines
13 KiB
//
|
|
// Copyright (c) 2017 The Khronos Group Inc.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
//
|
|
#include "harness/compat.h"
|
|
|
|
#include <stdio.h>
|
|
#include <string.h>
|
|
#include <time.h>
|
|
#include <sys/types.h>
|
|
#include <sys/stat.h>
|
|
|
|
#include "procs.h"
|
|
#include "harness/testHarness.h"
|
|
#include "harness/errorHelpers.h"
|
|
|
|
// Byte-sized pixel component type used by the host-side reference code.
// NOTE(review): #ifndef only sees a *macro* named uchar; it does not detect an
// existing typedef of the same name.
#ifndef uchar
typedef unsigned char uchar;
#endif

// Smaller of two values. Function-like macro: either argument may be evaluated
// twice, so do not pass expressions with side effects.
#undef MIN
#define MIN(a,b) ( (a) < (b) ? (a) : (b) )

// Larger of two values. Same double-evaluation caveat as MIN.
#undef MAX
#define MAX(a,b) ( (a) > (b) ? (a) : (b) )

//#define CREATE_OUTPUT 1

// Defined in another translation unit; not called from the code in this file.
extern int writePPM( const char *filename, uchar *buf, int xsize, int ysize );
|
|
|
|
|
|
|
|
//--- the code for kernel executables
|
|
// OpenCL C source of the image_filter kernel.
//
// Each work-item (tid_x, tid_y) accumulates a weighted sum of the source pixels
// around it: rows i in [-m/2, (m+1)/2) and columns j in [-n/2, (n+1)/2), with
// the weights consumed in that row-major order from filter_weights. Reads go
// through a clamp-to-edge, nearest-filter sampler; taps whose weight is zero
// are skipped. The result is written to (tid_x, tid_y) of dst_image.
static const char *image_filter_src =
    "constant sampler_t sampler = CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST;\n"
    "\n"
    "__kernel void image_filter( int n, int m, __global float *filter_weights,\n"
    " read_only image2d_t src_image, write_only image2d_t dst_image )\n"
    "{\n"
    " int i, j;\n"
    " int indx = 0;\n"
    " int tid_x = get_global_id(0);\n"
    " int tid_y = get_global_id(1);\n"
    " float4 filter_result = (float4)( 0.f, 0.f, 0.f, 0.f );\n"
    "\n"
    " for (i=-m/2; i<(m+1)/2; i++){\n"
    " for (j=-n/2; j<(n+1)/2; j++){\n"
    " float w = filter_weights[indx++];\n"
    "\n"
    " if (w != 0.0f){\n"
    " filter_result += w * read_imagef(src_image, sampler,\n"
    " (int2)(tid_x + j, tid_y + i));\n"
    " }\n"
    " }\n"
    " }\n"
    "\n"
    " write_imagef(dst_image, (int2)(tid_x, tid_y), filter_result);\n"
    "}\n";
|
|
|
|
|
|
//--- equivalent non-kernel code
|
|
// Host-side counterpart of the kernel's read_imagef() call.
//
// Fetches the pixel at (x, y) from a tightly packed w-by-h image that stores
// nChannels bytes per pixel. Coordinates outside the image are clamped to the
// nearest edge. The nChannels components are written to srcRgb as floats
// holding the raw byte values (0..255, not normalized).
static void read_imagef( int x, int y, int w, int h, int nChannels, uchar *src, float *srcRgb )
{
    int col = x;
    int row = y;
    int base;
    int c;

    // clamp to the edge: lower bound first, then upper bound
    if( col < 0 )
        col = 0;
    if( col > w - 1 )
        col = w - 1;

    if( row < 0 )
        row = 0;
    if( row > h - 1 )
        row = h - 1;

    // offset of the pixel's first component
    base = ( row * w + col ) * nChannels;

    // convert each component to float
    for( c = 0; c < nChannels; c++ )
        srcRgb[c] = (float)src[base + c];

} // end read_imagef()
|
|
|
|
|
|
// Host-side counterpart of the kernel's write_imagef() call.
//
// Stores nChannels components from dstRgb into the pixel at (x, y) of a tightly
// packed image that is w pixels wide. Each float is converted with a plain
// (uchar) cast, i.e. truncated toward zero. No bounds checking is done and h is
// not used.
static void write_imagef( uchar *dst, int x, int y, int w, int h, int nChannels, float *dstRgb )
{
    // offset of the pixel's first component
    const int base = ( y * w + x ) * nChannels;
    int c = 0;

    while( c < nChannels ){
        dst[base + c] = (uchar)dstRgb[c];
        c++;
    }

} // end write_imagef()
|
|
|
|
|
|
// Computes one output pixel of the reference (host) filter.
//
// Mirrors the image_filter kernel: walks rows i in [-m/2, (m+1)/2) and columns
// j in [-n/2, (n+1)/2), consuming filter_weights in that row-major order, and
// accumulates weight * source pixel for every non-zero weight. Source reads are
// edge-clamped by read_imagef(). The accumulated value is stored at (x, y) of
// dst via write_imagef().
//
// The accumulator holds four components, so nChannels must not exceed 4.
static void basicFilterPixel( int x, int y, int n, int m, int xsize, int ysize, int nChannels, const float *filter_weights, uchar *src, uchar *dst )
{
    float accum[] = { 0.f, 0.f, 0.f, 0.f };
    float texel[4];
    int nextWeight = 0;
    int row, col, c;

    for( row = -m/2; row < (m+1)/2; row++ ){
        for( col = -n/2; col < (n+1)/2; col++ ){
            const float weight = filter_weights[nextWeight];
            nextWeight++;

            // a zero weight contributes nothing; skip the image read
            if( weight == 0 )
                continue;

            read_imagef( x + col, y + row, xsize, ysize, nChannels, src, texel );
            for( c = 0; c < nChannels; c++ )
                accum[c] += weight * texel[c];
        }
    }

    write_imagef( dst, x, y, xsize, ysize, nChannels, accum );

} // end basicFilterPixel()
|
|
|
|
|
|
//--- helper functions
|
|
// Allocates a buffer of `elements` bytes and fills every byte with the low
// eight bits of successive genrand_int32(d) values.
//
// Returns the malloc'ed buffer (the caller frees it), or NULL if the
// allocation fails.
static uchar *createImage( int elements, MTdata d)
{
    uchar *image = (uchar *)malloc( elements * sizeof( cl_uchar ) );
    int filled = 0;

    if( image == NULL )
        return NULL;

    while( filled < elements ){
        image[filled] = (uchar)genrand_int32(d);
        filled++;
    }

    return image;

} // end createImage()
|
|
|
|
|
|
// Compares two tightly packed xsize-by-ysize images with nChannels bytes per
// pixel, component by component.
//
// Returns 0 when every pair of components differs by at most `tolerance`.
// On the first pair that differs by more, logs its position and both values
// and returns -1.
static int verifyImages( uchar *ptr0, uchar *ptr1, uchar tolerance, int xsize, int ysize, int nChannels )
{
    int offset = 0;     // running index of the component being compared
    int row, col, chan;

    for( row = 0; row < ysize; row++ ){
        for( col = 0; col < xsize; col++ ){
            for( chan = 0; chan < nChannels; chan++ ){
                const int first = ptr0[offset];
                const int second = ptr1[offset];
                offset++;

                if( (uchar)abs( first - second ) > tolerance ){
                    log_error( " images differ at x,y = %d,%d, channel = %d, %d to %d\n", col, row, chan,
                               first, second );
                    return -1;
                }
            }
        }
    }

    return 0;

} // end verifyImages()
|
|
|
|
|
|
// Runs the image_filter kernel on the device and checks its profiling data.
//
// Steps:
//   1. create a read-only source image initialised from inptr, a write-only
//      destination image, and a buffer holding the 3x3 filter weights;
//   2. build the kernel, set its arguments and enqueue it over a w-by-h range;
//   3. wait for the kernel's event, then query its QUEUED, SUBMIT, START and
//      END profiling counters;
//   4. read the destination image back into outptr (blocking read);
//   5. release every OpenCL object, then hand the four timestamps to
//      check_times().
//
// nChannels is not consulted: the image format is fixed at CL_RGBA /
// CL_UNORM_INT8.
//
// Returns 0 on success, or -1 if any OpenCL call fails or check_times()
// returns non-zero.
static int kernelFilter( cl_device_id device, cl_context context, cl_command_queue queue, int w, int h, int nChannels,
                         uchar *inptr, uchar *outptr )
{
    cl_program program;
    cl_kernel kernel;
    cl_mem memobjs[3];      // [0] source image, [1] destination image, [2] filter weights
    cl_image_format image_format_desc = { CL_RGBA, CL_UNORM_INT8 };
    cl_event executeEvent;
    cl_ulong queueStart = 0, submitStart = 0, writeStart = 0, writeEnd = 0;
    size_t threads[2];
    size_t origin[3] = { 0, 0, 0 };
    size_t region[3];
    float filter_weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    int filter_w = 3, filter_h = 3;
    int err = 0;
    int result = -1;        // pessimistic until the read-back succeeds

    (void)nChannels;

    // set thread dimensions and the read-back region
    threads[0] = (size_t)w;
    threads[1] = (size_t)h;
    region[0] = (size_t)w;
    region[1] = (size_t)h;
    region[2] = 1;

    // allocate the input and output image memory objects
    memobjs[0] =
        create_image_2d(context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
                        &image_format_desc, w, h, 0, inptr, &err);
    if( memobjs[0] == (cl_mem)0 ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        return -1;
    }

    memobjs[1] = create_image_2d( context, CL_MEM_WRITE_ONLY, &image_format_desc, w, h, 0, NULL, &err );
    if( memobjs[1] == (cl_mem)0 ){
        log_error( " unable to create 2D image using create_image_2d\n" );
        goto release_input;
    }

    // allocate an array memory object to load the filter weights
    memobjs[2] = clCreateBuffer(
        context, CL_MEM_READ_ONLY | CL_MEM_COPY_HOST_PTR,
        sizeof( filter_weights ), filter_weights, &err);
    if( memobjs[2] == (cl_mem)0 ){
        log_error( " unable to create array using clCreateBuffer\n" );
        goto release_output;
    }

    // create the compute program
    err = create_single_kernel_helper( context, &program, &kernel, 1, &image_filter_src, "image_filter" );
    if( err )
        goto release_weights;

    // set the args values
    err = clSetKernelArg( kernel, 0, sizeof( cl_int ), (void *)&filter_w );
    err |= clSetKernelArg( kernel, 1, sizeof( cl_int ), (void *)&filter_h );
    err |= clSetKernelArg( kernel, 2, sizeof( cl_mem ), (void *)&memobjs[2] );
    err |= clSetKernelArg( kernel, 3, sizeof( cl_mem ), (void *)&memobjs[0] );
    err |= clSetKernelArg( kernel, 4, sizeof( cl_mem ), (void *)&memobjs[1] );
    if( err != CL_SUCCESS ){
        print_error( err, "clSetKernelArg failed\n" );
        goto release_kernel;
    }

    err = clEnqueueNDRangeKernel( queue, kernel, 2, NULL, threads, NULL, 0, NULL, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clEnqueueNDRangeKernel failed\n" );
        // No event exists when the enqueue fails, so skip the event release.
        goto release_kernel;
    }

    // This synchronization point is needed in order to assume the data is valid.
    // Getting profiling information is not a synchronization point.
    err = clWaitForEvents( 1, &executeEvent );
    if( err != CL_SUCCESS ){
        print_error( err, "clWaitForEvents failed\n" );
        goto release_event;
    }

    // test profiling
    // NOTE(review): the two loops below spin for as long as the query reports
    // CL_PROFILING_INFO_NOT_AVAILABLE; if the queue was created without
    // profiling enabled they would presumably never terminate -- confirm.
    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_QUEUED, sizeof( cl_ulong ), &queueStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    while( ( err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_SUBMIT, sizeof( cl_ulong ), &submitStart, NULL ) ) ==
           CL_PROFILING_INFO_NOT_AVAILABLE );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_START, sizeof( cl_ulong ), &writeStart, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    err = clGetEventProfilingInfo( executeEvent, CL_PROFILING_COMMAND_END, sizeof( cl_ulong ), &writeEnd, NULL );
    if( err != CL_SUCCESS ){
        print_error( err, "clGetEventProfilingInfo failed" );
        goto release_event;
    }

    // read output image (blocking)
    err = clEnqueueReadImage( queue, memobjs[1], CL_TRUE, origin, region, 0, 0, outptr, 0, NULL, NULL);
    if( err != CL_SUCCESS ){
        print_error( err, "clReadImage failed\n" );
        goto release_event;
    }

    result = 0;

    // Release ladder: each label frees what was created before the jump to it;
    // the success path falls through all of them.
release_event:
    clReleaseEvent( executeEvent );
release_kernel:
    clReleaseKernel( kernel );
    clReleaseProgram( program );
release_weights:
    clReleaseMemObject( memobjs[2] );
release_output:
    clReleaseMemObject( memobjs[1] );
release_input:
    clReleaseMemObject( memobjs[0] );

    // The timestamps are only meaningful when every step above succeeded.
    if( result == 0 && check_times(queueStart, submitStart, writeStart, writeEnd, device) )
        result = -1;

    return result;

} // end kernelFilter()
|
|
|
|
|
|
// Host reference implementation of the filter: applies the same 3x3 weights the
// kernel path uses to every pixel of the w-by-h input image, writing the result
// to outptr. Always returns 0.
static int basicFilter( int w, int h, int nChannels, uchar *inptr, uchar *outptr )
{
    const float weights[] = { .1f, .1f, .1f, .1f, .2f, .1f, .1f, .1f, .1f };
    const int weightsWide = 3;
    const int weightsHigh = 3;
    int row, col;

    // row-major walk over the output image
    for( row = 0; row < h; row++ )
        for( col = 0; col < w; col++ )
            basicFilterPixel( col, row, weightsWide, weightsHigh, w, h, nChannels, weights, inptr, outptr );

    return 0;

} // end of basicFilter()
|
|
|
|
|
|
// Test entry point: filters a random 256x256, 4-channel image on the device
// (kernelFilter, which also checks the kernel's profiling data) and on the host
// (basicFilter), then requires the two results to agree to within 1 per
// component.
//
// num_elements is not used. Returns 0 on success and non-zero on failure.
int test_execute( cl_device_id device, cl_context context, cl_command_queue queue, int num_elements )
{
    uchar *inptr;
    uchar *outptr[2];   // [0] device result, [1] host reference result
    int w = 256, h = 256;
    int nChannels = 4;
    int nElements = w * h * nChannels;
    int err = 0;
    MTdata d;

    // NOTE(review): project macro; presumably leaves the test early when the
    // device has no image support -- confirm against the harness.
    PASSIVE_REQUIRE_IMAGE_SUPPORT( device )

    // random input image; the generator is only needed while filling it
    d = init_genrand( gRandomSeed );
    inptr = createImage( nElements, d );
    free_mtdata( d);
    d = NULL;

    if( inptr == NULL ){
        log_error( " unable to allocate %d bytes of memory for image\n", nElements );
        return -1;
    }

    outptr[0] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
    if( outptr[0] == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #1\n", nElements );
        free( (void *)inptr );
        return -1;
    }

    outptr[1] = (uchar *)malloc( nElements * sizeof( cl_uchar ) );
    if( outptr[1] == NULL ){
        log_error( " unable to allocate %d bytes of memory for output image #2\n", nElements );
        free( (void *)outptr[0] );
        free( (void *)inptr );
        return -1;
    }

    err = kernelFilter( device, context, queue, w, h, nChannels, inptr, outptr[0] );

    // only build and compare the host reference when the device run succeeded
    if( err == 0 ){
        basicFilter( w, h, nChannels, inptr, outptr[1] );

        // verify that the images are the same
        err = verifyImages( outptr[0], outptr[1], (uchar)0x1, w, h, nChannels );
        if( err != 0 ){
            log_error( " images do not match\n" );
        }
    }

    // clean up
    free( (void *)outptr[1] );
    free( (void *)outptr[0] );
    free( (void *)inptr );

    return err;

} // end test_execute()
|
|
|
|
|
|
|