You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
2275 lines
86 KiB
2275 lines
86 KiB
/*-------------------------------------------------------------------------
|
|
* drawElements Quality Program OpenGL ES 2.0 Module
|
|
* -------------------------------------------------
|
|
*
|
|
* Copyright 2014 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*
|
|
*//*!
|
|
* \file
|
|
* \brief Shader operator performance tests.
|
|
*//*--------------------------------------------------------------------*/
|
|
|
|
#include "es2pShaderOperatorTests.hpp"
|
|
#include "glsCalibration.hpp"
|
|
#include "gluShaderUtil.hpp"
|
|
#include "gluShaderProgram.hpp"
|
|
#include "gluPixelTransfer.hpp"
|
|
#include "tcuTestLog.hpp"
|
|
#include "tcuRenderTarget.hpp"
|
|
#include "tcuCommandLine.hpp"
|
|
#include "tcuSurface.hpp"
|
|
#include "deStringUtil.hpp"
|
|
#include "deSharedPtr.hpp"
|
|
#include "deClock.h"
|
|
#include "deMath.h"
|
|
|
|
#include "glwEnums.hpp"
|
|
#include "glwFunctions.hpp"
|
|
|
|
#include <map>
|
|
#include <algorithm>
|
|
#include <limits>
|
|
#include <set>
|
|
|
|
namespace deqp
|
|
{
|
|
namespace gles2
|
|
{
|
|
namespace Performance
|
|
{
|
|
|
|
using namespace gls;
|
|
using namespace glu;
|
|
using tcu::Vec2;
|
|
using tcu::Vec4;
|
|
using tcu::TestLog;
|
|
using de::SharedPtr;
|
|
|
|
using std::string;
|
|
using std::vector;
|
|
|
|
#define MEASUREMENT_FAIL() throw tcu::InternalError("Unable to get sensible measurements for estimation", DE_NULL, __FILE__, __LINE__)
|
|
|
|
// Number of measurements in OperatorPerformanceCase for each workload size, unless specified otherwise by a command line argument.
|
|
static const int DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD = 3;
|
|
// How many different workload sizes are used by OperatorPerformanceCase.
|
|
static const int NUM_WORKLOADS = 8;
|
|
// Maximum workload size that can be attempted. In a sensible case, this most likely won't be reached.
|
|
static const int MAX_WORKLOAD_SIZE = 1<<29;
|
|
|
|
// BinaryOpCase-specific constants for shader generation.
|
|
static const int BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
|
|
static const int BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT = 2;
|
|
static const int BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT = 4;
|
|
|
|
// FunctionCase-specific constants for shader generation.
|
|
static const int FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS = 4;
|
|
|
|
static const char* const s_swizzles[][4] =
|
|
{
|
|
{ "x", "yx", "yzx", "wzyx" },
|
|
{ "y", "zy", "wyz", "xwzy" },
|
|
{ "z", "wy", "zxy", "yzwx" },
|
|
{ "w", "xw", "yxw", "zyxw" }
|
|
};
|
|
|
|
template <int N>
|
|
static tcu::Vector<float, N> mean (const vector<tcu::Vector<float, N> >& data)
|
|
{
|
|
tcu::Vector<float, N> sum(0.0f);
|
|
for (int i = 0; i < (int)data.size(); i++)
|
|
sum += data[i];
|
|
return sum / tcu::Vector<float, N>((float)data.size());
|
|
}
|
|
|
|
//! Dispatch to the glUniform{1..4}fv entry point matching the component count n.
static void uniformNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
{
	if (n == 1)
		gl.uniform1fv(location, count, data);
	else if (n == 2)
		gl.uniform2fv(location, count, data);
	else if (n == 3)
		gl.uniform3fv(location, count, data);
	else if (n == 4)
		gl.uniform4fv(location, count, data);
	else
		DE_ASSERT(false); // Only 1-4 component uniforms exist in GLSL ES.
}
|
|
|
|
//! Dispatch to the glUniform{1..4}iv entry point matching the component count n.
static void uniformNiv (const glw::Functions& gl, int n, int location, int count, const int* data)
{
	if (n == 1)
		gl.uniform1iv(location, count, data);
	else if (n == 2)
		gl.uniform2iv(location, count, data);
	else if (n == 3)
		gl.uniform3iv(location, count, data);
	else if (n == 4)
		gl.uniform4iv(location, count, data);
	else
		DE_ASSERT(false); // Only 1-4 component uniforms exist in GLSL ES.
}
|
|
|
|
//! Dispatch to the glUniformMatrix{2..4}fv entry point for a square NxN matrix (no transpose).
static void uniformMatrixNfv (const glw::Functions& gl, int n, int location, int count, const float* data)
{
	if (n == 2)
		gl.uniformMatrix2fv(location, count, GL_FALSE, &data[0]);
	else if (n == 3)
		gl.uniformMatrix3fv(location, count, GL_FALSE, &data[0]);
	else if (n == 4)
		gl.uniformMatrix4fv(location, count, GL_FALSE, &data[0]);
	else
		DE_ASSERT(false); // Only mat2..mat4 exist in GLSL ES.
}
|
|
|
|
//! Map a component count to the corresponding scalar/vector float type (1 -> float, 2..4 -> vec2..vec4).
static glu::DataType getDataTypeFloatOrVec (int size)
{
	if (size == 1)
		return glu::TYPE_FLOAT;

	return glu::getDataTypeFloatVec(size);
}
|
|
|
|
//! Return the iteration count given on the command line, or def if none (non-positive) was given.
static int getIterationCountOrDefault (const tcu::CommandLine& cmdLine, int def)
{
	const int fromCmdLine = cmdLine.getTestIterationCount();

	if (fromCmdLine > 0)
		return fromCmdLine;

	return def;
}
|
|
|
|
//! Human-readable formula string for a line estimate, e.g. "y = 0.5 + 2.0*x".
static string lineParamsString (const LineParameters& params)
{
	string str("y = ");
	str += de::toString(params.offset);
	str += " + ";
	str += de::toString(params.coefficient);
	str += "*x";
	return str;
}
|
|
|
|
namespace
|
|
{
|
|
|
|
/*--------------------------------------------------------------------*//*!
|
|
* \brief Abstract class for measuring shader operator performance.
|
|
*
|
|
* This class draws multiple times with different workload sizes (set
|
|
* via a uniform, by subclass). Time for each frame is measured, and the
|
|
* slope of the workload size vs frame time data is estimated. This slope
|
|
* tells us the estimated increase in frame time caused by a workload
|
|
* increase of 1 unit (what 1 workload unit means is up to subclass).
|
|
*
|
|
* Generally, the shaders contain not just the operation we're interested
|
|
* in (e.g. addition) but also some other stuff (e.g. loop overhead). To
|
|
* eliminate this cost, we actually do the stuff described in the above
|
|
* paragraph with multiple programs (usually two), which contain different
|
|
* kinds of workload (e.g. different loop contents). Then we can (in
|
|
* theory) compute the cost of just one operation in a subclass-dependent
|
|
* manner.
|
|
*
|
|
* At this point, the result tells us the increase in frame time caused
|
|
* by the addition of one operation. Dividing this by the amount of
|
|
* draw calls in a frame, and further by the amount of vertices or
|
|
* fragments in a draw call, we get the time cost of one operation.
|
|
*
|
|
* In reality, there sometimes isn't just a trivial linear dependence
|
|
* between workload size and frame time. Instead, there tends to be some
|
|
* amount of initial "free" operations. That is, it may be that all
|
|
* workload sizes below some positive integer C yield the same frame time,
|
|
* and only workload sizes beyond C increase the frame time in a supposedly
|
|
* linear manner. Graphically, this means that there graph consists of two
|
|
* parts: a horizontal left part, and a linearly increasing right part; the
|
|
* right part starts where the left parts ends. The principal task of these
|
|
* tests is to look at the slope of the increasing right part. Additionally
|
|
* an estimate for the amount of initial free operations is calculated.
|
|
* Note that it is also normal to get graphs where the horizontal left part
|
|
* is of zero width, i.e. there are no free operations.
|
|
*//*--------------------------------------------------------------------*/
|
|
class OperatorPerformanceCase : public tcu::TestCase
{
public:
	//! Whether the measured workload lives in the vertex or the fragment shader.
	enum CaseType
	{
		CASETYPE_VERTEX = 0,
		CASETYPE_FRAGMENT,

		CASETYPE_LAST
	};

	//! Calibration starting point, shared between cases so later cases can
	//! start from the call count an earlier case converged to.
	struct InitialCalibration
	{
		int initialNumCalls;
		InitialCalibration (void) : initialNumCalls(1) {}
	};

	typedef SharedPtr<InitialCalibration> InitialCalibrationStorage;

								OperatorPerformanceCase		(tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
															 CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage);
								~OperatorPerformanceCase	(void);

	void						init						(void);
	void						deinit						(void);

	IterateResult				iterate						(void);

	//! One vertex attribute, specified by its value at the four corners of the
	//! rendered quad; interior values are interpolated (see generateVertices).
	struct AttribSpec
	{
		AttribSpec (const char* name_, const tcu::Vec4& p00_, const tcu::Vec4& p01_, const tcu::Vec4& p10_, const tcu::Vec4& p11_)
			: name		(name_)
			, p00		(p00_)
			, p01		(p01_)
			, p10		(p10_)
			, p11		(p11_)
		{
		}

		AttribSpec (void) {}

		std::string		name;
		tcu::Vec4		p00;	//!< Bottom left.
		tcu::Vec4		p01;	//!< Bottom right.
		tcu::Vec4		p10;	//!< Top left.
		tcu::Vec4		p11;	//!< Top right.
	};

protected:
	//! Sources, attributes and a log description for one measured program.
	struct ProgramContext
	{
		string				vertShaderSource;
		string				fragShaderSource;
		vector<AttribSpec>	attributes;

		string				description;

		ProgramContext (void) {}
		ProgramContext (const string& vs, const string& fs, const vector<AttribSpec>& attrs, const string& desc)
			: vertShaderSource(vs), fragShaderSource(fs), attributes(attrs), description(desc) {}
	};

	//! Generate the programs (usually two with differing workloads) to measure.
	virtual vector<ProgramContext>	generateProgramData					(void) const = 0;
	//! Sets program-specific uniforms that don't depend on the workload size.
	virtual void					setGeneralUniforms					(deUint32 program) const = 0;
	//! Sets the uniform(s) that specifies the workload size in the shader.
	virtual void					setWorkloadSizeUniform				(deUint32 program, int workload) const = 0;
	//! Computes the cost of a single operation, given the workload costs per program.
	virtual float					computeSingleOperationTime			(const vector<float>& perProgramWorkloadCosts) const = 0;
	//! Logs a human-readable description of what computeSingleOperationTime does.
	virtual void					logSingleOperationCalculationInfo	(void) const = 0;

	glu::RenderContext&		m_renderCtx;

	CaseType				m_caseType;

private:
	//! The measurement state machine; see prepareNextRound and iterate.
	enum State
	{
		STATE_CALIBRATING = 0,		//!< Calibrate draw call count, using first program in m_programs, with workload size 1.
		STATE_FIND_HIGH_WORKLOAD,	//!< Find an appropriate lower bound for the highest workload size we intend to use (one with high-enough frame time compared to workload size 1) for each program.
		STATE_MEASURING,			//!< Do actual measurements, for each program in m_programs.
		STATE_REPORTING,			//!< Measurements are done; calculate results and log.
		STATE_FINISHED,				//!< All done.

		STATE_LAST
	};

	//! Frame times measured for one workload size.
	struct WorkloadRecord
	{
		int				workloadSize;
		vector<float>	frameTimes; //!< In microseconds.

		WorkloadRecord (int workloadSize_) : workloadSize(workloadSize_) {}
		bool operator< (const WorkloadRecord& other) const { return this->workloadSize < other.workloadSize; }
		void addFrameTime (float time) { frameTimes.push_back(time); }
		// Median of the recorded frame times; copies and sorts since this is const.
		// Note: assumes at least one frame time has been recorded.
		float getMedianTime (void) const
		{
			vector<float> times = frameTimes;
			std::sort(times.begin(), times.end());
			return times.size() % 2 == 0 ?
					(times[times.size()/2-1] + times[times.size()/2])*0.5f :
					times[times.size()/2];
		}
	};

	void						prepareProgram				(int progNdx);					//!< Sets attributes and uniforms for m_programs[progNdx].
	void						prepareWorkload				(int progNdx, int workload);	//!< Calls setWorkloadSizeUniform and draws, in case the implementation does some draw-time compilation.
	void						prepareNextRound			(void);							//!< Increases workload and/or updates m_state.
	void						render						(int numDrawCalls);
	deUint64					renderAndMeasure			(int numDrawCalls);
	void						adjustAndLogGridAndViewport	(void);							//!< Log grid and viewport sizes, after possibly reducing them to reduce draw time.

	vector<Vec2>				getWorkloadMedianDataPoints	(int progNdx) const;			//!< [ Vec2(r.workloadSize, r.getMedianTime()) for r in m_workloadRecords[progNdx] ]

	const int							m_numMeasurementsPerWorkload;
	const int							m_numWorkloads;				//!< How many different workload sizes are used for measurement for each program.

	int									m_workloadNdx;				//!< Runs from 0 to m_numWorkloads-1.

	int									m_workloadMeasurementNdx;
	vector<vector<WorkloadRecord> >		m_workloadRecordsFindHigh;	//!< The measurements done during STATE_FIND_HIGH_WORKLOAD.
	vector<vector<WorkloadRecord> >		m_workloadRecords;			//!< The measurements of each program in m_programs. Generated during STATE_MEASURING, into index specified by m_measureProgramNdx.

	State								m_state;
	int									m_measureProgramNdx;		//!< When m_state is STATE_FIND_HIGH_WORKLOAD or STATE_MEASURING, this tells which program in m_programs is being measured.

	vector<int>							m_highWorkloadSizes;		//!< The first workload size encountered during STATE_FIND_HIGH_WORKLOAD that was determined suitable, for each program.

	TheilSenCalibrator					m_calibrator;
	InitialCalibrationStorage			m_initialCalibrationStorage;

	int									m_viewportWidth;
	int									m_viewportHeight;
	int									m_gridSizeX;
	int									m_gridSizeY;

	vector<ProgramContext>				m_programData;
	vector<SharedPtr<ShaderProgram> >	m_programs;

	std::vector<deUint32>				m_attribBuffers;			//!< GL buffer names owned by this case; freed in deinit/destructor.
};
|
|
|
|
//! Linear interpolation over a triangle: v0 at the origin, v2 along the x axis, v1 along the y axis.
static inline float triangleInterpolate (float v0, float v1, float v2, float x, float y)
{
	const float gradX = v2 - v0;
	const float gradY = v1 - v0;
	return v0 + gradX*x + gradY*y;
}
|
|
|
|
//! Interpolate inside a unit quad that is split into two triangles along the
//! x + y == 1 diagonal. Components of quad are the values at the four corners.
static inline float triQuadInterpolate (float x, float y, const tcu::Vec4& quad)
{
	// \note Top left fill rule.
	const bool inLowerLeftTriangle = x + y < 1.0f;

	if (inLowerLeftTriangle)
		return triangleInterpolate(quad.x(), quad.y(), quad.z(), x, y);

	// Mirror the coordinates for the upper-right triangle.
	return triangleInterpolate(quad.w(), quad.z(), quad.y(), 1.0f-x, 1.0f-y);
}
|
|
|
|
//! Vertex count for a full triangle grid: two triangles of three vertices per cell.
static inline int getNumVertices (int gridSizeX, int gridSizeY)
{
	const int numCells			= gridSizeX * gridSizeY;
	const int verticesPerCell	= 2 * 3;
	return numCells * verticesPerCell;
}
|
|
|
|
// Fill dst with per-vertex attribute data for a gridSizeX x gridSizeY triangle
// grid. Each vertex gets 4 components, interpolated from the four corner values
// in spec via triQuadInterpolate. Output order matches getNumVertices:
// two CCW triangles per cell, rows bottom-to-top.
static void generateVertices (std::vector<float>& dst, int gridSizeX, int gridSizeY, const OperatorPerformanceCase::AttribSpec& spec)
{
	const int numComponents = 4;

	DE_ASSERT(gridSizeX >= 1 && gridSizeY >= 1);
	dst.resize(getNumVertices(gridSizeX, gridSizeY) * numComponents);

	{
		int dstNdx = 0; // Running write index into dst.

		for (int baseY = 0; baseY < gridSizeY; baseY++)
		for (int baseX = 0; baseX < gridSizeX; baseX++)
		{
			// Normalized [0,1] coordinates of this cell's corners.
			const float xf0 = (float)(baseX + 0) / (float)gridSizeX;
			const float yf0 = (float)(baseY + 0) / (float)gridSizeY;
			const float xf1 = (float)(baseX + 1) / (float)gridSizeX;
			const float yf1 = (float)(baseY + 1) / (float)gridSizeY;

			// Emit one vertex: interpolate each component from the quad corner values.
#define ADD_VERTEX(XF, YF) \
	for (int compNdx = 0; compNdx < numComponents; compNdx++) \
		dst[dstNdx++] = triQuadInterpolate((XF), (YF), tcu::Vec4(spec.p00[compNdx], spec.p01[compNdx], spec.p10[compNdx], spec.p11[compNdx]))

			// First triangle of the cell.
			ADD_VERTEX(xf0, yf0);
			ADD_VERTEX(xf1, yf0);
			ADD_VERTEX(xf0, yf1);

			// Second triangle of the cell.
			ADD_VERTEX(xf1, yf0);
			ADD_VERTEX(xf1, yf1);
			ADD_VERTEX(xf0, yf1);

#undef ADD_VERTEX
		}
	}
}
|
|
|
|
//! X coordinate where the two lines a and b intersect.
static float intersectionX (const gls::LineParameters& a, const gls::LineParameters& b)
{
	const float offsetDiff	= a.offset - b.offset;
	const float slopeDiff	= b.coefficient - a.coefficient;
	return offsetDiff / slopeDiff;
}
|
|
|
|
static int numDistinctX (const vector<Vec2>& data)
|
|
{
|
|
std::set<float> xs;
|
|
for (int i = 0; i < (int)data.size(); i++)
|
|
xs.insert(data[i].x());
|
|
return (int)xs.size();
|
|
}
|
|
|
|
//! Ordinary least-squares line fit: slope = cov(x,y)/var(x), offset through the centroid.
static gls::LineParameters simpleLinearRegression (const vector<Vec2>& data)
{
	const Vec2	centroid	= mean(data);

	float		covXY		= 0.0f;
	float		varX		= 0.0f;

	for (int pointNdx = 0; pointNdx < (int)data.size(); pointNdx++)
	{
		const Vec2 delta = data[pointNdx] - centroid;

		covXY	+= delta.x()*delta.y();
		varX	+= delta.x()*delta.x();
	}

	const float slope	= covXY / varX;
	const float offset	= centroid.y() - slope*centroid.x();

	return gls::LineParameters(offset, slope);
}
|
|
|
|
static float simpleLinearRegressionError (const vector<Vec2>& data)
|
|
{
|
|
if (numDistinctX(data) <= 2)
|
|
return 0.0f;
|
|
else
|
|
{
|
|
const gls::LineParameters estimator = simpleLinearRegression(data);
|
|
float error = 0.0f;
|
|
|
|
for (int i = 0; i < (int)data.size(); i++)
|
|
{
|
|
const float estY = estimator.offset + estimator.coefficient*data[i].x();
|
|
const float diff = estY - data[i].y();
|
|
error += diff*diff;
|
|
}
|
|
|
|
return error / (float)data.size();
|
|
}
|
|
}
|
|
|
|
static float verticalVariance (const vector<Vec2>& data)
|
|
{
|
|
if (numDistinctX(data) <= 2)
|
|
return 0.0f;
|
|
else
|
|
{
|
|
const float meanY = mean(data).y();
|
|
float error = 0.0f;
|
|
|
|
for (int i = 0; i < (int)data.size(); i++)
|
|
{
|
|
const float diff = meanY - data[i].y();
|
|
error += diff*diff;
|
|
}
|
|
|
|
return error / (float)data.size();
|
|
}
|
|
}
|
|
|
|
/*--------------------------------------------------------------------*//*!
|
|
* \brief Find the x coord that divides the input data into two slopes.
|
|
*
|
|
* The operator performance measurements tend to produce results where
|
|
* we get small operation counts "for free" (e.g. because the operations
|
|
* are performed during some memory transfer overhead or something),
|
|
* resulting in a curve with two parts: an initial horizontal line segment,
|
|
* and a rising line.
|
|
*
|
|
* This function finds the x coordinate that divides the input data into
|
|
* two parts such that the sum of the mean square errors for the
|
|
* least-squares estimated lines for the two parts is minimized, under the
|
|
* additional condition that the left line is horizontal.
|
|
*
|
|
* This function returns a number X s.t. { pt | pt is in data, pt.x >= X }
|
|
* is the right line, and the rest of data is the left line.
|
|
*//*--------------------------------------------------------------------*/
|
|
static float findSlopePivotX (const vector<Vec2>& data)
|
|
{
|
|
std::set<float> xCoords;
|
|
for (int i = 0; i < (int)data.size(); i++)
|
|
xCoords.insert(data[i].x());
|
|
|
|
float lowestError = std::numeric_limits<float>::infinity();
|
|
float bestPivotX = -std::numeric_limits<float>::infinity();
|
|
|
|
for (std::set<float>::const_iterator pivotX = xCoords.begin(); pivotX != xCoords.end(); ++pivotX)
|
|
{
|
|
vector<Vec2> leftData;
|
|
vector<Vec2> rightData;
|
|
for (int i = 0; i < (int)data.size(); i++)
|
|
{
|
|
if (data[i].x() < *pivotX)
|
|
leftData.push_back(data[i]);
|
|
else
|
|
rightData.push_back(data[i]);
|
|
}
|
|
|
|
if (numDistinctX(rightData) < 3) // We don't trust the right data if there's too little of it.
|
|
break;
|
|
|
|
{
|
|
const float totalError = verticalVariance(leftData) + simpleLinearRegressionError(rightData);
|
|
|
|
if (totalError < lowestError)
|
|
{
|
|
lowestError = totalError;
|
|
bestPivotX = *pivotX;
|
|
}
|
|
}
|
|
}
|
|
|
|
DE_ASSERT(lowestError < std::numeric_limits<float>::infinity());
|
|
|
|
return bestPivotX;
|
|
}
|
|
|
|
//! Result of computeSegmentedEstimator: a (nominally horizontal) left line,
//! a rising right line, and the x coordinate where the split happens.
struct SegmentedEstimator
{
	float					pivotX; //!< Value returned by findSlopePivotX, or -infinity if only single line.
	gls::LineParameters		left;
	gls::LineParameters		right;
	SegmentedEstimator (const gls::LineParameters& l, const gls::LineParameters& r, float pivotX_) : pivotX(pivotX_), left(l), right(r) {}
};
|
|
|
|
/*--------------------------------------------------------------------*//*!
|
|
* \brief Compute line estimators for (potentially) two-segment data.
|
|
*
|
|
* Splits the given data into left and right parts (using findSlopePivotX)
|
|
* and returns the line estimates for them.
|
|
*
|
|
* Sometimes, however (especially in fragment shader cases) the data is
|
|
* in fact not segmented, but a straight line. This function attempts to
|
|
* detect if this the case, and if so, sets left.offset = right.offset and
|
|
* left.slope = 0, meaning essentially that the initial "flat" part of the
|
|
* data has zero width.
|
|
*//*--------------------------------------------------------------------*/
|
|
static SegmentedEstimator computeSegmentedEstimator (const vector<Vec2>& data)
{
	const float		pivotX = findSlopePivotX(data);
	vector<Vec2>	leftData;
	vector<Vec2>	rightData;

	// Partition the data around the pivot found above.
	for (int pointNdx = 0; pointNdx < (int)data.size(); pointNdx++)
	{
		vector<Vec2>& dst = data[pointNdx].x() < pivotX ? leftData : rightData;
		dst.push_back(data[pointNdx]);
	}

	const gls::LineParameters leftLine	= gls::theilSenLinearRegression(leftData);
	const gls::LineParameters rightLine	= gls::theilSenLinearRegression(rightData);

	// The left part should be (roughly) flat; if it's too small or slopes more
	// than half as steeply as the right part, treat the whole data as one line
	// with a zero-width initial flat part.
	if (numDistinctX(leftData) < 2 || leftLine.coefficient > rightLine.coefficient*0.5f)
	{
		const gls::LineParameters entireLine = gls::theilSenLinearRegression(data);
		return SegmentedEstimator(gls::LineParameters(entireLine.offset, 0.0f), entireLine, -std::numeric_limits<float>::infinity());
	}

	return SegmentedEstimator(leftLine, rightLine, pivotX);
}
|
|
|
|
// Vertex cases use a small fixed viewport with a dense triangle grid (vertex
// work dominates); fragment cases use the full render target with a single
// quad (fragment work dominates).
OperatorPerformanceCase::OperatorPerformanceCase (tcu::TestContext& testCtx, glu::RenderContext& renderCtx, const char* name, const char* description,
												  CaseType caseType, int numWorkloads, const InitialCalibrationStorage& initialCalibrationStorage)
	: tcu::TestCase					(testCtx, tcu::NODETYPE_PERFORMANCE, name, description)
	, m_renderCtx					(renderCtx)
	, m_caseType					(caseType)
	, m_numMeasurementsPerWorkload	(getIterationCountOrDefault(m_testCtx.getCommandLine(), DEFAULT_NUM_MEASUREMENTS_PER_WORKLOAD))
	, m_numWorkloads				(numWorkloads)
	, m_workloadNdx					(-1)
	, m_workloadMeasurementNdx		(-1)
	, m_state						(STATE_LAST)	// Real initial state is set in init().
	, m_measureProgramNdx			(-1)
	, m_initialCalibrationStorage	(initialCalibrationStorage)
	, m_viewportWidth				(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getWidth())
	, m_viewportHeight				(caseType == CASETYPE_VERTEX ? 32 : renderCtx.getRenderTarget().getHeight())
	, m_gridSizeX					(caseType == CASETYPE_FRAGMENT ? 1 : 100)
	, m_gridSizeY					(caseType == CASETYPE_FRAGMENT ? 1 : 100)
{
	DE_ASSERT(m_numWorkloads > 0);
}
|
|
|
|
// Release any GL attribute buffers still owned (deinit normally does this;
// the destructor is a safety net).
OperatorPerformanceCase::~OperatorPerformanceCase (void)
{
	if (!m_attribBuffers.empty())
	{
		m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
		m_attribBuffers.clear();
	}
}
|
|
|
|
//! Write the render target's size, channel bit depths and MSAA status to the test log.
static void logRenderTargetInfo (TestLog& log, const tcu::RenderTarget& renderTarget)
{
	log << TestLog::Section("RenderTarget", "Render target")
		<< TestLog::Message << "size: " << renderTarget.getWidth() << "x" << renderTarget.getHeight() << TestLog::EndMessage
		<< TestLog::Message << "bits:"
		<< " R" << renderTarget.getPixelFormat().redBits
		<< " G" << renderTarget.getPixelFormat().greenBits
		<< " B" << renderTarget.getPixelFormat().blueBits
		<< " A" << renderTarget.getPixelFormat().alphaBits
		<< " D" << renderTarget.getDepthBits()
		<< " S" << renderTarget.getStencilBits()
		<< TestLog::EndMessage;

	if (renderTarget.getNumSamples() != 0)
		log << TestLog::Message << renderTarget.getNumSamples() << "x MSAA" << TestLog::EndMessage;
	else
		log << TestLog::Message << "No MSAA" << TestLog::EndMessage;

	log << TestLog::EndSection;
}
|
|
|
|
//! Collapse each workload record of program progNdx into a single
//! (workloadSize, medianFrameTime) data point.
vector<Vec2> OperatorPerformanceCase::getWorkloadMedianDataPoints (int progNdx) const
{
	const vector<WorkloadRecord>&	records = m_workloadRecords[progNdx];
	vector<Vec2>					points;

	points.reserve(records.size());

	for (int recordNdx = 0; recordNdx < (int)records.size(); recordNdx++)
	{
		const WorkloadRecord& record = records[recordNdx];
		points.push_back(Vec2((float)record.workloadSize, record.getMedianTime()));
	}

	return points;
}
|
|
|
|
// Make m_programs[progNdx] current: (re)create and fill attribute buffers for
// its attributes plus a full-screen a_position quad, bind them to the program's
// attribute locations, set general uniforms and the viewport.
void OperatorPerformanceCase::prepareProgram (int progNdx)
{
	DE_ASSERT(progNdx < (int)m_programs.size());
	DE_ASSERT(m_programData.size() == m_programs.size());

	const glw::Functions&	gl		= m_renderCtx.getFunctions();
	const ShaderProgram&	program	= *m_programs[progNdx];

	vector<AttribSpec> attributes = m_programData[progNdx].attributes;

	// Every program additionally gets a full-screen position attribute.
	attributes.push_back(AttribSpec("a_position",
									Vec4(-1.0f, -1.0f, 0.0f, 1.0f),
									Vec4( 1.0f, -1.0f, 0.0f, 1.0f),
									Vec4(-1.0f,  1.0f, 0.0f, 1.0f),
									Vec4( 1.0f,  1.0f, 0.0f, 1.0f)));

	DE_ASSERT(program.isOk());

	// Generate vertices.
	// Drop buffers from any previously prepared program before creating new ones.
	if (!m_attribBuffers.empty())
		gl.deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
	m_attribBuffers.resize(attributes.size(), 0);
	gl.genBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
	GLU_EXPECT_NO_ERROR(gl.getError(), "glGenBuffers()");

	// Upload interpolated grid data for each attribute.
	for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
	{
		std::vector<float> vertices;
		generateVertices(vertices, m_gridSizeX, m_gridSizeY, attributes[attribNdx]);

		gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
		gl.bufferData(GL_ARRAY_BUFFER, (glw::GLsizeiptr)(vertices.size()*sizeof(float)), &vertices[0], GL_STATIC_DRAW);
		GLU_EXPECT_NO_ERROR(gl.getError(), "Upload buffer data");
	}

	// Setup attribute bindings.
	for (int attribNdx = 0; attribNdx < (int)attributes.size(); attribNdx++)
	{
		int location = gl.getAttribLocation(program.getProgram(), attributes[attribNdx].name.c_str());

		// Attributes optimized out by the compiler get location -1; skip those.
		if (location >= 0)
		{
			gl.enableVertexAttribArray(location);
			gl.bindBuffer(GL_ARRAY_BUFFER, m_attribBuffers[attribNdx]);
			gl.vertexAttribPointer(location, 4, GL_FLOAT, GL_FALSE, 0, DE_NULL);
		}
	}
	GLU_EXPECT_NO_ERROR(gl.getError(), "Setup vertex input state");

	gl.useProgram(program.getProgram());
	setGeneralUniforms(program.getProgram());
	gl.viewport(0, 0, m_viewportWidth, m_viewportHeight);
}
|
|
|
|
// Set the workload-size uniform and do a warm-up render, in case the
// implementation does some draw-time compilation for the new uniform value.
void OperatorPerformanceCase::prepareWorkload (int progNdx, int workload)
{
	setWorkloadSizeUniform(m_programs[progNdx]->getProgram(), workload);
	render(m_calibrator.getCallCount());
}
|
|
|
|
// Advance the measurement state machine by one step: pick the next workload
// size (and program) to measure, preparing uniforms/programs accordingly, and
// update m_state when a phase (calibration, high-workload search, measuring)
// completes. May recurse once when switching to the next program.
void OperatorPerformanceCase::prepareNextRound (void)
{
	DE_ASSERT(m_state == STATE_CALIBRATING		||
			  m_state == STATE_FIND_HIGH_WORKLOAD	||
			  m_state == STATE_MEASURING);

	TestLog& log = m_testCtx.getLog();

	// Calibration just finished: move on to searching for a high workload size.
	if (m_state == STATE_CALIBRATING && m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
	{
		m_measureProgramNdx = 0;
		m_state = STATE_FIND_HIGH_WORKLOAD;
	}

	if (m_state == STATE_CALIBRATING)
		prepareWorkload(0, 1); // Calibration always uses program 0 with workload size 1.
	else if (m_state == STATE_FIND_HIGH_WORKLOAD)
	{
		vector<WorkloadRecord>& records = m_workloadRecordsFindHigh[m_measureProgramNdx];

		// Keep doubling the workload until the median frame time is at least
		// twice that of workload size 1.
		if (records.empty() || records.back().getMedianTime() < 2.0f*records[0].getMedianTime())
		{
			int workloadSize;

			if (records.empty())
				workloadSize = 1;
			else
			{
				workloadSize = records.back().workloadSize*2;

				if (workloadSize > MAX_WORKLOAD_SIZE)
				{
					log << TestLog::Message << "Even workload size " << records.back().workloadSize
						<< " doesn't give high enough frame time for program " << m_measureProgramNdx
						<< ". Can't get sensible result." << TestLog::EndMessage;
					MEASUREMENT_FAIL();
				}
			}

			records.push_back(WorkloadRecord(workloadSize));
			prepareWorkload(0, workloadSize);
			m_workloadMeasurementNdx = 0;
		}
		else
		{
			// Found a high-enough workload for this program; move to the next
			// program, or to the measuring phase if this was the last one.
			m_highWorkloadSizes[m_measureProgramNdx] = records.back().workloadSize;
			m_measureProgramNdx++;

			if (m_measureProgramNdx >= (int)m_programs.size())
			{
				m_state = STATE_MEASURING;
				m_workloadNdx = -1;
				m_measureProgramNdx = 0;
			}

			prepareProgram(m_measureProgramNdx);
			prepareNextRound(); // Recurse once to prepare the first workload of the next phase/program.
		}
	}
	else
	{
		// STATE_MEASURING: step through m_numWorkloads workload sizes per program.
		m_workloadNdx++;

		if (m_workloadNdx < m_numWorkloads)
		{
			DE_ASSERT(m_numWorkloads > 1);
			const int highWorkload	= m_highWorkloadSizes[m_measureProgramNdx];
			// Spread workload sizes evenly in [1, highWorkload]; if highWorkload
			// is small, just use consecutive sizes starting from 1.
			const int workload		= highWorkload > m_numWorkloads ?
										1 + m_workloadNdx*(highWorkload-1)/(m_numWorkloads-1) :
										1 + m_workloadNdx;

			prepareWorkload(m_measureProgramNdx, workload);

			m_workloadMeasurementNdx = 0;

			m_workloadRecords[m_measureProgramNdx].push_back(WorkloadRecord(workload));
		}
		else
		{
			// All workloads of this program measured; next program or report.
			m_measureProgramNdx++;

			if (m_measureProgramNdx < (int)m_programs.size())
			{
				m_workloadNdx = -1;
				m_workloadMeasurementNdx = 0;
				prepareProgram(m_measureProgramNdx);
				prepareNextRound();
			}
			else
				m_state = STATE_REPORTING;
		}
	}
}
|
|
|
|
// Set up GL state (additive blending), build and log all measured programs,
// size the per-program bookkeeping vectors, reset the calibrator (seeded from
// the shared initial calibration), and kick off the state machine.
void OperatorPerformanceCase::init (void)
{
	TestLog&				log	= m_testCtx.getLog();
	const glw::Functions&	gl	= m_renderCtx.getFunctions();

	// Validate that we have sane grid and viewport setup.
	DE_ASSERT(de::inBounds(m_gridSizeX, 1, 256) && de::inBounds(m_gridSizeY, 1, 256));
	TCU_CHECK(de::inRange(m_viewportWidth, 1, m_renderCtx.getRenderTarget().getWidth()) &&
			  de::inRange(m_viewportHeight, 1, m_renderCtx.getRenderTarget().getHeight()));

	logRenderTargetInfo(log, m_renderCtx.getRenderTarget());

	log << TestLog::Message << "Using additive blending." << TestLog::EndMessage;
	gl.enable(GL_BLEND);
	gl.blendEquation(GL_FUNC_ADD);
	gl.blendFunc(GL_ONE, GL_ONE);

	// Generate programs.
	DE_ASSERT(m_programs.empty());
	m_programData = generateProgramData();
	DE_ASSERT(!m_programData.empty());

	for (int progNdx = 0; progNdx < (int)m_programData.size(); progNdx++)
	{
		const string& vert = m_programData[progNdx].vertShaderSource;
		const string& frag = m_programData[progNdx].fragShaderSource;

		m_programs.push_back(SharedPtr<ShaderProgram>(new ShaderProgram(m_renderCtx, glu::makeVtxFragSources(vert, frag))));

		if (!m_programs.back()->isOk())
		{
			log << *m_programs.back();
			TCU_FAIL("Compile failed");
		}
	}

	// Log all programs.
	for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
		log << TestLog::Section("Program" + de::toString(progNdx), "Program " + de::toString(progNdx))
			<< TestLog::Message << m_programData[progNdx].description << TestLog::EndMessage
			<< *m_programs[progNdx]
			<< TestLog::EndSection;

	m_highWorkloadSizes.resize(m_programData.size());
	m_workloadRecordsFindHigh.resize(m_programData.size());
	m_workloadRecords.resize(m_programData.size());

	m_calibrator.clear(CalibratorParameters(m_initialCalibrationStorage->initialNumCalls, 10 /* calibrate iteration frames */, 2000.0f /* calibrate iteration shortcut threshold (ms) */, 16 /* max calibrate iterations */,
											1000.0f/30.0f /* frame time (ms) */, 1000.0f/60.0f /* frame time cap (ms) */, 1000.0f /* target measure duration (ms) */));
	m_state = STATE_CALIBRATING;

	prepareProgram(0);
	prepareNextRound();
}
|
|
|
|
// Free the GL attribute buffers and drop the shader programs (the shared
// pointers delete the GL program objects).
void OperatorPerformanceCase::deinit (void)
{
	if (!m_attribBuffers.empty())
	{
		m_renderCtx.getFunctions().deleteBuffers((glw::GLsizei)m_attribBuffers.size(), &m_attribBuffers[0]);
		m_attribBuffers.clear();
	}

	m_programs.clear();
}
|
|
|
|
// Issue numDrawCalls draws of the full triangle grid with the currently
// prepared program, then force completion by reading back one pixel.
void OperatorPerformanceCase::render (int numDrawCalls)
{
	const glw::Functions&	gl			= m_renderCtx.getFunctions();
	const int				numVertices	= getNumVertices(m_gridSizeX, m_gridSizeY);

	for (int callNdx = 0; callNdx < numDrawCalls; callNdx++)
		gl.drawArrays(GL_TRIANGLES, 0, numVertices);

	glu::readPixels(m_renderCtx, 0, 0, tcu::Surface(1, 1).getAccess()); // \note Serves as a more reliable replacement for glFinish().
}
|
|
|
|
//! Render numDrawCalls draws and return the elapsed wall-clock time in microseconds.
deUint64 OperatorPerformanceCase::renderAndMeasure (int numDrawCalls)
{
	const deUint64 timeBefore = deGetMicroseconds();

	render(numDrawCalls);

	const deUint64 timeAfter = deGetMicroseconds();
	return timeAfter - timeBefore;
}
|
|
|
|
// If even a single draw call exceeds the calibrator's target frame time,
// shrink the triangle grid (vertex cases) or the viewport (fragment cases)
// so a draw fits the target, then re-prepare program 0 and log the final
// grid and viewport sizes.
void OperatorPerformanceCase::adjustAndLogGridAndViewport (void)
{
	TestLog& log = m_testCtx.getLog();

	// If call count is just 1, and the target frame time still wasn't reached, reduce grid or viewport size.
	if (m_calibrator.getCallCount() == 1)
	{
		const gls::MeasureState&	calibratorMeasure	= m_calibrator.getMeasureState();
		const float					drawCallTime		= (float)calibratorMeasure.getTotalTime() / (float)calibratorMeasure.frameTimes.size();
		const float					targetDrawCallTime	= m_calibrator.getParameters().targetFrameTimeUs;
		const float					targetRatio			= targetDrawCallTime / drawCallTime;

		if (targetRatio < 0.95f)
		{
			// Reduce grid or viewport size assuming draw call time scales proportionally.
			// Scale both dimensions by sqrt(ratio) so the total primitive/pixel
			// count scales by approximately targetRatio.
			if (m_caseType == CASETYPE_VERTEX)
			{
				const float targetRatioSqrt = deFloatSqrt(targetRatio);
				m_gridSizeX = (int)(targetRatioSqrt * (float)m_gridSizeX);
				m_gridSizeY = (int)(targetRatioSqrt * (float)m_gridSizeY);
				TCU_CHECK_MSG(m_gridSizeX >= 1 && m_gridSizeY >= 1, "Can't decrease grid size enough to achieve low-enough draw times");
				log << TestLog::Message << "Note: triangle grid size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
			}
			else
			{
				const float targetRatioSqrt = deFloatSqrt(targetRatio);
				m_viewportWidth  = (int)(targetRatioSqrt * (float)m_viewportWidth);
				m_viewportHeight = (int)(targetRatioSqrt * (float)m_viewportHeight);
				TCU_CHECK_MSG(m_viewportWidth >= 1 && m_viewportHeight >= 1, "Can't decrease viewport size enough to achieve low-enough draw times");
				log << TestLog::Message << "Note: viewport size reduced from original; it's now smaller than during calibration." << TestLog::EndMessage;
			}
		}
	}

	prepareProgram(0);

	// Log grid and viewport sizes.
	log << TestLog::Message << "Grid size: " << m_gridSizeX << "x" << m_gridSizeY << TestLog::EndMessage;
	log << TestLog::Message << "Viewport: " << m_viewportWidth << "x" << m_viewportHeight << TestLog::EndMessage;
}
|
|
|
|
// Top-level per-frame driver. Phases:
//   1. Calibration: let the Theil-Sen calibrator converge on a draw-call count per frame.
//   2. STATE_FIND_HIGH_WORKLOAD / STATE_MEASURING: collect m_numMeasurementsPerWorkload
//      frame-time samples per workload size, per program.
//   3. STATE_REPORTING: fit segmented line estimators to the (workload, median frame time)
//      data of each program, log everything, and derive the cost of a single shader operation.
// Returns CONTINUE while more frames are needed, STOP once the result has been reported.
OperatorPerformanceCase::IterateResult OperatorPerformanceCase::iterate (void)
{
	const TheilSenCalibrator::State calibratorState = m_calibrator.getState();

	if (calibratorState != TheilSenCalibrator::STATE_FINISHED)
	{
		// Still calibrating: advance the calibrator's own state machine.
		if (calibratorState == TheilSenCalibrator::STATE_RECOMPUTE_PARAMS)
			m_calibrator.recomputeParameters();
		else if (calibratorState == TheilSenCalibrator::STATE_MEASURE)
			m_calibrator.recordIteration(renderAndMeasure(m_calibrator.getCallCount()));
		else
			DE_ASSERT(false);

		// Calibration may have just finished; if so, fix up grid/viewport, start measuring,
		// and remember the calibrated call count for sibling cases.
		if (m_calibrator.getState() == TheilSenCalibrator::STATE_FINISHED)
		{
			logCalibrationInfo(m_testCtx.getLog(), m_calibrator);
			adjustAndLogGridAndViewport();
			prepareNextRound();
			m_initialCalibrationStorage->initialNumCalls = m_calibrator.getCallCount();
		}
	}
	else if (m_state == STATE_FIND_HIGH_WORKLOAD || m_state == STATE_MEASURING)
	{
		if (m_workloadMeasurementNdx < m_numMeasurementsPerWorkload)
		{
			// Record one more frame time for the current workload of the current program.
			vector<WorkloadRecord>& records = m_state == STATE_FIND_HIGH_WORKLOAD ? m_workloadRecordsFindHigh[m_measureProgramNdx] : m_workloadRecords[m_measureProgramNdx];
			records.back().addFrameTime((float)renderAndMeasure(m_calibrator.getCallCount()));
			m_workloadMeasurementNdx++;
		}
		else
			prepareNextRound();
	}
	else
	{
		DE_ASSERT(m_state == STATE_REPORTING);

		TestLog& log = m_testCtx.getLog();
		const int drawCallCount = m_calibrator.getCallCount();

		{
			// Compute per-program estimators for measurements.
			vector<SegmentedEstimator> estimators;
			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
				estimators.push_back(computeSegmentedEstimator(getWorkloadMedianDataPoints(progNdx)));

			// Log measurements and their estimators for all programs.
			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
			{
				const SegmentedEstimator&	estimator	= estimators[progNdx];
				const string				progNdxStr	= de::toString(progNdx);
				// \note Sort a copy of the records by workload size so the member data stays untouched.
				vector<WorkloadRecord>		records		= m_workloadRecords[progNdx];
				std::sort(records.begin(), records.end());

				{
					const tcu::ScopedLogSection section(log,
														"Program" + progNdxStr + "Measurements",
														"Measurements for program " + progNdxStr);

					// Sample list of individual frame times.

					log << TestLog::SampleList("Program" + progNdxStr + "IndividualFrameTimes", "Individual frame times")
						<< TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
						<< TestLog::ValueInfo("FrameTime", "Frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
						<< TestLog::EndSampleInfo;

					for (int i = 0; i < (int)records.size(); i++)
						for (int j = 0; j < (int)records[i].frameTimes.size(); j++)
							log << TestLog::Sample << records[i].workloadSize << records[i].frameTimes[j] << TestLog::EndSample;

					log << TestLog::EndSampleList;

					// Sample list of median frame times.

					log << TestLog::SampleList("Program" + progNdxStr + "MedianFrameTimes", "Median frame times")
						<< TestLog::SampleInfo << TestLog::ValueInfo("Workload", "Workload", "", QP_SAMPLE_VALUE_TAG_PREDICTOR)
						<< TestLog::ValueInfo("MedianFrameTime", "Median frame time", "us", QP_SAMPLE_VALUE_TAG_RESPONSE)
						<< TestLog::EndSampleInfo;

					for (int i = 0; i < (int)records.size(); i++)
						log << TestLog::Sample << records[i].workloadSize << records[i].getMedianTime() << TestLog::EndSample;

					log << TestLog::EndSampleList;

					log << TestLog::Float("Program" + progNdxStr + "WorkloadCostEstimate", "Workload cost estimate", "us / workload", QP_KEY_TAG_TIME, estimator.right.coefficient);

					// pivotX > -inf means the estimator actually split the data into two segments.
					if (estimator.pivotX > -std::numeric_limits<float>::infinity())
						log << TestLog::Message << "Note: the data points with x coordinate greater than or equal to " << estimator.pivotX
							<< " seem to form a rising line, and the rest of data points seem to form a near-horizontal line" << TestLog::EndMessage
							<< TestLog::Message << "Note: the left line is estimated to be " << lineParamsString(estimator.left)
							<< " and the right line " << lineParamsString(estimator.right) << TestLog::EndMessage;
					else
						log << TestLog::Message << "Note: the data seem to form a single line: " << lineParamsString(estimator.right) << TestLog::EndMessage;
				}
			}

			// A non-positive slope means more workload did not cost more time: the measurement is unusable.
			for (int progNdx = 0; progNdx < (int)m_programs.size(); progNdx++)
			{
				if (estimators[progNdx].right.coefficient <= 0.0f)
				{
					log << TestLog::Message << "Slope of measurements for program " << progNdx << " isn't positive. Can't get sensible result." << TestLog::EndMessage;
					MEASUREMENT_FAIL();
				}
			}

			// \note For each estimator, .right.coefficient is the increase in draw time (in microseconds) when
			//		 incrementing shader workload size by 1, when D draw calls are done, with a vertex/fragment count
			//		 of R.
			//
			//		 The measurements of any single program can't tell us the final result (time of single operation),
			//		 so we use computeSingleOperationTime to compute it from multiple programs' measurements in a
			//		 subclass-defined manner.
			//
			//		 After that, microseconds per operation can be calculated as singleOperationTime / (D * R).

			{
				vector<float> perProgramSlopes;
				for (int i = 0; i < (int)m_programs.size(); i++)
					perProgramSlopes.push_back(estimators[i].right.coefficient);

				logSingleOperationCalculationInfo();

				const float		maxSlope				= *std::max_element(perProgramSlopes.begin(), perProgramSlopes.end());
				const float		usecsPerFramePerOp		= computeSingleOperationTime(perProgramSlopes);
				const int		vertexOrFragmentCount	= m_caseType == CASETYPE_VERTEX ?
															getNumVertices(m_gridSizeX, m_gridSizeY) :
															m_viewportWidth*m_viewportHeight;
				const double	usecsPerDrawCallPerOp	= usecsPerFramePerOp / (double)drawCallCount;
				const double	usecsPerSingleOp		= usecsPerDrawCallPerOp / (double)vertexOrFragmentCount;
				const double	megaOpsPerSecond		= (double)(drawCallCount*vertexOrFragmentCount) / usecsPerFramePerOp;
				// Number of operations that fit under the flat (left) part of the segmented line, i.e. come "for free".
				const int		numFreeOps				= de::max(0, (int)deFloatFloor(intersectionX(estimators[0].left,
																									 LineParameters(estimators[0].right.offset,
																													usecsPerFramePerOp))));

				log << TestLog::Integer("VertexOrFragmentCount",
										"R = " + string(m_caseType == CASETYPE_VERTEX ? "Vertex" : "Fragment") + " count",
										"", QP_KEY_TAG_NONE, vertexOrFragmentCount)

					<< TestLog::Integer("DrawCallsPerFrame", "D = Draw calls per frame", "", QP_KEY_TAG_NONE, drawCallCount)

					<< TestLog::Integer("VerticesOrFragmentsPerFrame",
										"R*D = " + string(m_caseType == CASETYPE_VERTEX ? "Vertices" : "Fragments") + " per frame",
										"", QP_KEY_TAG_NONE, vertexOrFragmentCount*drawCallCount)

					<< TestLog::Float("TimePerFramePerOp",
									  "Estimated cost of R*D " + string(m_caseType == CASETYPE_VERTEX ? "vertices" : "fragments")
									  + " (i.e. one frame) with one shader operation",
									  "us", QP_KEY_TAG_TIME, (float)usecsPerFramePerOp)

					<< TestLog::Float("TimePerDrawcallPerOp",
									  "Estimated cost of one draw call with one shader operation",
									  "us", QP_KEY_TAG_TIME, (float)usecsPerDrawCallPerOp)

					<< TestLog::Float("TimePerSingleOp",
									  "Estimated cost of a single shader operation",
									  "us", QP_KEY_TAG_TIME, (float)usecsPerSingleOp);

				// \note Sometimes, when the operation is free or very cheap, it can happen that the shader with the operation runs,
				//		 for some reason, a bit faster than the shader without the operation, and thus we get a negative result. The
				//		 following threshold values for accepting a negative or almost-zero result are rather quick and dirty.
				if (usecsPerFramePerOp <= -0.1f*maxSlope)
				{
					log << TestLog::Message << "Got strongly negative result." << TestLog::EndMessage;
					MEASUREMENT_FAIL();
				}
				else if (usecsPerFramePerOp <= 0.001*maxSlope)
				{
					log << TestLog::Message << "Cost of operation seems to be approximately zero." << TestLog::EndMessage;
					m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
				}
				else
				{
					log << TestLog::Float("OpsPerSecond",
										  "Operations per second",
										  "Million/s", QP_KEY_TAG_PERFORMANCE, (float)megaOpsPerSecond)

						<< TestLog::Integer("NumFreeOps",
											"Estimated number of \"free\" operations",
											"", QP_KEY_TAG_PERFORMANCE, numFreeOps);

					m_testCtx.setTestResult(QP_TEST_RESULT_PASS, de::floatToString((float)megaOpsPerSecond, 2).c_str());
				}

				m_state = STATE_FINISHED;
			}
		}

		return STOP;
	}

	return CONTINUE;
}
|
|
|
|
// Binary operator case.
// Measures the cost of a single binary operator (+, -, *, /) by comparing two otherwise
// identical shader programs whose inner loops are unrolled by different amounts.
class BinaryOpCase : public OperatorPerformanceCase
{
public:
							BinaryOpCase			(Context& context, const char* name, const char* description, const char* op,
													 glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration);

protected:
	// OperatorPerformanceCase hooks.
	vector<ProgramContext>	generateProgramData					(void) const;
	void					setGeneralUniforms					(deUint32 program) const;
	void					setWorkloadSizeUniform				(deUint32 program, int numOperations) const;
	float					computeSingleOperationTime			(const vector<float>& perProgramOperationCosts) const;
	void					logSingleOperationCalculationInfo	(void) const;

private:
	enum ProgramID
	{
		// \note 0-based sequential numbering is relevant, because these are also used as vector indices.
		// \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
		PROGRAM_WITH_BIGGER_LOOP = 0,
		PROGRAM_WITH_SMALLER_LOOP,

		PROGRAM_LAST
	};

	// Builds the vertex/fragment shader sources and attribute data for one of the two programs.
	ProgramContext			generateSingleProgramData			(ProgramID) const;

	const string			m_op;			// The operator token, e.g. "+".
	const glu::DataType		m_type;			// Operand data type (float/int scalar or vector).
	const glu::Precision	m_precision;	// Precision qualifier used for the operands.
	const bool				m_useSwizzle;	// Whether inputs are read through swizzles instead of casts.
};
|
|
|
|
// Constructor: forwards the test identity and calibration storage to the base class and
// records the operator/type/precision configuration for shader generation.
BinaryOpCase::BinaryOpCase (Context& context, const char* name, const char* description, const char* op,
							glu::DataType type, glu::Precision precision, bool useSwizzle, bool isVertex, const InitialCalibrationStorage& initialCalibration)
	: OperatorPerformanceCase	(context.getTestContext(), context.getRenderContext(), name, description,
								 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
	, m_op						(op)
	, m_type					(type)
	, m_precision				(precision)
	, m_useSwizzle				(useSwizzle)
{
}
|
|
|
|
// Generates the vertex/fragment shader pair, attribute data and description for one program.
// The two programs differ only in how many times the binary-operator expressions are unrolled
// inside the workload loop; the workload uniform controls the loop iteration count.
BinaryOpCase::ProgramContext BinaryOpCase::generateSingleProgramData (ProgramID programID) const
{
	DE_ASSERT(glu::isDataTypeFloatOrVec(m_type) || glu::isDataTypeIntOrIVec(m_type));

	const bool			isVertexCase	= m_caseType == CASETYPE_VERTEX;
	const char* const	precision		= glu::getPrecisionName(m_precision);
	// ESSL 1.00 doesn't guarantee lowp integer varyings/attributes; promote inputs to mediump in that case.
	const char* const	inputPrecision	= glu::isDataTypeIntOrIVec(m_type) && m_precision == glu::PRECISION_LOWP ? "mediump" : precision;
	const char* const	typeName		= getDataTypeName(m_type);

	std::ostringstream	vtx;
	std::ostringstream	frag;
	std::ostringstream&	op				= isVertexCase ? vtx : frag;	// Stream that receives the measured workload code.

	// Attributes.
	vtx << "attribute highp vec4 a_position;\n";
	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
		vtx << "attribute " << inputPrecision << " vec4 a_in" << i << ";\n";

	if (isVertexCase)
	{
		vtx << "varying mediump vec4 v_color;\n";
		frag << "varying mediump vec4 v_color;\n";
	}
	else
	{
		// Fragment case: route every input through a varying so the fragment shader does the work.
		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
		{
			vtx << "varying " << inputPrecision << " vec4 v_in" << i << ";\n";
			frag << "varying " << inputPrecision << " vec4 v_in" << i << ";\n";
		}
	}

	op << "uniform mediump int u_numLoopIterations;\n";
	if (isVertexCase)
		op << "uniform mediump float u_zero;\n";

	vtx << "\n";
	vtx << "void main()\n";
	vtx << "{\n";

	if (!isVertexCase)
		vtx << "\tgl_Position = a_position;\n";

	frag << "\n";
	frag << "void main()\n";
	frag << "{\n";

	// Expression inputs.
	const char* const prefix = isVertexCase ? "a_" : "v_";
	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
	{
		const int	inSize	= getDataTypeScalarSize(m_type);
		const bool	isInt	= de::inRange<int>(m_type, TYPE_INT, TYPE_INT_VEC4);
		// Integer inputs always need a constructor cast; float inputs need one unless
		// they're consumed as full vec4s or narrowed via a swizzle.
		const bool	cast	= isInt || (!m_useSwizzle && m_type != TYPE_FLOAT_VEC4);

		op << "\t" << precision << " " << typeName << " in" << i << " = ";

		if (cast)
			op << typeName << "(";

		op << prefix << "in" << i;

		if (m_useSwizzle)
			op << "." << s_swizzles[i % DE_LENGTH_OF_ARRAY(s_swizzles)][inSize-1];

		if (cast)
			op << ")";

		op << ";\n";
	}

	// Operation accumulation variables.
	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
	{
		op << "\t" << precision << " " << typeName << " acc" << i << "a" << " = in" << i+0 << ";\n";
		op << "\t" << precision << " " << typeName << " acc" << i << "b" << " = in" << i+1 << ";\n";
	}

	// Loop, with expressions in it.
	op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
	op << "\t{\n";
	{
		// The only difference between the two programs: the unroll factor of the loop body.
		const int unrollAmount = programID == PROGRAM_WITH_SMALLER_LOOP ? BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT : BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
		for (int unrollNdx = 0; unrollNdx < unrollAmount; unrollNdx++)
		{
			for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
			{
				if (i > 0 || unrollNdx > 0)
					op << "\n";
				// Mutually-dependent accumulators prevent the compiler from folding the loop away.
				op << "\t\tacc" << i << "a = acc" << i << "b " << m_op << " acc" << i << "a" << ";\n";
				op << "\t\tacc" << i << "b = acc" << i << "a " << m_op << " acc" << i << "b" << ";\n";
			}
		}
	}
	op << "\t}\n";
	op << "\n";

	// Result variable (sum of accumulation variables).
	op << "\t" << precision << " " << typeName << " res =";
	for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
		op << (i > 0 ? " "+m_op : "") << " acc" << i << "b";
	op << ";\n";

	// Convert to color.
	op << "\tmediump vec4 color = ";
	if (m_type == TYPE_FLOAT_VEC4)
		op << "res";
	else
	{
		int size = getDataTypeScalarSize(m_type);
		op << "vec4(res";

		for (int i = size; i < 4; i++)
			op << ", " << (i == 3 ? "1.0" : "0.0");

		op << ")";
	}
	op << ";\n";
	op << "\t" << (isVertexCase ? "v_color" : "gl_FragColor") << " = color;\n";

	if (isVertexCase)
	{
		// Tie the result into gl_Position (scaled by a zero uniform) so the computation can't be eliminated.
		vtx << "\tgl_Position = a_position + u_zero*color;\n";
		frag << "\tgl_FragColor = v_color;\n";
	}
	else
	{
		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
			vtx << "\tv_in" << i << " = a_in" << i << ";\n";
	}

	vtx << "}\n";
	frag << "}\n";

	{
		vector<AttribSpec> attributes;
		for (int i = 0; i < BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS+1; i++)
			attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
											Vec4(2.0f, 2.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
											Vec4(1.0f, 2.0f, 1.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
											Vec4(2.0f, 1.0f, 2.0f, 2.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
											Vec4(1.0f, 1.0f, 2.0f, 1.0f).swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4)));

		{
			string description = "This is the program with the ";

			// \note The previous version used DE_NULL as the fall-through value of a three-way
			//		 ternary here; appending a null const char* to std::string is undefined
			//		 behavior, so assert the invariant explicitly instead.
			DE_ASSERT(programID == PROGRAM_WITH_SMALLER_LOOP || programID == PROGRAM_WITH_BIGGER_LOOP);
			description += programID == PROGRAM_WITH_SMALLER_LOOP ? "smaller" : "bigger";

			description += " loop.\n"
						   "Note: workload size for this program means the number of loop iterations.";

			return ProgramContext(vtx.str(), frag.str(), attributes, description);
		}
	}
}
|
|
|
|
// Builds the program contexts for all ProgramIDs, in enum order (indices must match the enum).
vector<BinaryOpCase::ProgramContext> BinaryOpCase::generateProgramData (void) const
{
	vector<ProgramContext> result;
	result.reserve(PROGRAM_LAST);

	for (int programNdx = 0; programNdx < PROGRAM_LAST; programNdx++)
		result.push_back(generateSingleProgramData(static_cast<ProgramID>(programNdx)));

	return result;
}
|
|
|
|
// Sets the workload-independent uniforms. u_zero only exists in vertex-case shaders;
// for other programs getUniformLocation() returns -1, which glUniform silently ignores.
void BinaryOpCase::setGeneralUniforms (deUint32 program) const
{
	const glw::Functions&	gl		= m_renderCtx.getFunctions();
	const int				zeroLoc	= gl.getUniformLocation(program, "u_zero");

	gl.uniform1f(zeroLoc, 0.0f);
}
|
|
|
|
// Sets the workload size; for this case the workload is the shader loop's iteration count.
void BinaryOpCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
{
	const glw::Functions&	gl				= m_renderCtx.getFunctions();
	const int				iterationsLoc	= gl.getUniformLocation(program, "u_numLoopIterations");

	gl.uniform1i(iterationsLoc, numLoopIterations);
}
|
|
|
|
float BinaryOpCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
|
|
{
|
|
DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
|
|
|
|
const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
|
|
const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
|
|
const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
|
|
DE_STATIC_ASSERT(numOpsInsideLoopInBigProgram > numOpsInsideLoopInSmallProgram);
|
|
const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
|
|
const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_BIGGER_LOOP] - perProgramOperationCosts[PROGRAM_WITH_SMALLER_LOOP];
|
|
|
|
return programOperationCostDiff / (float)opDiff;
|
|
}
|
|
|
|
void BinaryOpCase::logSingleOperationCalculationInfo (void) const
|
|
{
|
|
const int baseNumOpsInsideLoop = 2 * BINARY_OPERATOR_CASE_NUM_INDEPENDENT_CALCULATIONS;
|
|
const int numOpsInsideLoopInSmallProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_SMALL_PROGRAM_UNROLL_AMOUNT;
|
|
const int numOpsInsideLoopInBigProgram = baseNumOpsInsideLoop * BINARY_OPERATOR_CASE_BIG_PROGRAM_UNROLL_AMOUNT;
|
|
const int opDiff = numOpsInsideLoopInBigProgram - numOpsInsideLoopInSmallProgram;
|
|
const char* const opName = m_op == "+" ? "addition"
|
|
: m_op == "-" ? "subtraction"
|
|
: m_op == "*" ? "multiplication"
|
|
: m_op == "/" ? "division"
|
|
: DE_NULL;
|
|
DE_ASSERT(opName != DE_NULL);
|
|
|
|
m_testCtx.getLog() << TestLog::Message << "Note: the bigger program contains " << opDiff << " more "
|
|
<< opName << " operations in one loop iteration than the small program; "
|
|
<< "cost of one operation is calculated as (cost_of_bigger_workload - cost_of_smaller_workload) / " << opDiff
|
|
<< TestLog::EndMessage;
|
|
}
|
|
|
|
// Built-in function case.
|
|
class FunctionCase : public OperatorPerformanceCase
|
|
{
|
|
public:
|
|
enum
|
|
{
|
|
MAX_PARAMS = 3
|
|
};
|
|
|
|
FunctionCase (Context& context,
|
|
const char* name,
|
|
const char* description,
|
|
const char* func,
|
|
glu::DataType returnType,
|
|
const glu::DataType paramTypes[MAX_PARAMS],
|
|
const Vec4& attribute,
|
|
int modifyParamNdx, //!< Add a compile-time constant (2.0) to the parameter at this index. This is ignored if negative.
|
|
bool useNearlyConstantINputs, //!< Function inputs shouldn't be much bigger than 'attribute'.
|
|
glu::Precision precision,
|
|
bool isVertex,
|
|
const InitialCalibrationStorage& initialCalibration);
|
|
|
|
protected:
|
|
vector<ProgramContext> generateProgramData (void) const;
|
|
void setGeneralUniforms (deUint32 program) const;
|
|
void setWorkloadSizeUniform (deUint32 program, int numOperations) const;
|
|
float computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const;
|
|
void logSingleOperationCalculationInfo (void) const;
|
|
|
|
private:
|
|
enum ProgramID
|
|
{
|
|
// \note 0-based sequential numbering is relevant, because these are also used as vector indices.
|
|
// \note The first program should be the heaviest, because OperatorPerformanceCase uses it to reduce grid/viewport size when going too slow.
|
|
PROGRAM_WITH_FUNCTION_CALLS = 0,
|
|
PROGRAM_WITHOUT_FUNCTION_CALLS,
|
|
|
|
PROGRAM_LAST
|
|
};
|
|
|
|
//! Forms a "sum" expression from aExpr and bExpr; for booleans, this is "equal(a,b)", otherwise actual sum.
|
|
static string sumExpr (const string& aExpr, const string& bExpr, glu::DataType type);
|
|
//! Forms an expression used to increment an input value in the shader. If type is boolean, this is just
|
|
//! baseExpr; otherwise, baseExpr is modified by multiplication or division by a loop index,
|
|
//! to prevent simple compiler optimizations. See m_useNearlyConstantInputs for more explanation.
|
|
static string incrementExpr (const string& baseExpr, glu::DataType type, bool divide);
|
|
|
|
ProgramContext generateSingleProgramData (ProgramID) const;
|
|
|
|
const string m_func;
|
|
const glu::DataType m_returnType;
|
|
glu::DataType m_paramTypes[MAX_PARAMS];
|
|
// \note m_modifyParamNdx, if not negative, specifies the index of the parameter to which a
|
|
// compile-time constant (2.0) is added. This is a quick and dirty way to deal with
|
|
// functions like clamp or smoothstep that require that a certain parameter is
|
|
// greater than a certain other parameter.
|
|
const int m_modifyParamNdx;
|
|
// \note m_useNearlyConstantInputs determines whether the inputs given to the function
|
|
// should increase (w.r.t m_attribute) only by very small amounts. This is relevant
|
|
// for functions like asin, which requires its inputs to be in a specific range.
|
|
// In practice, this affects whether expressions used to increment the input
|
|
// variables use division instead of multiplication; normally, multiplication is used,
|
|
// but it's hard to keep the increments very small that way, and division shouldn't
|
|
// be the default, since for many functions (probably not asin, luckily), division
|
|
// is too heavy and dominates time-wise.
|
|
const bool m_useNearlyConstantInputs;
|
|
const Vec4 m_attribute;
|
|
const glu::Precision m_precision;
|
|
};
|
|
|
|
// Constructor: forwards the test identity and calibration storage to the base class and
// records the function's signature and input-generation configuration. The fixed-size
// parameter-type array is copied element by element; unused slots hold TYPE_INVALID.
FunctionCase::FunctionCase (Context&							context,
							const char*							name,
							const char*							description,
							const char*							func,
							glu::DataType						returnType,
							const glu::DataType					paramTypes[MAX_PARAMS],
							const Vec4&							attribute,
							int									modifyParamNdx,
							bool								useNearlyConstantInputs,
							glu::Precision						precision,
							bool								isVertex,
							const InitialCalibrationStorage&	initialCalibration)
	: OperatorPerformanceCase	(context.getTestContext(), context.getRenderContext(), name, description,
								 isVertex ? CASETYPE_VERTEX : CASETYPE_FRAGMENT, NUM_WORKLOADS, initialCalibration)
	, m_func					(func)
	, m_returnType				(returnType)
	, m_modifyParamNdx			(modifyParamNdx)
	, m_useNearlyConstantInputs	(useNearlyConstantInputs)
	, m_attribute				(attribute)
	, m_precision				(precision)
{
	for (int i = 0; i < MAX_PARAMS; i++)
		m_paramTypes[i] = paramTypes[i];
}
|
|
|
|
// Builds a GLSL "sum" expression of two sub-expressions. Booleans have no '+', so they
// are combined with (component-wise) equality instead; everything else uses actual addition.
string FunctionCase::sumExpr (const string& aExpr, const string& bExpr, glu::DataType type)
{
	if (!glu::isDataTypeBoolOrBVec(type))
		return "(" + aExpr + " + " + bExpr + ")";

	return type == glu::TYPE_BOOL
		 ? "(" + aExpr + " == " + bExpr + ")"
		 : "equal(" + aExpr + ", " + bExpr + ")";
}
|
|
|
|
// Builds a GLSL expression that perturbs baseExpr by the loop index to defeat constant
// folding. Booleans pass through unchanged; numeric types are multiplied by (i+1), or
// divided by it when the inputs must stay nearly constant.
string FunctionCase::incrementExpr (const string& baseExpr, glu::DataType type, bool divide)
{
	if (glu::isDataTypeBoolOrBVec(type))
		return baseExpr;

	const string scaleOp = divide ? "/" : "*";

	if (glu::isDataTypeIntOrIVec(type))
		return "(" + baseExpr + scaleOp + "(i+1))";

	return "(" + baseExpr + scaleOp + "float(i+1))";
}
|
|
|
|
FunctionCase::ProgramContext FunctionCase::generateSingleProgramData (ProgramID programID) const
|
|
{
|
|
const bool isVertexCase = m_caseType == CASETYPE_VERTEX;
|
|
const char* const precision = glu::getPrecisionName(m_precision);
|
|
const char* const returnTypeName = getDataTypeName(m_returnType);
|
|
const string returnPrecisionMaybe = glu::isDataTypeBoolOrBVec(m_returnType) ? "" : string() + precision + " ";
|
|
const char* inputPrecision = DE_NULL;
|
|
const bool isMatrixReturn = isDataTypeMatrix(m_returnType);
|
|
int numParams = 0;
|
|
const char* paramTypeNames[MAX_PARAMS];
|
|
string paramPrecisionsMaybe[MAX_PARAMS];
|
|
|
|
for (int i = 0; i < MAX_PARAMS; i++)
|
|
{
|
|
paramTypeNames[i] = getDataTypeName(m_paramTypes[i]);
|
|
paramPrecisionsMaybe[i] = glu::isDataTypeBoolOrBVec(m_paramTypes[i]) ? "" : string() + precision + " ";
|
|
|
|
if (inputPrecision == DE_NULL && isDataTypeIntOrIVec(m_paramTypes[i]) && m_precision == glu::PRECISION_LOWP)
|
|
inputPrecision = "mediump";
|
|
|
|
if (m_paramTypes[i] != TYPE_INVALID)
|
|
numParams = i+1;
|
|
}
|
|
|
|
DE_ASSERT(numParams > 0);
|
|
|
|
if (inputPrecision == DE_NULL)
|
|
inputPrecision = precision;
|
|
|
|
int numAttributes = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS + numParams - 1;
|
|
std::ostringstream vtx;
|
|
std::ostringstream frag;
|
|
std::ostringstream& op = isVertexCase ? vtx : frag;
|
|
|
|
// Attributes.
|
|
vtx << "attribute highp vec4 a_position;\n";
|
|
for (int i = 0; i < numAttributes; i++)
|
|
vtx << "attribute " << inputPrecision << " vec4 a_in" << i << ";\n";
|
|
|
|
if (isVertexCase)
|
|
{
|
|
vtx << "varying mediump vec4 v_color;\n";
|
|
frag << "varying mediump vec4 v_color;\n";
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < numAttributes; i++)
|
|
{
|
|
vtx << "varying " << inputPrecision << " vec4 v_in" << i << ";\n";
|
|
frag << "varying " << inputPrecision << " vec4 v_in" << i << ";\n";
|
|
}
|
|
}
|
|
|
|
op << "uniform mediump int u_numLoopIterations;\n";
|
|
if (isVertexCase)
|
|
op << "uniform mediump float u_zero;\n";
|
|
|
|
for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
|
|
op << "uniform " << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " u_inc" << (char)('A'+paramNdx) << ";\n";
|
|
|
|
vtx << "\n";
|
|
vtx << "void main()\n";
|
|
vtx << "{\n";
|
|
|
|
if (!isVertexCase)
|
|
vtx << "\tgl_Position = a_position;\n";
|
|
|
|
frag << "\n";
|
|
frag << "void main()\n";
|
|
frag << "{\n";
|
|
|
|
// Function call input and return value accumulation variables.
|
|
{
|
|
const char* const inPrefix = isVertexCase ? "a_" : "v_";
|
|
|
|
for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
|
|
{
|
|
for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
|
|
{
|
|
const glu::DataType paramType = m_paramTypes[paramNdx];
|
|
const bool mustCast = paramType != glu::TYPE_FLOAT_VEC4;
|
|
|
|
op << "\t" << paramPrecisionsMaybe[paramNdx] << paramTypeNames[paramNdx] << " in" << calcNdx << (char)('a'+paramNdx) << " = ";
|
|
|
|
if (mustCast)
|
|
op << paramTypeNames[paramNdx] << "(";
|
|
|
|
if (glu::isDataTypeMatrix(paramType))
|
|
{
|
|
static const char* const swizzles[3] = { "x", "xy", "xyz" };
|
|
const int numRows = glu::getDataTypeMatrixNumRows(paramType);
|
|
const int numCols = glu::getDataTypeMatrixNumColumns(paramType);
|
|
const string swizzle = numRows < 4 ? string() + "." + swizzles[numRows-1] : "";
|
|
|
|
for (int i = 0; i < numCols; i++)
|
|
op << (i > 0 ? ", " : "") << inPrefix << "in" << calcNdx+paramNdx << swizzle;
|
|
}
|
|
else
|
|
{
|
|
op << inPrefix << "in" << calcNdx+paramNdx;
|
|
|
|
if (paramNdx == m_modifyParamNdx)
|
|
{
|
|
DE_ASSERT(glu::isDataTypeFloatOrVec(paramType));
|
|
op << " + 2.0";
|
|
}
|
|
}
|
|
|
|
if (mustCast)
|
|
op << ")";
|
|
|
|
op << ";\n";
|
|
}
|
|
|
|
op << "\t" << returnPrecisionMaybe << returnTypeName << " res" << calcNdx << " = " << returnTypeName << "(0);\n";
|
|
}
|
|
}
|
|
|
|
// Loop with expressions in it.
|
|
op << "\tfor (int i = 0; i < u_numLoopIterations; i++)\n";
|
|
op << "\t{\n";
|
|
for (int calcNdx = 0; calcNdx < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; calcNdx++)
|
|
{
|
|
if (calcNdx > 0)
|
|
op << "\n";
|
|
|
|
op << "\t\t{\n";
|
|
|
|
for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
|
|
{
|
|
const string inputName = "in" + de::toString(calcNdx) + (char)('a'+inputNdx);
|
|
const string incName = string() + "u_inc" + (char)('A'+inputNdx);
|
|
const string incExpr = incrementExpr(incName, m_paramTypes[inputNdx], m_useNearlyConstantInputs);
|
|
|
|
op << "\t\t\t" << inputName << " = " << sumExpr(inputName, incExpr, m_paramTypes[inputNdx]) << ";\n";
|
|
}
|
|
|
|
op << "\t\t\t" << returnPrecisionMaybe << returnTypeName << " eval" << calcNdx << " = ";
|
|
|
|
if (programID == PROGRAM_WITH_FUNCTION_CALLS)
|
|
{
|
|
op << m_func << "(";
|
|
|
|
for (int paramNdx = 0; paramNdx < numParams; paramNdx++)
|
|
{
|
|
if (paramNdx > 0)
|
|
op << ", ";
|
|
|
|
op << "in" << calcNdx << (char)('a'+paramNdx);
|
|
}
|
|
|
|
op << ")";
|
|
}
|
|
else
|
|
{
|
|
DE_ASSERT(programID == PROGRAM_WITHOUT_FUNCTION_CALLS);
|
|
op << returnTypeName << "(1)";
|
|
}
|
|
|
|
op << ";\n";
|
|
|
|
{
|
|
const string resName = "res" + de::toString(calcNdx);
|
|
const string evalName = "eval" + de::toString(calcNdx);
|
|
const string incExpr = incrementExpr(evalName, m_returnType, m_useNearlyConstantInputs);
|
|
|
|
op << "\t\t\tres" << calcNdx << " = " << sumExpr(resName, incExpr, m_returnType) << ";\n";
|
|
}
|
|
|
|
op << "\t\t}\n";
|
|
}
|
|
op << "\t}\n";
|
|
op << "\n";
|
|
|
|
// Result variables.
|
|
for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
|
|
{
|
|
op << "\t" << paramPrecisionsMaybe[inputNdx] << paramTypeNames[inputNdx] << " sumIn" << (char)('A'+inputNdx) << " = ";
|
|
{
|
|
string expr = string() + "in0" + (char)('a'+inputNdx);
|
|
for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
|
|
expr = sumExpr(expr, string() + "in" + de::toString(i) + (char)('a'+inputNdx), m_paramTypes[inputNdx]);
|
|
op << expr;
|
|
}
|
|
op << ";\n";
|
|
}
|
|
|
|
op << "\t" << returnPrecisionMaybe << returnTypeName << " sumRes = ";
|
|
{
|
|
string expr = "res0";
|
|
for (int i = 1; i < FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS; i++)
|
|
expr = sumExpr(expr, "res" + de::toString(i), m_returnType);
|
|
op << expr;
|
|
}
|
|
op << ";\n";
|
|
|
|
{
|
|
glu::DataType finalResultDataType = glu::TYPE_LAST;
|
|
|
|
if (glu::isDataTypeMatrix(m_returnType))
|
|
{
|
|
finalResultDataType = m_returnType;
|
|
|
|
op << "\t" << precision << " " << returnTypeName << " finalRes = ";
|
|
|
|
for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
|
|
{
|
|
DE_ASSERT(m_paramTypes[inputNdx] == m_returnType);
|
|
op << "sumIn" << (char)('A'+inputNdx) << " + ";
|
|
}
|
|
op << "sumRes;\n";
|
|
}
|
|
else
|
|
{
|
|
int numFinalResComponents = glu::getDataTypeScalarSize(m_returnType);
|
|
for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
|
|
numFinalResComponents = de::max(numFinalResComponents, glu::getDataTypeScalarSize(m_paramTypes[inputNdx]));
|
|
|
|
finalResultDataType = getDataTypeFloatOrVec(numFinalResComponents);
|
|
|
|
{
|
|
const string finalResType = glu::getDataTypeName(finalResultDataType);
|
|
op << "\t" << precision << " " << finalResType << " finalRes = ";
|
|
for (int inputNdx = 0; inputNdx < numParams; inputNdx++)
|
|
op << finalResType << "(sumIn" << (char)('A'+inputNdx) << ") + ";
|
|
op << finalResType << "(sumRes);\n";
|
|
}
|
|
}
|
|
|
|
// Convert to color.
|
|
op << "\tmediump vec4 color = ";
|
|
if (finalResultDataType == TYPE_FLOAT_VEC4)
|
|
op << "finalRes";
|
|
else
|
|
{
|
|
int size = isMatrixReturn ? getDataTypeMatrixNumRows(finalResultDataType) : getDataTypeScalarSize(finalResultDataType);
|
|
|
|
op << "vec4(";
|
|
|
|
if (isMatrixReturn)
|
|
{
|
|
for (int i = 0; i < getDataTypeMatrixNumColumns(finalResultDataType); i++)
|
|
{
|
|
if (i > 0)
|
|
op << " + ";
|
|
op << "finalRes[" << i << "]";
|
|
}
|
|
}
|
|
else
|
|
op << "finalRes";
|
|
|
|
for (int i = size; i < 4; i++)
|
|
op << ", " << (i == 3 ? "1.0" : "0.0");
|
|
|
|
op << ")";
|
|
}
|
|
op << ";\n";
|
|
op << "\t" << (isVertexCase ? "v_color" : "gl_FragColor") << " = color;\n";
|
|
|
|
if (isVertexCase)
|
|
{
|
|
vtx << " gl_Position = a_position + u_zero*color;\n";
|
|
frag << " gl_FragColor = v_color;\n";
|
|
}
|
|
else
|
|
{
|
|
for (int i = 0; i < numAttributes; i++)
|
|
vtx << " v_in" << i << " = a_in" << i << ";\n";
|
|
}
|
|
|
|
vtx << "}\n";
|
|
frag << "}\n";
|
|
}
|
|
|
|
{
|
|
vector<AttribSpec> attributes;
|
|
for (int i = 0; i < numAttributes; i++)
|
|
attributes.push_back(AttribSpec(("a_in" + de::toString(i)).c_str(),
|
|
m_attribute.swizzle((i+0)%4, (i+1)%4, (i+2)%4, (i+3)%4),
|
|
m_attribute.swizzle((i+1)%4, (i+2)%4, (i+3)%4, (i+0)%4),
|
|
m_attribute.swizzle((i+2)%4, (i+3)%4, (i+0)%4, (i+1)%4),
|
|
m_attribute.swizzle((i+3)%4, (i+0)%4, (i+1)%4, (i+2)%4)));
|
|
|
|
{
|
|
string description = "This is the program ";
|
|
|
|
description += programID == PROGRAM_WITHOUT_FUNCTION_CALLS ? "without"
|
|
: programID == PROGRAM_WITH_FUNCTION_CALLS ? "with"
|
|
: DE_NULL;
|
|
|
|
description += " '" + m_func + "' function calls.\n"
|
|
"Note: workload size for this program means the number of loop iterations.";
|
|
|
|
return ProgramContext(vtx.str(), frag.str(), attributes, description);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Generates the per-program shader sources/attributes for every program variant
// (one ProgramContext per ProgramID, in enum order).
vector<FunctionCase::ProgramContext> FunctionCase::generateProgramData (void) const
{
	vector<ProgramContext> contexts;
	contexts.reserve(PROGRAM_LAST);

	for (int progNdx = 0; progNdx < PROGRAM_LAST; progNdx++)
		contexts.push_back(generateSingleProgramData((ProgramID)progNdx));

	return contexts;
}
|
|
|
|
// Sets u_zero and the per-parameter increment uniforms (u_incA, u_incB, ...)
// for the given program. Each used parameter slot gets small arbitrary values
// whose type (float/int/bool/matrix) matches the parameter's data type.
void FunctionCase::setGeneralUniforms (deUint32 program) const
{
	const glw::Functions& gl = m_renderCtx.getFunctions();

	gl.uniform1f(gl.getUniformLocation(program, "u_zero"), 0.0f);

	for (int ndx = 0; ndx < MAX_PARAMS; ndx++)
	{
		const glu::DataType type = m_paramTypes[ndx];

		if (type == glu::TYPE_INVALID)
			continue; // Unused parameter slot.

		const int		scalarSize	= glu::getDataTypeScalarSize(type);
		const string	uniName		= string() + "u_inc" + (char)('A'+ndx);
		const int		loc			= gl.getUniformLocation(program, uniName.c_str());

		if (glu::isDataTypeFloatOrVec(type))
		{
			float vals[4];
			for (int i = 0; i < DE_LENGTH_OF_ARRAY(vals); i++)
				vals[i] = (float)ndx*0.01f + (float)i*0.001f; // Arbitrary small values.
			uniformNfv(gl, scalarSize, loc, 1, &vals[0]);
		}
		else if (glu::isDataTypeIntOrIVec(type))
		{
			int vals[4];
			for (int i = 0; i < DE_LENGTH_OF_ARRAY(vals); i++)
				vals[i] = ndx*100 + i; // Arbitrary values.
			uniformNiv(gl, scalarSize, loc, 1, &vals[0]);
		}
		else if (glu::isDataTypeBoolOrBVec(type))
		{
			int vals[4];
			for (int i = 0; i < DE_LENGTH_OF_ARRAY(vals); i++)
				vals[i] = (ndx >> i) & 1; // Arbitrary bit pattern.
			uniformNiv(gl, scalarSize, loc, 1, &vals[0]);
		}
		else if (glu::isDataTypeMatrix(type))
		{
			const int size = glu::getDataTypeMatrixNumRows(type);
			DE_ASSERT(size == glu::getDataTypeMatrixNumColumns(type)); // Only square matrices expected.
			float vals[4*4];
			for (int i = 0; i < DE_LENGTH_OF_ARRAY(vals); i++)
				vals[i] = (float)ndx*0.01f + (float)i*0.001f; // Arbitrary values.
			uniformMatrixNfv(gl, size, loc, 1, &vals[0]);
		}
		else
			DE_ASSERT(false);
	}
}
|
|
|
|
// Communicates the workload size to the shader; for this case type the
// workload size is the number of loop iterations (u_numLoopIterations).
void FunctionCase::setWorkloadSizeUniform (deUint32 program, int numLoopIterations) const
{
	const glw::Functions& gl = m_renderCtx.getFunctions();
	gl.uniform1i(gl.getUniformLocation(program, "u_numLoopIterations"), numLoopIterations);
}
|
|
|
|
float FunctionCase::computeSingleOperationTime (const vector<float>& perProgramOperationCosts) const
|
|
{
|
|
DE_ASSERT(perProgramOperationCosts.size() == PROGRAM_LAST);
|
|
const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
|
|
const float programOperationCostDiff = perProgramOperationCosts[PROGRAM_WITH_FUNCTION_CALLS] - perProgramOperationCosts[PROGRAM_WITHOUT_FUNCTION_CALLS];
|
|
|
|
return programOperationCostDiff / (float)numFunctionCalls;
|
|
}
|
|
|
|
void FunctionCase::logSingleOperationCalculationInfo (void) const
|
|
{
|
|
const int numFunctionCalls = FUNCTION_CASE_NUM_INDEPENDENT_CALCULATIONS;
|
|
|
|
m_testCtx.getLog() << TestLog::Message << "Note: program " << (int)PROGRAM_WITH_FUNCTION_CALLS << " contains "
|
|
<< numFunctionCalls << " calls to '" << m_func << "' in one loop iteration; "
|
|
<< "cost of one operation is calculated as "
|
|
<< "(cost_of_workload_with_calls - cost_of_workload_without_calls) / " << numFunctionCalls << TestLog::EndMessage;
|
|
}
|
|
|
|
} // anonymous
|
|
|
|
// Constructs the "operator" top-level performance test group; children are
// populated in init().
ShaderOperatorTests::ShaderOperatorTests (Context& context)
	: TestCaseGroup(context, "operator", "Operator Performance Tests")
{
}
|
|
|
|
// Child cases are owned and destroyed by the TestCaseGroup base class.
ShaderOperatorTests::~ShaderOperatorTests (void)
{
}
|
|
|
|
// Builds the full test hierarchy: binary operator cases first, then built-in
// function cases driven by the functionCaseGroups table below.
void ShaderOperatorTests::init (void)
{
	// Binary operator cases

	// Scalar and vector float/int types the binary operators are tested with.
	static const DataType binaryOpTypes[] =
	{
		TYPE_FLOAT,
		TYPE_FLOAT_VEC2,
		TYPE_FLOAT_VEC3,
		TYPE_FLOAT_VEC4,
		TYPE_INT,
		TYPE_INT_VEC2,
		TYPE_INT_VEC3,
		TYPE_INT_VEC4,
	};
	static const Precision precisions[] =
	{
		PRECISION_LOWP,
		PRECISION_MEDIUMP,
		PRECISION_HIGHP
	};
	// Operators under test; 'swizzle' is forwarded to BinaryOpCase as its
	// useSwizzle parameter.
	static const struct
	{
		const char*	name;
		const char*	op;
		bool		swizzle;
	} binaryOps[] =
	{
		{ "add", "+", false	},
		{ "sub", "-", true	},
		{ "mul", "*", false	},
		{ "div", "/", true	}
	};

	tcu::TestCaseGroup* const binaryOpsGroup = new tcu::TestCaseGroup(m_testCtx, "binary_operator", "Binary Operator Performance Tests");
	addChild(binaryOpsGroup);

	// Resulting hierarchy: binary_operator.<op>.<vertex|fragment>.<precision>_<type>
	for (int opNdx = 0; opNdx < DE_LENGTH_OF_ARRAY(binaryOps); opNdx++)
	{
		tcu::TestCaseGroup* const opGroup = new tcu::TestCaseGroup(m_testCtx, binaryOps[opNdx].name, "");
		binaryOpsGroup->addChild(opGroup);

		for (int isFrag = 0; isFrag <= 1; isFrag++)
		{
			// One calibration storage per shader-stage group, shared by all cases added to it.
			const BinaryOpCase::InitialCalibrationStorage	shaderGroupCalibrationStorage	(new BinaryOpCase::InitialCalibration);
			const bool										isVertex						= isFrag == 0;
			tcu::TestCaseGroup* const						shaderGroup						= new tcu::TestCaseGroup(m_testCtx, isVertex ? "vertex" : "fragment", "");
			opGroup->addChild(shaderGroup);

			for (int typeNdx = 0; typeNdx < DE_LENGTH_OF_ARRAY(binaryOpTypes); typeNdx++)
			{
				for (int precNdx = 0; precNdx < DE_LENGTH_OF_ARRAY(precisions); precNdx++)
				{
					const DataType		type		= binaryOpTypes[typeNdx];
					const Precision		precision	= precisions[precNdx];
					const char* const	op			= binaryOps[opNdx].op;
					const bool			useSwizzle	= binaryOps[opNdx].swizzle;
					std::ostringstream	name;

					name << getPrecisionName(precision) << "_" << getDataTypeName(type);

					shaderGroup->addChild(new BinaryOpCase(m_context, name.str().c_str(), "", op, type, precision, useSwizzle, isVertex, shaderGroupCalibrationStorage));
				}
			}
		}
	}

	// Built-in function cases.

	// Non-specific (i.e. includes gentypes) parameter types for the functions.
	enum ValueType
	{
		VALUE_NONE			= 0,
		VALUE_FLOAT			= (1<<0),	// float scalar
		VALUE_FLOAT_VEC		= (1<<1),	// float vector
		VALUE_FLOAT_VEC34	= (1<<2),	// float vector of size 3 or 4
		VALUE_FLOAT_GENTYPE	= (1<<3),	// float scalar/vector
		VALUE_VEC3			= (1<<4),	// vec3 only
		VALUE_VEC4			= (1<<5),	// vec4 only
		VALUE_MATRIX		= (1<<6),	// matrix
		VALUE_BOOL			= (1<<7),	// boolean scalar
		VALUE_BOOL_VEC		= (1<<8),	// boolean vector
		VALUE_BOOL_GENTYPE	= (1<<9),	// boolean scalar/vector
		VALUE_INT			= (1<<10),	// int scalar
		VALUE_INT_VEC		= (1<<11),	// int vector
		VALUE_INT_GENTYPE	= (1<<12),	// int scalar/vector

		// Shorthands.
		N	= VALUE_NONE,
		F	= VALUE_FLOAT,
		FV	= VALUE_FLOAT_VEC,
		VL	= VALUE_FLOAT_VEC34, // L for "large"
		GT	= VALUE_FLOAT_GENTYPE,
		V3	= VALUE_VEC3,
		V4	= VALUE_VEC4,
		M	= VALUE_MATRIX,
		B	= VALUE_BOOL,
		BV	= VALUE_BOOL_VEC,
		BGT	= VALUE_BOOL_GENTYPE,
		I	= VALUE_INT,
		IV	= VALUE_INT_VEC,
		IGT	= VALUE_INT_GENTYPE,

		// Composite masks used when mapping a ValueType to a concrete DataType below.
		VALUE_ANY_FLOAT	= VALUE_FLOAT | VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_VEC3 | VALUE_VEC4 | VALUE_FLOAT_VEC34,
		VALUE_ANY_INT	= VALUE_INT | VALUE_INT_VEC | VALUE_INT_GENTYPE,
		VALUE_ANY_BOOL	= VALUE_BOOL | VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE,

		VALUE_ANY_GENTYPE	= VALUE_FLOAT_VEC | VALUE_FLOAT_GENTYPE | VALUE_FLOAT_VEC34 |
							  VALUE_BOOL_VEC | VALUE_BOOL_GENTYPE |
							  VALUE_INT_VEC | VALUE_INT_GENTYPE |
							  VALUE_MATRIX
	};
	// Bitmask over Precision enum values; selects which precisions a case is generated for.
	enum PrecisionMask
	{
		PRECMASK_NA				= 0,	//!< Precision not applicable (booleans)
		PRECMASK_LOWP			= (1<<PRECISION_LOWP),
		PRECMASK_MEDIUMP		= (1<<PRECISION_MEDIUMP),
		PRECMASK_HIGHP			= (1<<PRECISION_HIGHP),

		PRECMASK_MEDIUMP_HIGHP	= (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP),
		PRECMASK_ALL			= (1<<PRECISION_LOWP) | (1<<PRECISION_MEDIUMP) | (1<<PRECISION_HIGHP)
	};

	// Concrete types indexed by scalar size - 1 (matrices by size - 2).
	static const DataType floatTypes[] =
	{
		TYPE_FLOAT,
		TYPE_FLOAT_VEC2,
		TYPE_FLOAT_VEC3,
		TYPE_FLOAT_VEC4
	};
	static const DataType intTypes[] =
	{
		TYPE_INT,
		TYPE_INT_VEC2,
		TYPE_INT_VEC3,
		TYPE_INT_VEC4
	};
	static const DataType boolTypes[] =
	{
		TYPE_BOOL,
		TYPE_BOOL_VEC2,
		TYPE_BOOL_VEC3,
		TYPE_BOOL_VEC4
	};
	static const DataType matrixTypes[] =
	{
		TYPE_FLOAT_MAT2,
		TYPE_FLOAT_MAT3,
		TYPE_FLOAT_MAT4
	};

	// Top-level groups, one per GLSL built-in function category.
	tcu::TestCaseGroup* const angleAndTrigonometryGroup	= new tcu::TestCaseGroup(m_testCtx, "angle_and_trigonometry",	"Built-In Angle and Trigonometry Function Performance Tests");
	tcu::TestCaseGroup* const exponentialGroup			= new tcu::TestCaseGroup(m_testCtx, "exponential",				"Built-In Exponential Function Performance Tests");
	tcu::TestCaseGroup* const commonFunctionsGroup		= new tcu::TestCaseGroup(m_testCtx, "common_functions",			"Built-In Common Function Performance Tests");
	tcu::TestCaseGroup* const geometricFunctionsGroup	= new tcu::TestCaseGroup(m_testCtx, "geometric",				"Built-In Geometric Function Performance Tests");
	tcu::TestCaseGroup* const matrixFunctionsGroup		= new tcu::TestCaseGroup(m_testCtx, "matrix",					"Built-In Matrix Function Performance Tests");
	tcu::TestCaseGroup* const floatCompareGroup			= new tcu::TestCaseGroup(m_testCtx, "float_compare",			"Built-In Floating Point Comparison Function Performance Tests");
	tcu::TestCaseGroup* const intCompareGroup			= new tcu::TestCaseGroup(m_testCtx, "int_compare",				"Built-In Integer Comparison Function Performance Tests");
	tcu::TestCaseGroup* const boolCompareGroup			= new tcu::TestCaseGroup(m_testCtx, "bool_compare",				"Built-In Boolean Comparison Function Performance Tests");

	addChild(angleAndTrigonometryGroup);
	addChild(exponentialGroup);
	addChild(commonFunctionsGroup);
	addChild(geometricFunctionsGroup);
	addChild(matrixFunctionsGroup);
	addChild(floatCompareGroup);
	addChild(intCompareGroup);
	addChild(boolCompareGroup);

	// Some attributes to be used as parameters for the functions.
	const Vec4 attrPos		= Vec4( 2.3f, 1.9f, 0.8f, 0.7f);	// Positive values only.
	const Vec4 attrNegPos	= Vec4(-1.3f, 2.5f, -3.5f, 4.3f);	// Mixed negative and positive values.
	const Vec4 attrSmall	= Vec4(-0.9f, 0.8f, -0.4f, 0.2f);	// Magnitudes below 1.

	// Function name, return type and parameter type information; also, what attribute should be used in the test.
	// \note Different versions of the same function (i.e. with the same group name) can be defined by putting them successively in this array.
	// \note In order to reduce case count and thus total execution time, we don't test all input type combinations for every function.
	static const struct
	{
		tcu::TestCaseGroup*	parentGroup;
		const char*			groupName;
		const char*			func;
		const ValueType		types[FunctionCase::MAX_PARAMS + 1]; // Return type and parameter types, in that order.
		const Vec4&			attribute;
		int					modifyParamNdx;
		bool				useNearlyConstantInputs;
		bool				booleanCase;
		PrecisionMask		precMask;
	} functionCaseGroups[] =
	{
		{ angleAndTrigonometryGroup,	"radians",			"radians",			{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"degrees",			"degrees",			{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"sin",				"sin",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"cos",				"cos",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"tan",				"tan",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"asin",				"asin",				{ F,  F,  N,  N  },	attrSmall,	-1,	true,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"acos",				"acos",				{ F,  F,  N,  N  },	attrSmall,	-1,	true,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"atan2",			"atan",				{ F,  F,  F,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ angleAndTrigonometryGroup,	"atan",				"atan",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ exponentialGroup,				"pow",				"pow",				{ F,  F,  F,  N  },	attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"exp",				"exp",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"log",				"log",				{ F,  F,  N,  N  },	attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"exp2",				"exp2",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"log2",				"log2",				{ F,  F,  N,  N  },	attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"sqrt",				"sqrt",				{ F,  F,  N,  N  },	attrPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ exponentialGroup,				"inversesqrt",		"inversesqrt",		{ F,  F,  N,  N  },	attrPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ commonFunctionsGroup,			"abs",				"abs",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"abs",				"abs",				{ V4, V4, N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"sign",				"sign",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"sign",				"sign",				{ V4, V4, N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"floor",			"floor",			{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"floor",			"floor",			{ V4, V4, N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"ceil",				"ceil",				{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"ceil",				"ceil",				{ V4, V4, N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"fract",			"fract",			{ F,  F,  N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"fract",			"fract",			{ V4, V4, N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"mod",				"mod",				{ GT, GT, GT, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"min",				"min",				{ F,  F,  F,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"min",				"min",				{ V4, V4, V4, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"max",				"max",				{ F,  F,  F,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"max",				"max",				{ V4, V4, V4, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"clamp",			"clamp",			{ F,  F,  F,  F  },	attrSmall,	2,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"clamp",			"clamp",			{ V4, V4, V4, V4 },	attrSmall,	2,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"mix",				"mix",				{ F,  F,  F,  F  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"mix",				"mix",				{ V4, V4, V4, V4 },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"step",				"step",				{ F,  F,  F,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"step",				"step",				{ V4, V4, V4, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ commonFunctionsGroup,			"smoothstep",		"smoothstep",		{ F,  F,  F,  F  },	attrSmall,	1,	false,	false,	PRECMASK_MEDIUMP_HIGHP	},
		{ commonFunctionsGroup,			"smoothstep",		"smoothstep",		{ V4, V4, V4, V4 },	attrSmall,	1,	false,	false,	PRECMASK_ALL			},

		{ geometricFunctionsGroup,		"length",			"length",			{ F,  VL, N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"distance",			"distance",			{ F,  VL, VL, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"dot",				"dot",				{ F,  VL, VL, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"cross",			"cross",			{ V3, V3, V3, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"normalize",		"normalize",		{ VL, VL, N,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"faceforward",		"faceforward",		{ VL, VL, VL, VL },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"reflect",			"reflect",			{ VL, VL, VL, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ geometricFunctionsGroup,		"refract",			"refract",			{ VL, VL, VL, F  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ matrixFunctionsGroup,			"matrixCompMult",	"matrixCompMult",	{ M,  M,  M,  N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ floatCompareGroup,			"lessThan",			"lessThan",			{ BV, FV, FV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"lessThanEqual",	"lessThanEqual",	{ BV, FV, FV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"greaterThan",		"greaterThan",		{ BV, FV, FV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"greaterThanEqual",	"greaterThanEqual",	{ BV, FV, FV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"equal",			"equal",			{ BV, FV, FV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ floatCompareGroup,			"notEqual",			"notEqual",			{ BV, FV, FV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ intCompareGroup,				"lessThan",			"lessThan",			{ BV, IV, IV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"lessThanEqual",	"lessThanEqual",	{ BV, IV, IV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"greaterThan",		"greaterThan",		{ BV, IV, IV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"greaterThanEqual",	"greaterThanEqual",	{ BV, IV, IV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"equal",			"equal",			{ BV, IV, IV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},
		{ intCompareGroup,				"notEqual",			"notEqual",			{ BV, IV, IV, N  },	attrNegPos,	-1,	false,	false,	PRECMASK_ALL			},

		{ boolCompareGroup,				"equal",			"equal",			{ BV, BV, BV, N  },	attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"notEqual",			"notEqual",			{ BV, BV, BV, N  },	attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"any",				"any",				{ B,  BV, N,  N  },	attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"all",				"all",				{ B,  BV, N,  N  },	attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		},
		{ boolCompareGroup,				"not",				"not",				{ BV, BV, N,  N  },	attrNegPos,	-1,	false,	true,	PRECMASK_MEDIUMP		}
	};

	// vertexSubGroup and fragmentSubGroup are the groups where the various vertex/fragment cases of a single function are added.
	// \note These are defined here so that different versions (different entries in the functionCaseGroups array) of the same function can be put in the same group.
	tcu::TestCaseGroup*							vertexSubGroup						= DE_NULL;
	tcu::TestCaseGroup*							fragmentSubGroup					= DE_NULL;
	FunctionCase::InitialCalibrationStorage		vertexSubGroupCalibrationStorage;
	FunctionCase::InitialCalibrationStorage		fragmentSubGroupCalibrationStorage;
	for (int funcNdx = 0; funcNdx < DE_LENGTH_OF_ARRAY(functionCaseGroups); funcNdx++)
	{
		tcu::TestCaseGroup* const	parentGroup				= functionCaseGroups[funcNdx].parentGroup;
		const char* const			groupName				= functionCaseGroups[funcNdx].groupName;
		const char* const			groupFunc				= functionCaseGroups[funcNdx].func;
		const ValueType* const		funcTypes				= functionCaseGroups[funcNdx].types;
		const Vec4&					groupAttribute			= functionCaseGroups[funcNdx].attribute;
		const int					modifyParamNdx			= functionCaseGroups[funcNdx].modifyParamNdx;
		const bool					useNearlyConstantInputs	= functionCaseGroups[funcNdx].useNearlyConstantInputs;
		const bool					booleanCase				= functionCaseGroups[funcNdx].booleanCase;
		const PrecisionMask			precMask				= functionCaseGroups[funcNdx].precMask;

		// If this is a new function and not just a different version of the previously defined function, create a new group.
		if (funcNdx == 0 || parentGroup != functionCaseGroups[funcNdx-1].parentGroup || string(groupName) != functionCaseGroups[funcNdx-1].groupName)
		{
			tcu::TestCaseGroup* const funcGroup = new tcu::TestCaseGroup(m_testCtx, groupName, "");
			functionCaseGroups[funcNdx].parentGroup->addChild(funcGroup);

			vertexSubGroup		= new tcu::TestCaseGroup(m_testCtx, "vertex", "");
			fragmentSubGroup	= new tcu::TestCaseGroup(m_testCtx, "fragment", "");

			funcGroup->addChild(vertexSubGroup);
			funcGroup->addChild(fragmentSubGroup);

			// Fresh calibration storage per sub-group; shared by every case added to it below.
			vertexSubGroupCalibrationStorage	= FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
			fragmentSubGroupCalibrationStorage	= FunctionCase::InitialCalibrationStorage(new FunctionCase::InitialCalibration);
		}

		DE_ASSERT(vertexSubGroup != DE_NULL);
		DE_ASSERT(fragmentSubGroup != DE_NULL);

		// Find the type size range of parameters (e.g. from 2 to 4 in case of vectors).
		int genTypeFirstSize	= 1;
		int genTypeLastSize		= 1;

		// Find the first return value or parameter with a gentype (if any) and set sizes accordingly.
		// \note Assumes only matching sizes gentypes are to be found, e.g. no "genType func (vec param)"
		for (int i = 0; i < FunctionCase::MAX_PARAMS + 1 && genTypeLastSize == 1; i++)
		{
			switch (funcTypes[i])
			{
				case VALUE_FLOAT_VEC:
				case VALUE_BOOL_VEC:
				case VALUE_INT_VEC:		// \note Fall-through.
					genTypeFirstSize = 2;
					genTypeLastSize = 4;
					break;
				case VALUE_FLOAT_VEC34:
					genTypeFirstSize = 3;
					genTypeLastSize = 4;
					break;
				case VALUE_FLOAT_GENTYPE:
				case VALUE_BOOL_GENTYPE:
				case VALUE_INT_GENTYPE:	// \note Fall-through.
					genTypeFirstSize = 1;
					genTypeLastSize = 4;
					break;
				case VALUE_MATRIX:
					genTypeFirstSize = 2;
					genTypeLastSize = 4;
					break;
				// If none of the above, keep looping.
				default:
					break;
			}
		}

		// Create a case for each possible size of the gentype.
		for (int curSize = genTypeFirstSize; curSize <= genTypeLastSize; curSize++)
		{
			// Determine specific types for return value and the parameters, according to curSize. Non-gentypes not affected by curSize.
			DataType types[FunctionCase::MAX_PARAMS + 1];
			for (int i = 0; i < FunctionCase::MAX_PARAMS + 1; i++)
			{
				if (funcTypes[i] == VALUE_NONE)
					types[i] = TYPE_INVALID;
				else
				{
					int isFloat		= funcTypes[i] & VALUE_ANY_FLOAT;
					int isBool		= funcTypes[i] & VALUE_ANY_BOOL;
					int isInt		= funcTypes[i] & VALUE_ANY_INT;
					int isMat		= funcTypes[i] == VALUE_MATRIX;
					int inSize		= (funcTypes[i] & VALUE_ANY_GENTYPE)	? curSize
									: funcTypes[i] == VALUE_VEC3			? 3
									: funcTypes[i] == VALUE_VEC4			? 4
									: 1;
					int typeArrayNdx = isMat ? inSize - 2 : inSize - 1; // \note No matrices of size 1.

					types[i] = isFloat	? floatTypes[typeArrayNdx]
							 : isBool	? boolTypes[typeArrayNdx]
							 : isInt	? intTypes[typeArrayNdx]
							 : isMat	? matrixTypes[typeArrayNdx]
							 : TYPE_LAST;
				}

				DE_ASSERT(types[i] != TYPE_LAST);
			}

			// Array for just the parameter types.
			DataType paramTypes[FunctionCase::MAX_PARAMS];
			for (int i = 0; i < FunctionCase::MAX_PARAMS; i++)
				paramTypes[i] = types[i+1];

			for (int prec = (int)PRECISION_LOWP; prec < (int)PRECISION_LAST; prec++)
			{
				if ((precMask & (1 << prec)) == 0)
					continue;

				// Boolean cases have no precision prefix in their name.
				const string		precisionPrefix = booleanCase ? "" : (string(getPrecisionName((Precision)prec)) + "_");
				std::ostringstream	caseName;

				caseName << precisionPrefix;

				// Write the name of each distinct parameter data type into the test case name.
				for (int i = 1; i < FunctionCase::MAX_PARAMS + 1 && types[i] != TYPE_INVALID; i++)
				{
					if (i == 1 || types[i] != types[i-1])
					{
						if (i > 1)
							caseName << "_";

						caseName << getDataTypeName(types[i]);
					}
				}

				for (int fragI = 0; fragI <= 1; fragI++)
				{
					const bool					vert	= fragI == 0;
					tcu::TestCaseGroup* const	group	= vert ? vertexSubGroup : fragmentSubGroup;
					group->addChild (new FunctionCase(m_context,
													  caseName.str().c_str(), "",
													  groupFunc,
													  types[0], paramTypes,
													  groupAttribute, modifyParamNdx, useNearlyConstantInputs,
													  (Precision)prec, vert,
													  vert ? vertexSubGroupCalibrationStorage : fragmentSubGroupCalibrationStorage));
				}
			}
		}
	}
}
|
|
|
|
} // Performance
|
|
} // gles2
|
|
} // deqp
|