/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include <HalInterfaces.h>
#include <SampleDriver.h>
#include <ValidateHal.h>
#include <android-base/logging.h>
#include <android/hardware/neuralnetworks/1.0/ADevice.h>
#include <android/hardware/neuralnetworks/1.1/ADevice.h>
#include <android/hardware/neuralnetworks/1.2/ADevice.h>
#include <gtest/gtest.h>
#include <unistd.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <iterator>
#include <map>
#include <memory>
#include <random>
#include <set>
#include <string>
#include <tuple>
#include <utility>
#include <vector>

#include "CompilationBuilder.h"
#include "HalUtils.h"
#include "Manager.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"
#include "TestNeuralNetworksWrapper.h"

// Uncomment the following line to generate some debugging output that
// may be useful when analyzing failures:
//
// #define VERBOSE VERBOSE

// Uncomment the following line to generate some debugging output that
// may be useful to determine test coverage for support of dynamic
// temporaries (http://b/132458982):
//
// #define TRACE_DYNTEMP TRACE_DYNTEMP
// We randomly generate tests (model + input data) at runtime, and verify
// that we get the same results whether we do partitioned compilation/execution
// or non-partitioned compilation/execution. We perform a test as follows:
//
// (1) Randomly generate a model (graph and weights), randomly generate input
//     data, randomly assign inputs and outputs to CPU memory or to shared
//     memory.
//
//     Randomly leave dimensions unset for intermediate operands.
//
// (2) Randomly generate drivers based on the sample driver, each of which
//     executes models on the CPU. They differ according to which operations
//     they support.
//
// (3) Compile and execute without partitioning, saving off the results.
//
// (4) Compile and execute with partitioning.
//
// (5) Verify that the saved results from (3) match the results from (4).
//
// For simplicity, all data (model inputs, model outputs, weights,
// temps) are of the same type: a 2-D TENSOR_FLOAT32 where the two
// dimensions are fixed throughout a particular test case (and
// randomly determined). This prevents us from having to find a
// mechanism to "resize" data (e.g., if ADD#a operates on data of size
// 2x2, ADD#b operates on data of size 3x3, and the outputs of ADD#a
// and ADD#b become inputs of ADD#c, do we need to insert one or more
// operations between (say) ADD#a and ADD#c to convert ADD#a's data
// from size 2x2 to size 3x3 in order to match ADD#b?). In the few
// cases where an operand cannot be of this type, it is a constant
// (e.g., activation functions and RNN bias).
//
// Each operation we generate has a signature (described in more
// detail later). The randomly generated drivers decide which
// operations they can execute by checking operation signatures. Once
// we have built the model and know the set of signatures, we randomly
// assign each signature to a driver. No signature is supported by
// multiple drivers -- we're not testing the logic that the
// partitioning algorithm uses to select the best driver for an
// operation.
namespace android {

namespace V1_0 = ::android::hardware::neuralnetworks::V1_0;
namespace V1_1 = ::android::hardware::neuralnetworks::V1_1;
namespace V1_2 = ::android::hardware::neuralnetworks::V1_2;
namespace V1_3 = ::android::hardware::neuralnetworks::V1_3;
using CompilationBuilder = nn::CompilationBuilder;
using DeviceManager = nn::DeviceManager;
using Device = nn::Device;
using SharedDevice = nn::SharedDevice;
using ExecutionPlan = nn::ExecutionPlan;
using ExecutionStep = nn::ExecutionStep;
using HalCacheToken = nn::HalCacheToken;
using HalVersion = nn::HalVersion;
using HidlModel = V1_3::Model;
using LogicalStep = nn::LogicalStep;
using ModelBuilder = nn::ModelBuilder;
using Result = nn::test_wrapper::Result;
using SampleDriver = nn::sample_driver::SampleDriver;
using WrapperCompilation = nn::test_wrapper::Compilation;
using WrapperExecution = nn::test_wrapper::Execution;
using WrapperMemory = nn::test_wrapper::Memory;
using WrapperModel = nn::test_wrapper::Model;
using WrapperOperandType = nn::test_wrapper::OperandType;
using WrapperType = nn::test_wrapper::Type;

namespace {

/// Configure test size //////////////////////////////////////////////////////////

// We may exceed this in order to connect otherwise disjoint subgraphs.
static const unsigned kMaxNumOperations = 100;

// We build models to process 2-D square tensors up to this size in each dimension;
// note that the API promotes by-value weights larger than 128 bytes to by-reference,
// so we want to ensure that we can pick both types that exceed and types that do
// not exceed this size.
static const unsigned kMaxProblemSize = 8;

// First seed for pseudorandom test generation.
static const unsigned kFirstSeed = 0;

// Number of test cases.
static const unsigned kNumTestCases = 225;

// Force all graph weights into a single pool (as we recommend to users)
// or allow them to be distributed across multiple pools (more stress
// on the partitioning algorithm and the rest of the runtime)?
// Forcing all graph weights into a single pool may be necessary to
// prevent large graphs from running up against http://b/70302693
// "NNAPI overuses (?) fds".
static const bool kAllWeightsInOnePool = false;

//////////////////////////////////////////////////////////////////////////////////

// The signature of an operation consists of the operation type (e.g.,
// ADD) and the activation function (use -1 in the case of an
// operation type for which the activation function is inapplicable).
typedef std::pair<ANeuralNetworksOperationType, int> Signature;
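// For example, an ADD operation whose fused activation input is
// ANEURALNETWORKS_FUSED_RELU has the signature
// {ANEURALNETWORKS_ADD, ANEURALNETWORKS_FUSED_RELU}, while an operation
// such as TANH that takes no activation input has the signature
// {ANEURALNETWORKS_TANH, -1}.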

// This class adds some simple utilities on top of WrapperModel. For example,
// it provides access to certain features from ModelBuilder that are not exposed
// by the base class (such as inputCount() and operation index).
class TestModel : public WrapperModel {
   public:
    uint32_t addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
                          const std::vector<uint32_t>& outputs) {
        const uint32_t operationIndex = operationCount();
        mOperations.push_back(outputs);
        WrapperModel::addOperation(type, inputs, outputs);
        return operationIndex;
    }

    uint32_t operationCount() const { return mOperations.size(); }

    uint32_t inputCount() const { return builder()->inputCount(); }
    uint32_t outputCount() const { return builder()->outputCount(); }

    const std::vector<uint32_t>& getOperationOutputs(uint32_t index) const {
        CHECK(index < mOperations.size());
        return mOperations[index];
    }

    // All values are immediately copied into the model (we need to do
    // this ourselves in cases where the underlying NNAPI does not).
    void setOperandValue(uint32_t index, const std::vector<float>& value) {
        const size_t length = value.size() * sizeof(float);

        if (length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES) {
            WrapperModel::setOperandValue(index, value.data(), length);
        } else {
            mOperandValues.push_back(value);
            WrapperModel::setOperandValue(index, mOperandValues.back().data(), length);
        }
    }
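    // (The else branch above keeps the vector alive in mOperandValues because,
    // past the ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES threshold,
    // the NNAPI is expected to retain a pointer to the data rather than copy it.)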

    void setOperandValue(uint32_t index, const std::vector<int32_t>& value) {
        const size_t length = value.size() * sizeof(int32_t);

        CHECK(length <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, value.data(), length);
    }

    void setOperandValue(uint32_t index, int32_t value) {
        CHECK(sizeof(value) <= ANEURALNETWORKS_MAX_SIZE_OF_IMMEDIATELY_COPIED_VALUES);
        WrapperModel::setOperandValue(index, &value, sizeof(value));
    }

   private:
    const ModelBuilder* builder() const {
        return reinterpret_cast<const ModelBuilder*>(getHandle());
    }

    // Representation of operations: vector index is operation number,
    // vector value is operation's output operands.
    std::vector<std::vector<uint32_t>> mOperations;

    // Large operand values -- not immediately copied into the
    // WrapperModel, so remembered here instead.
    std::vector<std::vector<float>> mOperandValues;
};

// This class adds some simple utilities on top of WrapperCompilation in order
// to provide access to certain features from CompilationBuilder that are not
// exposed by the base class.
class TestCompilation : public WrapperCompilation {
   public:
    TestCompilation(const WrapperModel* model) : WrapperCompilation(model) {}

    TestCompilation(const WrapperModel* model, std::vector<std::shared_ptr<Device>> devices) {
        ModelBuilder* m = reinterpret_cast<ModelBuilder*>(model->getHandle());
        CompilationBuilder* c = nullptr;
        int result = m->createCompilation(&c, devices);
        EXPECT_EQ(result, 0);
        mCompilation = reinterpret_cast<ANeuralNetworksCompilation*>(c);
    }
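    // (The two-argument constructor above goes through
    // ModelBuilder::createCompilation() directly, rather than the public NNAPI
    // entry point, so that the compilation can be restricted to the given set
    // of devices.)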

    using WrapperCompilation::finish;

    Result setPartitioning(uint32_t partitioning) {
        return static_cast<Result>(builder()->forTest_setPartitioning(partitioning));
    }

    const ExecutionPlan& getExecutionPlan() const { return builder()->forTest_getExecutionPlan(); }

   private:
    const CompilationBuilder* builder() const {
        return reinterpret_cast<const CompilationBuilder*>(getHandle());
    }
    CompilationBuilder* builder() { return reinterpret_cast<CompilationBuilder*>(getHandle()); }
};

// This class is used to manage a collection of memory regions,
// disjoint windows onto a set of Memory instances, each of which is
// associated with a single shared memory region. Each region and
// Memory instance is assigned a number. The usage pattern is as
// follows:
// - Call addMemory() and addRegion() as many times as needed to
//   declare (but not define) Memory instances and declare region
//   instances.
// - Call layout() to define the Memory instances.
// - Call getRegion() as many times as needed to get the details
//   of memory regions (such as address, or Memory/offset/length).
// The Memory instances created by layout() are owned by the
// TestMemories instance, and are destroyed when the TestMemories
// instance is destroyed.
class TestMemories {
   public:
    TestMemories() = default;

    TestMemories(const TestMemories&) = delete;
    TestMemories& operator=(const TestMemories&) = delete;

    unsigned addMemory() {
        CHECK(!mLayoutDone);
        mMemorySizes.push_back(0);
        return memoryCount() - 1;
    }
    unsigned memoryCount() const { return mMemorySizes.size(); }

    unsigned addRegion(unsigned memoryIndex, uint32_t length) {
        CHECK(!mLayoutDone);
        CHECK(memoryIndex < memoryCount());
        uint32_t& memorySize = mMemorySizes[memoryIndex];
        auto desc = std::make_tuple(memoryIndex, (uint32_t)memorySize, length);
        mRegions.push_back(desc);
        memorySize += length;
        return regionCount() - 1;
    }
    unsigned regionCount() const { return mRegions.size(); }

    void layout();

    void* getRegion(unsigned regionIndex, const WrapperMemory** pMemory, uint32_t* pOffset,
                    uint32_t* pLength) {
        CHECK(mLayoutDone);
        CHECK(regionIndex < regionCount());
        const auto& regionDescriptor = mRegions[regionIndex];
        const WrapperMemory* memory = &mMemories[std::get<0>(regionDescriptor)];
        uint32_t offset = std::get<1>(regionDescriptor);
        uint32_t length = std::get<2>(regionDescriptor);

        uint8_t* buffer = reinterpret_cast<nn::MemoryAshmem*>(memory->get())->getPointer();
        CHECK(buffer != nullptr);

        if (pMemory) *pMemory = memory;
        if (pOffset) *pOffset = offset;
        if (pLength) *pLength = length;

        return buffer + offset;
    }

    void* getRegion(unsigned regionIndex) {
        return getRegion(regionIndex, nullptr, nullptr, nullptr);
    }

   private:
    // Index is the memory index; value is the size of the memory
    // (aggregate size of all regions in the memory).
    std::vector<uint32_t> mMemorySizes;

    // Index is the memory index.
    std::vector<WrapperMemory> mMemories;

    // Index is the region index; tuple represents memory index,
    // region offset within memory, region length.
    std::vector<std::tuple<unsigned, uint32_t, uint32_t>> mRegions;

    // For validity checking.
    bool mLayoutDone = false;
};

void TestMemories::layout() {
    CHECK(!mLayoutDone);
    for (uint32_t memorySize : mMemorySizes) {
        auto [n, ashmem] = nn::MemoryAshmem::create(memorySize);
        CHECK_EQ(n, ANEURALNETWORKS_NO_ERROR);
        CHECK(ashmem != nullptr);

        ANeuralNetworksMemory* memory = reinterpret_cast<ANeuralNetworksMemory*>(ashmem.release());
        mMemories.emplace_back(memory);
    }
    mLayoutDone = true;
}

class RandomPartitioningTest : public ::testing::TestWithParam<unsigned> {
   public:
    RandomPartitioningTest() : mRandNumEng(GetParam() /* seed */), mRandNumUnitDist(0.0, 1.0) {}

    static Signature getSignature(const HidlModel& model, const V1_3::Operation& operation);

   protected:
    static SharedDevice makeTestDriver(HalVersion version, const char* name,
                                       std::set<Signature> signatures);

    static HalVersion getMinHalVersion(ANeuralNetworksOperationType type);

    static std::string to_string(HalVersion version);

    bool randBool() { return randUInt(2) == 1; }

    double randFrac() {  // [0.0, 1.0)
        return mRandNumUnitDist(mRandNumEng);
    }

    unsigned randUInt(unsigned limit) {  // [0, limit)
        return unsigned(randFrac() * limit);
    }

    // Represents an operation in which every input and output operand
    // is a TENSOR_FLOAT32 of dimensions [problemSize, problemSize] except:
    // - One input operand may be an activation function.
    // - Any number of input operands may be "special" in some other way
    //   (and in this implementation, not produced by any other operation).
    // We require that:
    // - There be at least one input operand that is neither an
    //   activation function nor "special".
    struct OperationPattern {
        HalVersion mMinHalVersion;
        int mOperationType;
        unsigned mNumInputs;
        unsigned mNumOutputs;
        int mActivationFunctionInputIndex;  // <0 if none

        // Returns operand index, or <0 if input is normal (must not
        // be called for an activation function operand). Function
        // should have the following prototype:
        //
        //     int makeSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex);
        //
        int (RandomPartitioningTest::*mMakeSpecialInput)(unsigned, TestModel*, unsigned);
    };

    static const OperationPattern kOperationPatterns[];

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture an ELU input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeEluSpecialInput([[maybe_unused]] unsigned problemSize, TestModel* model,
                            unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is alpha, a scalar
        const WrapperOperandType alphaType(WrapperType::FLOAT32, {});
        return int(model->addConstantOperand(&alphaType, 1.0f));
    }

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture an RNN input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeRnnSpecialInput(unsigned problemSize, TestModel* model, unsigned inputIndex) {
        if (inputIndex != 3) {
            return -1;
        }

        // input operand 3 is bias, a 1-D tensor
        const WrapperOperandType biasType(WrapperType::TENSOR_FLOAT32, {problemSize});
        const uint32_t operandIndex = model->addOperand(&biasType);
        std::vector<float> biasValue(problemSize);
        std::generate(biasValue.begin(), biasValue.end(), [this] { return randFrac(); });
        model->setOperandValue(operandIndex, biasValue);
        return int(operandIndex);
    }

    // See OperationPattern::mMakeSpecialInput. This function is used to
    // manufacture a TRANSPOSE input operand that doesn't fit the general operand
    // pattern known to the graph generator infrastructure.
    int makeTransposeSpecialInput(unsigned /* problemSize */, TestModel* model,
                                  unsigned inputIndex) {
        if (inputIndex != 1) {
            return -1;
        }

        // input operand 1 is perm, a 1-D tensor
        const WrapperOperandType permType(WrapperType::TENSOR_INT32, {2});
        const uint32_t operandIndex = model->addOperand(&permType);
        std::vector<int32_t> permValue = {1, 0};
        model->setOperandValue(operandIndex, permValue);
        return int(operandIndex);
    }

#ifdef VERBOSE
    class ModelStats {
       public:
        ModelStats(const ModelBuilder* model) : mBuilder(model) {}
        ModelStats(const WrapperModel* model)
            : mBuilder(reinterpret_cast<const ModelBuilder*>(model->getHandle())) {}
        friend std::ostream& operator<<(std::ostream& out, const ModelStats& stats) {
            const uint32_t operandCount = stats.mBuilder->operandCount();
            const uint32_t inputCount = stats.mBuilder->inputCount();
            const uint32_t outputCount = stats.mBuilder->outputCount();
            out << "operationCount = " << stats.mBuilder->operationCount()
                << ", operandCount = " << operandCount << ", inputCount = " << inputCount << " ("
                << (double(inputCount) / operandCount) << ")"
                << ", outputCount = " << outputCount << " (" << (double(outputCount) / operandCount)
                << ")";
            return out;
        }

       private:
        const ModelBuilder* mBuilder;
    };

    template <typename T_iterator>
    static void dump(T_iterator I, T_iterator E) {
        std::cout << "{";
        for (; I != E; I++) {
            std::cout << " " << *I;
        }
        std::cout << " }" << std::endl;
    }
#endif

    std::mt19937 mRandNumEng;

   private:
    std::uniform_real_distribution<double> mRandNumUnitDist;
};

const RandomPartitioningTest::OperationPattern RandomPartitioningTest::kOperationPatterns[] = {
        {HalVersion::V1_0, ANEURALNETWORKS_ADD, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_LOGISTIC, 1, 1, -1, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_MUL, 3, 1, 2, nullptr},
        {HalVersion::V1_0, ANEURALNETWORKS_RNN, 6, 2, 5,
         &RandomPartitioningTest::makeRnnSpecialInput},
        {HalVersion::V1_0, ANEURALNETWORKS_TANH, 1, 1, -1, nullptr},

        {HalVersion::V1_1, ANEURALNETWORKS_SUB, 3, 1, 2, nullptr},
        {HalVersion::V1_1, ANEURALNETWORKS_TRANSPOSE, 2, 1, -1,
         &RandomPartitioningTest::makeTransposeSpecialInput},

        {HalVersion::V1_2, ANEURALNETWORKS_MAXIMUM, 2, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_NEG, 1, 1, -1, nullptr},
        {HalVersion::V1_2, ANEURALNETWORKS_SIN, 1, 1, -1, nullptr},

        {HalVersion::V1_3, ANEURALNETWORKS_ELU, 2, 1, -1,
         &RandomPartitioningTest::makeEluSpecialInput},
        {HalVersion::V1_3, ANEURALNETWORKS_HARD_SWISH, 1, 1, -1, nullptr},
};
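// Each initializer above supplies, in order: mMinHalVersion, mOperationType,
// mNumInputs, mNumOutputs, mActivationFunctionInputIndex, and
// mMakeSpecialInput. For example, ANEURALNETWORKS_ADD takes 3 inputs (two
// addends plus a fused activation code at input index 2) and produces 1 output.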

HalVersion RandomPartitioningTest::getMinHalVersion(ANeuralNetworksOperationType type) {
    static const auto kOperationToVersion = [] {
        std::map<ANeuralNetworksOperationType, HalVersion> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mMinHalVersion;
        }
        return result;
    }();

    return kOperationToVersion.at(type);
}

Signature RandomPartitioningTest::getSignature(const HidlModel& model,
                                               const V1_3::Operation& operation) {
    static const auto kOperationToActivation = [] {
        std::map<ANeuralNetworksOperationType, int> result;
        for (const auto& pattern : kOperationPatterns) {
            result[pattern.mOperationType] = pattern.mActivationFunctionInputIndex;
        }
        return result;
    }();

    const ANeuralNetworksOperationType operationType =
            static_cast<ANeuralNetworksOperationType>(operation.type);
    const int activationFunctionInputIndex = kOperationToActivation.at(operationType);
    if (activationFunctionInputIndex < 0) {
        return Signature(operationType, -1);
    }

    const V1_3::Operand& operand =
            model.main.operands[operation.inputs[activationFunctionInputIndex]];
    CHECK(operand.lifetime == V1_3::OperandLifeTime::CONSTANT_COPY);
    CHECK(operand.type == V1_3::OperandType::INT32);
    int32_t value;
    memcpy(&value, &model.operandValues[operand.location.offset], operand.location.length);
    return Signature(operationType, value);
}

std::string RandomPartitioningTest::to_string(HalVersion version) {
    switch (version) {
        case HalVersion::V1_0:
            return "V1_0";
        case HalVersion::V1_1:
            return "V1_1";
        case HalVersion::V1_2:
            return "V1_2";
        case HalVersion::V1_3:
            return "V1_3";
        default:
            return "V_UNKNOWN";
    }
}

class TestDriver : public SampleDriver {
   public:
    // Behaves like SampleDriver, except that it only supports
    // operations with the specified signatures.
    TestDriver(const char* name, std::set<Signature> signatures)
        : SampleDriver(name), mSignatures(std::move(signatures)) {}

    hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb _hidl_cb) override {
        android::nn::initVLogMask();
        const V1_0::PerformanceInfo kPerf = {.execTime = 0.75f, .powerUsage = 0.75f};
        V1_3::Capabilities capabilities = {
                .relaxedFloat32toFloat16PerformanceScalar = kPerf,
                .relaxedFloat32toFloat16PerformanceTensor = kPerf,
                .operandPerformance = nn::nonExtensionOperandPerformance<HalVersion::V1_3>(kPerf),
                .ifPerformance = kPerf,
                .whilePerformance = kPerf};
        _hidl_cb(V1_3::ErrorStatus::NONE, capabilities);
        return hardware::Void();
    }

    hardware::Return<void> getSupportedOperations_1_3(const HidlModel& model,
                                                      getSupportedOperations_1_3_cb cb) override {
        if (nn::validateModel(model)) {
            const size_t count = model.main.operations.size();
            std::vector<bool> supported(count);
            for (size_t i = 0; i < count; i++) {
                supported[i] = (mSignatures.count(RandomPartitioningTest::getSignature(
                                        model, model.main.operations[i])) != 0);
            }
            cb(V1_3::ErrorStatus::NONE, supported);
        } else {
            cb(V1_3::ErrorStatus::INVALID_ARGUMENT, {});
        }
        return hardware::Void();
    }

    hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
            const HidlModel& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
            const V1_3::OptionalTimePoint& deadline,
            const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
            const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
            const sp<V1_3::IPreparedModelCallback>& callback) override {
        // NOTE: We verify that all operations in the model are supported.
        V1_3::ErrorStatus outStatus = V1_3::ErrorStatus::INVALID_ARGUMENT;
        auto ret = getSupportedOperations_1_3(
                model, [&outStatus](V1_3::ErrorStatus inStatus,
                                    const hardware::hidl_vec<bool>& supportedOperations) {
                    if (inStatus == V1_3::ErrorStatus::NONE) {
                        if (std::all_of(supportedOperations.begin(), supportedOperations.end(),
                                        [](bool v) { return v; })) {
                            outStatus = V1_3::ErrorStatus::NONE;
                        }
                    }
                });
        if (ret.isOk() && (outStatus == V1_3::ErrorStatus::NONE)) {
            return SampleDriver::prepareModel_1_3(model, preference, priority, deadline, modelCache,
                                                  dataCache, token, callback);
        } else {
            callback->notify_1_3(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
            return V1_3::ErrorStatus::INVALID_ARGUMENT;
        }
    }

   private:
    const std::set<Signature> mSignatures;
};

class TestDriverV1_2 : public V1_2::ADevice {
   public:
    TestDriverV1_2(const char* name, std::set<Signature> signatures)
        : V1_2::ADevice(new TestDriver(name, std::move(signatures))) {}
};

class TestDriverV1_1 : public V1_1::ADevice {
   public:
    TestDriverV1_1(const char* name, std::set<Signature> signatures)
        : V1_1::ADevice(new TestDriver(name, std::move(signatures))) {}
};

class TestDriverV1_0 : public V1_0::ADevice {
   public:
    TestDriverV1_0(const char* name, std::set<Signature> signatures)
        : V1_0::ADevice(new TestDriver(name, std::move(signatures))) {}
};
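// (The V1_0/V1_1/V1_2 drivers above wrap the V1_3 TestDriver in an ADevice
// adapter, which presents the driver at the older HAL version;
// makeTestDriver() below picks the appropriate wrapper for the requested
// version.)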
SharedDevice RandomPartitioningTest::makeTestDriver(HalVersion version, const char* name,
                                                    std::set<Signature> signatures) {
    switch (version) {
        case HalVersion::V1_0:
            return nn::makeSharedDevice(name, new TestDriverV1_0(name, std::move(signatures)));
        case HalVersion::V1_1:
            return nn::makeSharedDevice(name, new TestDriverV1_1(name, std::move(signatures)));
        case HalVersion::V1_2:
            return nn::makeSharedDevice(name, new TestDriverV1_2(name, std::move(signatures)));
        case HalVersion::V1_3:
            return nn::makeSharedDevice(name, new TestDriver(name, std::move(signatures)));
        default:
            ADD_FAILURE() << "Unexpected HalVersion " << static_cast<int32_t>(version);
            return nullptr;
    }
}

INSTANTIATE_TEST_SUITE_P(Seed, RandomPartitioningTest,
                         ::testing::Range(kFirstSeed, kFirstSeed + kNumTestCases));
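// The suite runs once per seed in [kFirstSeed, kFirstSeed + kNumTestCases).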

TEST_P(RandomPartitioningTest, Test) {
    LOG(INFO) << "RandomPartitioningTest: GetParam() = " << GetParam();

#ifdef VERBOSE
    std::cout << std::setprecision(2) << std::fixed << std::setw(4);
#endif

    const unsigned problemSize = 1 + randUInt(kMaxProblemSize);
    const WrapperOperandType problemType(WrapperType::TENSOR_FLOAT32, {problemSize, problemSize});
    const WrapperOperandType unknownDimensionsTypes[] = {
            {WrapperType::TENSOR_FLOAT32, {}},
            {WrapperType::TENSOR_FLOAT32, {0, 0}},
            {WrapperType::TENSOR_FLOAT32, {0, problemSize}},
            {WrapperType::TENSOR_FLOAT32, {problemSize, 0}},
    };
    const unsigned kUnknownDimensionsTypesCount =
            sizeof(unknownDimensionsTypes) / sizeof(unknownDimensionsTypes[0]);

    static const WrapperOperandType activationFunctionType(WrapperType::INT32, {});

    const unsigned numOperations = 2 + randUInt(kMaxNumOperations - 1);
    const bool allowDeadOperations = (randFrac() < 0.2);
    const bool allowUnknownDimensions = (randFrac() < 0.25);

    // TODO: The current algorithm builds the graph in a forward
    // direction (i.e., later-generated operations consume outputs
    // from earlier-generated operations). In order to get more
    // variation in graph topology, perhaps we should also create an
    // algorithm to build the graph in a backward direction (i.e.,
    // later-generated operations produce outputs to be consumed by
    // earlier-generated operations).
    [[maybe_unused]] const bool buildForward = randBool();

    // TODO: Add a form of forced connectivity that operates by
    // joining disjoint subgraphs rather than by forcing a root.
    const bool forceCommonRoot = (randFrac() < 0.75);

    auto computeMode = WrapperExecution::getComputeMode();
    // We check randFrac() independent of compute mode, because we don't want
    // the random number sequence to change depending on compute mode: Compute
    // mode should only affect how we perform the inference, not how we build the
    // Model, the Compilation, or the Execution.
    if (randFrac() < 0.5 && computeMode == WrapperExecution::ComputeMode::ASYNC) {
        computeMode = WrapperExecution::ComputeMode::FENCED;
    }

    TestModel model;
    std::vector<uint32_t> modelInputs;
    std::vector<uint32_t> modelOutputs;

    std::set<uint32_t> operandsWithUnknownDimensions;

    // Each region in weights is a problem-sized 2-D TENSOR_FLOAT32.
    TestMemories weights;

    // Keep track of all normal (i.e., not activation function and not
    // "special") operands that are values (from setOperandValue*()).
    // .first: operand index
    // .second: if the operand is already defined (via setOperandValue*()) then ~0U;
    //          otherwise, the operand has yet to be defined, and this is the corresponding
    //          region index in "weights"
    std::vector<std::pair<uint32_t, unsigned>> valueOperands;

    // An operand is "dead" if it is not consumed by another operation
    // and is not a model output. Key is operand index; value is
    // operation index.
    std::map<uint32_t, uint32_t> deadOperands;

    // An operation is "dead" if all of its outputs are dead.
    std::set<uint32_t> deadOperations;

    // Collect the signatures of operations in this model.
    std::set<Signature> signatures;

    // For reporting purposes, keep track of the number of root
    // operations (those that do not consume results produced by other
    // operations).
    unsigned rootOperationCount = 0;

    // Track whether we added operands with unknown dimensions. In this case,
    // partitioned compilation will fail if such an operand is read in a
    // different partition than it is written, and the partition that does the
    // writing is scheduled on a pre-HAL 1.2 (pre-Android Q) device.
    bool hasUnknownDimensions = false;

    // Generate operations.
    for (unsigned i = 0; i < numOperations; i++) {
        const unsigned operationPatternIndex = randUInt(std::size(kOperationPatterns));
        const auto& operationPattern = kOperationPatterns[operationPatternIndex];

        // INPUTS //////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationInputs(operationPattern.mNumInputs, ~0U);

        // First, process activation function and special inputs, and
        // keep track of which inputs remain.
        std::vector<uint32_t> normalOperationInputIndexes;
        int32_t activationFunction = -1;
        for (unsigned operationInputIndex = 0; operationInputIndex < operationPattern.mNumInputs;
             operationInputIndex++) {
            if (int(operationInputIndex) == operationPattern.mActivationFunctionInputIndex) {
                const uint32_t operandIndex = model.addOperand(&activationFunctionType);
                activationFunction = randUInt(4);
                if (activationFunction == ANEURALNETWORKS_FUSED_RELU1) {
                    // workaround for http://b/69011131
                    activationFunction = ANEURALNETWORKS_FUSED_NONE;
                }
                model.setOperandValue(operandIndex, activationFunction);
                operationInputs[operationInputIndex] = operandIndex;
                continue;
            }
            if (operationPattern.mMakeSpecialInput != nullptr) {
                const int operandIndex = (this->*(operationPattern.mMakeSpecialInput))(
                        problemSize, &model, operationInputIndex);
                if (operandIndex >= 0) {
                    operationInputs[operationInputIndex] = operandIndex;
                    continue;
                }
            }
            normalOperationInputIndexes.push_back(operationInputIndex);
        }
        CHECK(!normalOperationInputIndexes.empty());
        signatures.insert(Signature(operationPattern.mOperationType, activationFunction));

        // A (normal) operation input can be one of:
        // - a new or existing model input
        // - an output of an existing operation
        // - an OperandValue
        // - an OperandValueFromMemory
        // Some guidelines:
        // - We generally don't want all of an operation's inputs to be values (constants)
        const unsigned normalOperationInputCount = normalOperationInputIndexes.size();
        // How many of this operation's inputs are constants?
        unsigned normalOperationInputConstantCount = 0;
        // How many of this operation's inputs are model inputs?
        unsigned normalOperationInputModelInputCount = 0;
        // We begin by deciding what kind of input each (normal) operation will be; we don't
        // actually pick input operand indexes at this time, because we might override this
        // decision later.
        enum InputKind { IK_SUBGRAPH_INPUT, IK_OPERATION_OUTPUT, IK_VALUE };
        std::vector<InputKind> normalOperationInputKinds(normalOperationInputCount);
        std::generate(
                normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                [this, &model, numOperations, normalOperationInputCount,
                 &normalOperationInputConstantCount,
                 &normalOperationInputModelInputCount]() -> InputKind {
                    // Constant? Becomes less likely the more
                    // constants we already have as inputs to
                    // this operation.
                    if (randFrac() < 0.3 * (1 - double(normalOperationInputConstantCount) /
                                                        normalOperationInputCount)) {
                        normalOperationInputConstantCount++;
                        return IK_VALUE;
                    }

                    // Model input? Becomes less likely the
                    // more model inputs we already have as
                    // inputs to this operation, and the further
                    // along we are in generating this model
                    // (i.e., the more operations we have
                    // generated).
                    if ((model.operationCount() == 0) ||
                        (randFrac() < 0.5 *
                                              (1 - double(normalOperationInputModelInputCount) /
                                                           normalOperationInputCount) *
                                              std::min(0.3, (1 - double(model.operationCount()) /
                                                                         numOperations)))) {
                        normalOperationInputModelInputCount++;
                        return IK_SUBGRAPH_INPUT;
                    }

                    // Else output of an existing operation.
                    return IK_OPERATION_OUTPUT;
                });

        // Now force common root or model input, if necessary. (A
        // model must have at least one input.)
        auto force = [this, &normalOperationInputKinds,
                      normalOperationInputCount](InputKind forceKind) {
            if (std::none_of(normalOperationInputKinds.begin(), normalOperationInputKinds.end(),
                             [forceKind](InputKind kind) { return kind == forceKind; })) {
                normalOperationInputKinds[randUInt(normalOperationInputCount)] = forceKind;
            }
        };
        if (forceCommonRoot && (model.operationCount() != 0)) {
            force(IK_OPERATION_OUTPUT);
        }
        if (modelInputs.empty()) {
            CHECK(model.operationCount() == 0);
            force(IK_SUBGRAPH_INPUT);
        }

        // Finally create the normal inputs.
        bool isRootOperation = true;
        for (unsigned i = 0; i < normalOperationInputCount; i++) {
            uint32_t operandIndex = ~0U;
            switch (normalOperationInputKinds[i]) {
                case IK_SUBGRAPH_INPUT: {
                    if (!modelInputs.empty() && (randFrac() < 0.5)) {
                        operandIndex = modelInputs[randUInt(modelInputs.size())];
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        modelInputs.push_back(operandIndex);
                    }
                    break;
                }
                case IK_OPERATION_OUTPUT: {
                    decltype(deadOperands.begin()) deadOperandI;
                    if (!deadOperands.empty() && (randFrac() < 0.5)) {
                        deadOperandI = deadOperands.begin();
                        std::advance(deadOperandI, randUInt(deadOperands.size()));
                        operandIndex = deadOperandI->first;
                    } else {
                        const uint32_t existingOperationIndex = randUInt(model.operationCount());
                        const auto& existingOperationOutputs =
                                model.getOperationOutputs(existingOperationIndex);
                        operandIndex =
                                existingOperationOutputs[randUInt(existingOperationOutputs.size())];
                        deadOperandI = deadOperands.find(operandIndex);
                        CHECK(deadOperandI == deadOperands.end() ||
                              deadOperandI->second == existingOperationIndex);
                    }
                    if (deadOperandI != deadOperands.end()) {
                        const uint32_t correspondingOperation = deadOperandI->second;
                        deadOperands.erase(deadOperandI);

                        auto deadOperationI = deadOperations.find(correspondingOperation);
                        if (deadOperationI != deadOperations.end()) {
                            deadOperations.erase(deadOperationI);
                        }
                    }
                    isRootOperation = false;
                    break;
                }
                case IK_VALUE: {
                    if (!valueOperands.empty() && (randFrac() < 0.25)) {
                        operandIndex = valueOperands[randUInt(valueOperands.size())].first;
                    } else {
                        operandIndex = model.addOperand(&problemType);
                        if (randFrac() < 0.5) {
                            std::vector<float> value(problemSize * problemSize);
                            std::generate(value.begin(), value.end(),
                                          [this] { return randFrac(); });
                            model.setOperandValue(operandIndex, value);
                            valueOperands.push_back(std::make_pair(operandIndex, ~0U));
                        } else {
                            unsigned memoryIndex = ~0U;
                            if ((weights.memoryCount() != 0) &&
                                (kAllWeightsInOnePool || (randFrac() < 0.5))) {
                                memoryIndex = randUInt(weights.memoryCount());
                            } else {
                                memoryIndex = weights.addMemory();
                            }
                            const size_t length = problemSize * problemSize * sizeof(float);
                            const unsigned regionIndex = weights.addRegion(memoryIndex, length);
                            valueOperands.push_back(std::make_pair(operandIndex, regionIndex));
                        }
                    }
                    break;
                }
                default:
                    FAIL();
            }
            operationInputs[normalOperationInputIndexes[i]] = operandIndex;
        }
        if (isRootOperation) {
            rootOperationCount++;
        }

        // OUTPUTS /////////////////////////////////////////////////////////////////////////////////

        std::vector<uint32_t> operationOutputs(operationPattern.mNumOutputs);
        std::generate(
                operationOutputs.begin(), operationOutputs.end(),
                [&operandsWithUnknownDimensions, &model, &problemType, &unknownDimensionsTypes,
                 &hasUnknownDimensions, allowUnknownDimensions, this] {
                    // Before the fix for http://b/132458982, 3% unknowns causes
                    // ~35% of partitionings to fail. After the fix, 3%
                    // unknowns causes ~3% of partitionings to fail. (This is
                    // determined by removing the fallback code and noting the
                    // number of failures.)
                    if (allowUnknownDimensions && randFrac() < 0.03) {
                        hasUnknownDimensions = true;
                        uint32_t opndIdx = model.addOperand(
                                &unknownDimensionsTypes[randUInt(kUnknownDimensionsTypesCount)]);
                        operandsWithUnknownDimensions.insert(opndIdx);
                        return opndIdx;
                    } else {
                        return model.addOperand(&problemType);
                    }
                });

        // OPERATION ///////////////////////////////////////////////////////////////////////////////

        const uint32_t operationIndex = model.addOperation(operationPattern.mOperationType,
                                                           operationInputs, operationOutputs);
        deadOperations.insert(operationIndex);
        std::for_each(operationOutputs.begin(), operationOutputs.end(),
                      [&deadOperands, operationIndex](uint32_t operandIndex) {
                          deadOperands.insert(std::make_pair(operandIndex, operationIndex));
                      });
    }

    // Now finalize the weights.
    weights.layout();
    for (const auto& valueOperand : valueOperands) {
        const uint32_t operandIndex = valueOperand.first;
        const unsigned regionIndex = valueOperand.second;

        if (regionIndex == ~0U) {
            continue;
        }

        const WrapperMemory* memory;
        uint32_t offset, length;
        float* region =
                static_cast<float*>(weights.getRegion(regionIndex, &memory, &offset, &length));
        CHECK(length == problemSize * problemSize * sizeof(float));
        std::generate(region, region + problemSize * problemSize, [this] { return randFrac(); });
        model.setOperandValueFromMemory(operandIndex, memory, offset, length);
    }

    // Now select model outputs.
    for (uint32_t operationIdx = 0, operationCount = model.operationCount();
         operationIdx < operationCount; operationIdx++) {
        const auto& outputs = model.getOperationOutputs(operationIdx);
        for (uint32_t outputIdx = 0, outputCount = outputs.size(); outputIdx < outputCount;
             outputIdx++) {
            bool modelOutput = false;
            const uint32_t operandIndex = outputs[outputIdx];
            const auto deadOperandI = deadOperands.find(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                // This is not consumed within the model, so unless we
                // make it an output of the model, it's dead. The
                // further along we are in generating this model
                // (i.e., the more operations we have generated), the
                // more likely we are to classify this operation
                // output as a model output.
                const double probabilityOfModelOutput =
                        0.50 * [](double x) { return x * x; }(double(operationIdx + 1) /
                                                              operationCount);
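                // (Quadratic ramp: an output of the first operation is almost
                // never chosen as a model output, while an output of the final
                // operation is chosen with probability 0.50. The double() cast
                // keeps the ratio from being computed in integer arithmetic.)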
                modelOutput = (randFrac() < probabilityOfModelOutput);
            } else {
                // This is consumed within the model, so we'll rarely
                // make it an output of the model.
                modelOutput = (randFrac() < 0.05);
            }
            if (!modelOutput) {
                continue;
            }
            modelOutputs.push_back(operandIndex);
            if (deadOperandI != deadOperands.end()) {
                deadOperands.erase(deadOperandI);
                const auto deadOperationI = deadOperations.find(operationIdx);
                if (deadOperationI != deadOperations.end()) {
                    deadOperations.erase(deadOperationI);
                }
            }
        }
    }
    if (!allowDeadOperations) {
        // For each dead operation, pick a random output to become a model output.
        for (uint32_t deadOperationIndex : deadOperations) {
            const auto& deadOperationOutputs = model.getOperationOutputs(deadOperationIndex);
            const uint32_t deadOperandIndex =
                    deadOperationOutputs[randUInt(deadOperationOutputs.size())];
            modelOutputs.push_back(deadOperandIndex);
        }
    }
    // A model must have at least one output.
    if (modelOutputs.empty()) {
        const auto& outputs = model.getOperationOutputs(randUInt(model.operationCount()));
        modelOutputs.push_back(outputs[randUInt(outputs.size())]);
    }
    if (computeMode == WrapperExecution::ComputeMode::FENCED) {
        if (std::any_of(modelOutputs.begin(), modelOutputs.end(),
                        [&operandsWithUnknownDimensions](uint32_t opndIdx) {
                            return operandsWithUnknownDimensions.count(opndIdx) != 0;
                        })) {
            // Workaround for http://b/162980246: Fenced execution is documented
            // as requiring model outputs to have fully specified dimensions,
            // either from Model or from Execution, but its implementation
            // requires this to come from Model. This test only guarantees that
            // they have fully specified dimensions from Execution. So in the
            // case of a Model where some output does not have fully specified
            // dimensions, perform asynchronous execution instead.
            computeMode = WrapperExecution::ComputeMode::ASYNC;
        }
    }

    model.identifyInputsAndOutputs(modelInputs, modelOutputs);
#ifdef VERBOSE
    {
        std::cout << "Original model: " << ModelStats(&model) << std::endl;
        std::cout << "rootOperationCount = " << rootOperationCount << ", deadOperations = ";
        if (allowDeadOperations) {
            std::cout << deadOperations.size();
        } else {
            std::cout << "forbidden (converted " << deadOperations.size() << ")";
        }
        std::cout << std::endl;
    }
#endif
    ASSERT_EQ(model.finish(), Result::NO_ERROR);

    // Non-partitioned compilation.
    TestCompilation c(&model);
    ASSERT_EQ(c.setPartitioning(DeviceManager::kPartitioningNo), Result::NO_ERROR);
    ASSERT_EQ(c.finish(), Result::NO_ERROR);

    // Create some drivers for partitioned compilation.
    CHECK(!signatures.empty());
    std::vector<std::set<Signature>> signaturesForDriver(signatures.size());
    // First assign each signature to a random driver (a driver is
    // just represented as an entry in the signaturesForDriver
    // vector).
    for (Signature signature : signatures) {
        signaturesForDriver[randUInt(signatures.size())].insert(signature);
    }
    // Now remove each entry that has no signatures.
    auto firstExtra =
            std::remove_if(signaturesForDriver.begin(), signaturesForDriver.end(),
                           [](const std::set<Signature>& sigSet) { return sigSet.empty(); });
    if (firstExtra != signaturesForDriver.end()) {
        signaturesForDriver.erase(firstExtra, signaturesForDriver.end());
    }
    // Now actually create the drivers.
    std::vector<std::shared_ptr<Device>> devices;
    for (unsigned i = 0; i < signaturesForDriver.size(); i++) {
        const auto& signaturesForThisDriver = signaturesForDriver[i];
        // The minimum HAL version for this driver is the highest minimum HAL version of
        // any operation supported by this driver.
        const HalVersion minHalVersion = getMinHalVersion(
                std::max_element(signaturesForThisDriver.begin(), signaturesForThisDriver.end(),
                                 [](const Signature& a, const Signature& b) {
                                     return getMinHalVersion(a.first) < getMinHalVersion(b.first);
                                 })
                        ->first);
        const HalVersion actualHalVersion =
                static_cast<HalVersion>(static_cast<int32_t>(minHalVersion) +
                                        randUInt(static_cast<int32_t>(HalVersion::LATEST) -
                                                 static_cast<int32_t>(minHalVersion) + 1));
        const std::string name =
                "TestDriver(" + std::to_string(i) + "){" + to_string(actualHalVersion) + "}";
#ifdef VERBOSE
        std::cout << "Creating " + name + " for collection of signatures that requires HAL " +
                             to_string(minHalVersion)
                  << std::endl;
#endif
        auto device = DeviceManager::forTest_makeDriverDevice(
                makeTestDriver(actualHalVersion, name.c_str(), signaturesForThisDriver));
        devices.push_back(device);
    }
    // CPU fallback device
    devices.push_back(DeviceManager::getCpuDevice());

    // Partitioned compilation.
    //
    // If a test case has both (a) unknown intermediate operand sizes and
    // (b) partitions scheduled on pre-HAL 1.2 (pre-Android Q) devices, fallback
    // is needed if the non-fallback partitioning fails.
    //
    // The issue is that prior to HAL 1.2, an output operand must have a known
    // size provided either in the Model or in the Request; and in the case of
    // partitioning, an intermediate operand of the original model that becomes
    // an output operand of a partition won't have a known size provided in the
    // Request.
    //
    // If a test case has a step model with no inputs or no outputs, fallback is needed.
    // This is because our HAL specification requires a model to have at least one
    // input and one output.
    //
    // If a fallback is needed, we retry the compilation with a fallback and require
    // the fallback to succeed. Otherwise, we require the partitioning to succeed
    // without CPU fallback.
    TestCompilation cNoFallback(&model, devices);
    TestCompilation cWithFallback(&model, devices);
    ASSERT_EQ(cNoFallback.setPartitioning(DeviceManager::kPartitioningWithoutFallback),
              Result::NO_ERROR);
    auto compilationResult = cNoFallback.finish();
    const bool fallbackNeededForDynamicTemporaries =
            compilationResult == Result::OP_FAILED && hasUnknownDimensions &&
            cNoFallback.getExecutionPlan().hasDynamicTemporaries() &&
            std::any_of(devices.begin(), devices.end(), [](const std::shared_ptr<Device>& device) {
                return device->getFeatureLevel() < nn::kHalVersionV1_2ToApi.featureLevel;
            });
    const bool fallbackNeededForStepModelWithNoInputsOrNoOutputs =
            cNoFallback.getExecutionPlan().forTest_hasStepModelWithNoInputsOrNoOutputs();
    const bool fallbackNeeded = fallbackNeededForDynamicTemporaries ||
                                fallbackNeededForStepModelWithNoInputsOrNoOutputs;
    if (fallbackNeeded) {
        ASSERT_EQ(compilationResult, Result::OP_FAILED);

        ASSERT_EQ(cWithFallback.setPartitioning(DeviceManager::kPartitioningWithFallback),
                  Result::NO_ERROR);
        compilationResult = cWithFallback.finish();
        ASSERT_EQ(compilationResult, Result::NO_ERROR);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
        ASSERT_EQ(cWithFallback.getExecutionPlan().forTest_simpleGetDevice(),
                  DeviceManager::getCpuDevice());
    } else {
        ASSERT_EQ(compilationResult, Result::NO_ERROR);

        const ExecutionPlan& plan = cNoFallback.getExecutionPlan();
        if (signaturesForDriver.size() == 1) {
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::SIMPLE);
            ASSERT_TRUE(plan.forTest_simpleGetDevice() != DeviceManager::getCpuDevice());
        } else {
            ASSERT_EQ(plan.forTest_getKind(), ExecutionPlan::Kind::COMPOUND);
            auto stepToDeviceId = [](const std::shared_ptr<LogicalStep>& step) {
                return step->executionStep()->getDevice();
            };
            std::set<decltype(stepToDeviceId(plan.forTest_compoundGetSteps()[0]))> deviceSet;
            for (const auto& step : plan.forTest_compoundGetSteps()) {
                deviceSet.insert(stepToDeviceId(step));
            }
            // TODO(b/178517567): Figure out why we sometimes have 1 more
            // signature than we have devices -- this means that we've scheduled
            // one or more operations onto the CPU fallback device, which is not
            // something we ever expect to do.
            ASSERT_TRUE(deviceSet.size() == signaturesForDriver.size() ||
                        deviceSet.size() == signaturesForDriver.size() + 1);
        }
    }
    TestCompilation& c2 = (fallbackNeeded ? cWithFallback : cNoFallback);
#ifdef TRACE_DYNTEMP
    {
        const ExecutionPlan& plan = c2.getExecutionPlan();
        const size_t dynamicTemporaryCount = plan.forTest_flatGetDynamicTemporaries().size();
        std::cout << "TRACE_DYNTEMP: dynamic temporary count = " << dynamicTemporaryCount
                  << std::endl;
        if (plan.forTest_getKind() == ExecutionPlan::Kind::COMPOUND) {
            size_t stepsWithModelOutputsThatAreDownstreamInputs = 0;
            size_t countOfModelOutputsThatAreDownstreamInputs = 0;
            for (const auto& step : plan.forTest_compoundGetSteps()) {
                if (const size_t count = step->executionStep()
                                                 ->getModelOutputsThatAreDownstreamInputs()
                                                 .size()) {
                    ++stepsWithModelOutputsThatAreDownstreamInputs;
                    countOfModelOutputsThatAreDownstreamInputs += count;
                }
            }
            if (countOfModelOutputsThatAreDownstreamInputs != 0) {
                std::cout << "TRACE_DYNTEMP: model outputs that are downstream inputs: "
                          << countOfModelOutputsThatAreDownstreamInputs << " / "
                          << modelOutputs.size() << ", over "
                          << stepsWithModelOutputsThatAreDownstreamInputs << " / "
                          << plan.forTest_compoundGetSteps().size() << " steps" << std::endl;
                EXPECT_LE(countOfModelOutputsThatAreDownstreamInputs, modelOutputs.size());
            }
        } else {
            EXPECT_EQ(dynamicTemporaryCount, size_t(0))
                    << "Only COMPOUND plan should have dynamic temporaries";
        }
    }
#endif

#ifdef VERBOSE
    {
        std::cout << "signatures = " << signatures.size() << ", devices = " << devices.size()
                  << std::endl;
        // TODO: When dumping steps, include non-ExecutionSteps.
        const ExecutionPlan& plan = c2.getExecutionPlan();
        switch (plan.forTest_getKind()) {
            case ExecutionPlan::Kind::SIMPLE:
                std::cout << "plan: simple" << std::endl;
                break;
            case ExecutionPlan::Kind::COMPOUND: {
                const auto& steps = plan.forTest_compoundGetSteps();
                std::set<const Device*> devicesInPlan;
                for (const auto& step : steps) {
                    if (const auto* executionStep = step->tryExecutionStep()) {
                        devicesInPlan.insert(executionStep->getDevice().get());
                    }
                }
                std::cout << "plan: compound, " << steps.size() << " steps over "
                          << devicesInPlan.size() << " devices" << std::endl;
                for (unsigned i = 0; i < steps.size(); i++) {
                    if (const auto executionStep = steps[i]->tryExecutionStep()) {
                        std::cout << "Step " << i << ": "
                                  << ModelStats(executionStep->getStepModel())
                                  << ", device = " << executionStep->getDevice()->getName()
                                  << std::endl;
                    }
                }
                break;
            }
            default:
                std::cout << "Unexpected plan kind: "
                          << static_cast<unsigned>(plan.forTest_getKind());
                break;
        }
    }
#endif

    // For execution:
    // - create golden inputs (one long vector) and golden output value
    //   - golden inputs will be copied to actual inputs before each
    //     of the two executions
    //   - golden output will be used to fill actual outputs before each
    //     of the two executions
    // - create actual inputs and outputs
    // - first execution (non-partitioned)
    //   - initialize inputs and (to avoid unrelated oddities) outputs
    //   - execute
    //   - copy outputs to a save area (one long vector)
    // - second execution (partitioned)
    //   - (to avoid unrelated oddities) initialize inputs and outputs
    //   - execute
    //   - compare outputs to save area

    // If the runtime and drivers are working properly, execution
    // should not change the inputs. Nonetheless, we reinitialize the
    // inputs for each execution, so as to avoid unrelated problems
    // appearing to be problems related to unpartitioned execution
    // versus partitioned execution. Similarly, execution behavior
    // should not be dependent on the outputs; but we'll initialize the
    // outputs anyway.
    std::vector<float> goldenInputs(problemSize * problemSize * model.inputCount());
    std::generate(goldenInputs.begin(), goldenInputs.end(), [this] { return randFrac(); });
#ifdef VERBOSE
    {
        std::cout << "flat inputs = ";
        dump(goldenInputs.begin(), goldenInputs.end());
    }
#endif
    const float goldenOutput = randFrac();

    // Create the memory for the actual inputs and outputs.
    struct InputOutputDescriptor {
        enum Kind { INPUT, OUTPUT };
        Kind mKind;

        // The input or output either resides in a local buffer
        // (mVector, in which case mMemoryRegion is ignored); or in a
        // shared memory region within a TestMemories instance
        // (mMemoryRegion, in which case mVector is ignored).
        enum Location { VECTOR, REGION };
        Location getLocation() const { return !mVector.empty() ? VECTOR : REGION; }

        std::vector<float> mVector;
        unsigned mMemoryRegion;
    };
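    // (getLocation() above infers REGION from an empty mVector; this works
    // because the VECTOR case below always resizes mVector before use.)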
    std::vector<InputOutputDescriptor> ioDescriptors(model.inputCount() + model.outputCount());
    for (unsigned i = 0; i < ioDescriptors.size(); i++) {
        ioDescriptors[i].mKind = (i < model.inputCount() ? InputOutputDescriptor::INPUT
                                                         : InputOutputDescriptor::OUTPUT);
    }
    // We randomly interleave inputs and outputs in creation
    // order, because when we create memory regions in a
    // TestMemories instance, the order in which regions are
    // created within a single Memory is the order they'll be laid
    // out in that memory; and when we have inputs and outputs
    // within the same Memory, we want the possibility that
    // they'll be interleaved.
    std::shuffle(ioDescriptors.begin(), ioDescriptors.end(), mRandNumEng);
    TestMemories ioMemories;
    for (auto& desc : ioDescriptors) {
        if (randFrac() < 0.5) {
            desc.mVector.resize(problemSize * problemSize);
        } else {
            // TODO: common this with the way we create IK_VALUE inputs?
            unsigned memoryIndex = ~0U;
            if ((ioMemories.memoryCount() != 0) && (randFrac() < 0.5)) {
                memoryIndex = randUInt(ioMemories.memoryCount());
            } else {
                memoryIndex = ioMemories.addMemory();
            }
            const size_t length = problemSize * problemSize * sizeof(float);
            desc.mMemoryRegion = ioMemories.addRegion(memoryIndex, length);
        }
    }
    ioMemories.layout();

    // Function to set up actual inputs and outputs (initializing them
    // and telling the WrapperExecution about them).
    auto prepareForExecution = [&model, &ioDescriptors, &ioMemories, &goldenInputs, &goldenOutput,
                                problemSize, &problemType](WrapperExecution* e) {
        uint32_t inputIndex = 0, outputIndex = 0;
        for (auto& desc : ioDescriptors) {
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(goldenInputs.begin() + inputOffset,
                              goldenInputs.begin() + inputOffset + problemSize * problemSize,
                              desc.mVector.begin());
                    e->setInput(inputIndex++, desc.mVector.data(),
                                desc.mVector.size() * sizeof(float));
                } else {
                    std::fill(desc.mVector.begin(),
                              desc.mVector.begin() + problemSize * problemSize, goldenOutput);
                    e->setOutput(outputIndex++, desc.mVector.data(),
                                 desc.mVector.size() * sizeof(float), &problemType.operandType);
                }
            } else {
                const WrapperMemory* memory;
                uint32_t offset, length;
                float* region = static_cast<float*>(
                        ioMemories.getRegion(desc.mMemoryRegion, &memory, &offset, &length));
                CHECK(length == problemSize * problemSize * sizeof(float));
                if (desc.mKind == InputOutputDescriptor::INPUT) {
                    const size_t inputOffset = inputIndex * problemSize * problemSize;
                    std::copy(goldenInputs.begin() + inputOffset,
                              goldenInputs.begin() + inputOffset + problemSize * problemSize,
                              region);
                    e->setInputFromMemory(inputIndex++, memory, offset, length);
                } else {
                    std::fill(region, region + problemSize * problemSize, goldenOutput);
                    e->setOutputFromMemory(outputIndex++, memory, offset, length,
                                           &problemType.operandType);
                }
            }
        }
        CHECK(inputIndex == model.inputCount());
        CHECK(outputIndex == model.outputCount());
    };

    // Non-partitioned execution.
    WrapperExecution e(&c);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e));
    ASSERT_EQ(e.compute(computeMode), Result::NO_ERROR);

    // Copy the outputs of the non-partitioned execution to a save area.
    std::vector<float> nonPartitionedOutputs(problemSize * problemSize * model.outputCount());
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
                std::copy(desc.mVector.begin(), desc.mVector.end(),
                          nonPartitionedOutputs.begin() + outputOffset);
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
                std::copy(region, region + problemSize * problemSize,
                          nonPartitionedOutputs.begin() + outputOffset);
            }
#ifdef VERBOSE
            {
                std::cout << "nonpartitioned output[" << outputIndex << "] = ";
                dump(nonPartitionedOutputs.begin() + outputOffset,
                     nonPartitionedOutputs.begin() + outputOffset + problemSize * problemSize);
            }
#endif
            outputIndex++;
        }
    }

    // Partitioned execution.
    WrapperExecution e2(&c2);
    ASSERT_NO_FATAL_FAILURE(prepareForExecution(&e2));
    ASSERT_EQ(e2.compute(computeMode), Result::NO_ERROR);

    // Compare the outputs of the partitioned execution to the save
    // area containing the outputs of the non-partitioned execution.
    {
        uint32_t outputIndex = 0;
        for (const auto& desc : ioDescriptors) {
            if (desc.mKind != InputOutputDescriptor::OUTPUT) {
                continue;
            }
            SCOPED_TRACE(outputIndex);
            const size_t outputOffset = outputIndex * problemSize * problemSize;
            if (desc.getLocation() == InputOutputDescriptor::VECTOR) {
#ifdef VERBOSE
                std::cout << "  partitioned output[" << outputIndex << "] = ";
                dump(desc.mVector.begin(), desc.mVector.end());
#endif
                ASSERT_TRUE(std::equal(desc.mVector.begin(), desc.mVector.end(),
                                       nonPartitionedOutputs.begin() + outputOffset));
            } else {
                float* region = static_cast<float*>(ioMemories.getRegion(desc.mMemoryRegion));
#ifdef VERBOSE
                std::cout << "part output[" << outputIndex << "] = ";
                dump(region, region + problemSize * problemSize);
#endif
                ASSERT_TRUE(std::equal(region, region + problemSize * problemSize,
                                       nonPartitionedOutputs.begin() + outputOffset));
            }
            outputIndex++;
        }
    }
}

}  // namespace
}  // namespace android