/*
* Copyright (C) 2020 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "SampleDriverFloatXNNPACK"
#include <CpuExecutor.h>
#include <HalInterfaces.h>
#include <Utils.h>
#include <ValidateHal.h>
#include <android-base/logging.h>
#include <hidl/LegacySupport.h>
#include <hwbinder/IPCThreadState.h>
#include <xnnpack.h>
#include <algorithm>
#include <cstdint>
#include <limits>
#include <memory>
#include <string>
#include <thread>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>
#include "SampleDriverPartial.h"
#include "SampleDriverUtils.h"
namespace android {
namespace nn {
namespace sample_driver {
namespace {
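// Evaluates an expression that yields a V1_3::ErrorStatus and returns early
// from the enclosing function if the status is anything other than NONE.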
#define NN_DRIVER_RETURN_IF_ERROR(expr) \
do { \
V1_3::ErrorStatus _errorCode = (expr); \
if (_errorCode != V1_3::ErrorStatus::NONE) { \
return _errorCode; \
} \
} while (0)
const size_t kNumOfWorkerThreads = 1;
static const V1_2::Timing kNoTiming = {.timeOnDevice = UINT64_MAX, .timeInDriver = UINT64_MAX};
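// Returns true for NNAPI scalar operand types. Scalar operands carry operator
// parameters (activation, strides, etc.) and are not mapped to XNNPACK tensors.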
bool isScalarType(OperandType type) {
switch (type) {
case OperandType::FLOAT16:
case OperandType::FLOAT32:
case OperandType::INT32:
case OperandType::UINT32:
case OperandType::BOOL:
return true;
default:
return false;
}
}
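// Binds the request inputs/outputs described by `arguments` to the runtime
// operands: overrides unspecified dimensions, resolves the buffer pointer and
// length from the request memory pools, and marks omitted operands as NO_VALUE.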
void updateForArguments(const std::vector<uint32_t>& indexes,
const hardware::hidl_vec<V1_0::RequestArgument>& arguments,
const std::vector<RunTimePoolInfo>& requestPoolInfos,
RunTimeOperandInfo* operands) {
CHECK_EQ(indexes.size(), arguments.size());
for (size_t i = 0; i < indexes.size(); i++) {
const uint32_t operandIndex = indexes[i];
const V1_0::RequestArgument& from = arguments[i];
RunTimeOperandInfo& to = operands[operandIndex];
if (from.dimensions.size() > 0) {
// It's the responsibility of the caller to validate that
// from.dimensions only modifies the dimensions that were
// unspecified in the model. That's the case in SampleDriver.cpp
// with the call to validateRequest().
// TODO make sure that's the case for the default CPU path.
to.dimensions = from.dimensions;
}
if (from.hasNoValue) {
to.lifetime = Operand::LifeTime::NO_VALUE;
CHECK(to.buffer == nullptr);
to.length = 0;
} else {
auto poolIndex = from.location.poolIndex;
CHECK_LT(poolIndex, requestPoolInfos.size());
auto& r = requestPoolInfos[poolIndex];
to.buffer = r.getBuffer() + from.location.offset;
if (from.location.offset == 0 && from.location.length == 0) {
// Use the entire memory region.
to.length = r.getSize();
} else {
to.length = from.location.length;
}
}
}
}
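// Builds a RunTimeOperandInfo entry for every operand of the subgraph.
// Constant operands are pointed at their backing storage (the model operand
// value blob or a model memory pool); temporaries, inputs, and outputs are
// left unbound until execution time.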
std::vector<RunTimeOperandInfo> initializeRunTimeInfo(
const V1_3::Subgraph& subgraph, const std::vector<RunTimePoolInfo>& modelPoolInfos,
const hardware::hidl_vec<uint8_t>* mModelOperandValues) {
const size_t count = subgraph.operands.size();
std::vector<RunTimeOperandInfo> operands(count);
for (size_t i = 0; i < count; i++) {
const V1_3::Operand& from = subgraph.operands[i];
RunTimeOperandInfo& to = operands[i];
to.type = uncheckedConvert(from.type);
to.dimensions = from.dimensions;
to.scale = from.scale;
to.zeroPoint = from.zeroPoint;
to.length = from.location.length;
to.lifetime = uncheckedConvert(from.lifetime);
to.extraParams = uncheckedConvert(from.extraParams);
switch (from.lifetime) {
case V1_3::OperandLifeTime::TEMPORARY_VARIABLE:
to.buffer = nullptr;
to.numberOfUsesLeft = from.numberOfConsumers;
break;
case V1_3::OperandLifeTime::CONSTANT_COPY:
to.buffer = const_cast<uint8_t*>(&(*mModelOperandValues)[from.location.offset]);
to.numberOfUsesLeft = 0;
break;
case V1_3::OperandLifeTime::CONSTANT_REFERENCE: {
auto poolIndex = from.location.poolIndex;
CHECK_LT(poolIndex, modelPoolInfos.size());
auto& r = modelPoolInfos[poolIndex];
to.buffer = r.getBuffer() + from.location.offset;
to.numberOfUsesLeft = 0;
break;
}
case V1_3::OperandLifeTime::SUBGRAPH:
case V1_3::OperandLifeTime::SUBGRAPH_INPUT:
case V1_3::OperandLifeTime::SUBGRAPH_OUTPUT:
case V1_3::OperandLifeTime::NO_VALUE:
to.buffer = nullptr;
to.numberOfUsesLeft = 0;
break;
}
}
return operands;
}
} // namespace
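// Wraps an XNNPACK runtime built from an NNAPI model. Create() translates
// NNAPI operands and operations into XNNPACK Values and Nodes; Invoke() binds
// the external (input/output) buffers and runs the resulting runtime.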
class Subgraph {
public:
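// Translates the given operations into an XNNPACK subgraph and compiles it
// into a runtime. Returns nullptr if any operand or operation cannot be
// expressed in XNNPACK (e.g. non-FLOAT32 tensors or unsupported operations).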
static Subgraph* Create(const hardware::hidl_vec<V1_3::Operation>& operations,
std::vector<RunTimeOperandInfo>& operands,
const std::vector<uint32_t>& inputIndexes,
const std::vector<uint32_t>& outputIndexes, pthreadpool_t threadpool,
bool useStaticBuffer = false) {
// Convert subgraph inputs and outputs to hash sets for faster lookup.
const std::unordered_set<uint32_t> inputs(inputIndexes.begin(), inputIndexes.end());
const std::unordered_set<uint32_t> outputs(outputIndexes.begin(), outputIndexes.end());
std::unordered_set<uint32_t> externals(outputs);
xnn_subgraph_t subgraphPtr = nullptr;
xnn_status status = xnn_create_subgraph(
/*external_value_ids=*/operands.size(), /*flags=*/0, &subgraphPtr);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_create_subgraph FAILED";
return nullptr;
}
// Smart pointer to automatically release subgraph on exit.
std::unique_ptr<xnn_subgraph, decltype(&xnn_delete_subgraph)> subgraph(
subgraphPtr, &xnn_delete_subgraph);
// Detect which tensors are used as inputs or outputs of any subgraph nodes.
// -1 denotes tensor not used in the subgraph.
std::vector<int> tensors(operands.size(), -1);
for (const auto& operation : operations) {
const std::vector<uint32_t>& ins = operation.inputs;
const std::vector<uint32_t>& outs = operation.outputs;
switch (operation.type) {
case V1_3::OperationType::MEAN:
case V1_3::OperationType::PAD:
case V1_3::OperationType::RESHAPE:
case V1_3::OperationType::RESIZE_BILINEAR:
// Ignore the second input (axes, static padding, or new shape),
// because it is represented as parameters of the XNNPACK operator
// rather than as an extra input.
{
const int t = ins[0];
tensors[t] = t;
}
break;
default:
// All other operators: process all inputs
for (size_t k = 0; k < ins.size(); k++) {
if (isScalarType(operands[ins[k]].type)) continue;
const int t = ins[k];
tensors[t] = t;
}
}
for (size_t k = 0; k < outs.size(); k++) {
if (isScalarType(operands[outs[k]].type)) continue;
const int t = outs[k];
tensors[t] = t;
}
}
// XNNPACK Value IDs for NNAPI Operands
std::vector<uint32_t> xnnpackTensors(operands.size());
for (int t : tensors) {
if (t < 0) continue;
if (operands[tensors[t]].type != OperandType::TENSOR_FLOAT32) {
LOG(ERROR) << "XNNPACK only support FLOAT32 tensors";
return nullptr;
}
uint32_t flags = 0;
const void* data = nullptr;
if (operands[tensors[t]].lifetime == Operand::LifeTime::CONSTANT_COPY ||
operands[tensors[t]].lifetime == Operand::LifeTime::CONSTANT_REFERENCE ||
operands[tensors[t]].lifetime == Operand::LifeTime::POINTER) {
data = operands[tensors[t]].buffer;
}
if (inputs.count(t) != 0) {
flags |= XNN_VALUE_FLAG_EXTERNAL_INPUT;
CHECK(data == nullptr);
VLOG(DRIVER) << "found input tensor, add to external";
externals.insert(static_cast<uint32_t>(t));
}
if (outputs.count(t) != 0) {
flags |= XNN_VALUE_FLAG_EXTERNAL_OUTPUT;
}
std::vector<size_t> dims(operands[tensors[t]].dimensions.size());
for (size_t i = 0; i < dims.size(); i++) {
dims[i] = operands[tensors[t]].dimensions[i];
}
const xnn_status status = xnn_define_tensor_value(
subgraph.get(), xnn_datatype_fp32, dims.size(), dims.data(), data,
static_cast<uint32_t>(t), flags, &xnnpackTensors[t]);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_tensor_value failed";
return nullptr;
}
}
// Create XNNPACK nodes for NNAPI Operations
for (const auto& operation : operations) {
if (VisitNode(subgraph.get(), operation, operands.data(), xnnpackTensors) !=
V1_3::ErrorStatus::NONE) {
LOG(ERROR) << "XNNPACK add op failed";
return nullptr;
}
}
xnn_runtime_t runtimePtr = nullptr;
status = xnn_create_runtime_v2(subgraph.get(), threadpool, /*flags=*/0, &runtimePtr);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_create_runtime_v2 FAILED";
return nullptr;
}
return new Subgraph(runtimePtr, std::move(externals), useStaticBuffer);
}
V1_3::ErrorStatus Prepare() { return V1_3::ErrorStatus::NONE; }
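// Binds the external value IDs to the operand buffers (on every run, or only
// on the first run when static buffers are used) and executes the runtime.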
V1_3::ErrorStatus Invoke(RunTimeOperandInfo* operands) {
VLOG(DRIVER) << "Subgraph::Invoke() start";
if (!mUseStaticBuffer || mFirstRun) {
VLOG(DRIVER) << "Setup buffer for Subgraph";
std::vector<xnn_external_value> externalValues;
for (uint32_t t : mExternals) {
xnn_external_value value = {.id = t, .data = operands[t].buffer};
externalValues.push_back(value);
}
const xnn_status status =
xnn_setup_runtime(mRuntime.get(), externalValues.size(), externalValues.data());
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_setup_runtime FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
mFirstRun = false;
}
VLOG(DRIVER) << "Subgraph::Invoke() finished xnn_setup_runtime";
const xnn_status status = xnn_invoke_runtime(mRuntime.get());
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_invoke_runtime FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
return V1_3::ErrorStatus::NONE;
}
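// Maps an NNAPI implicit padding mode to the corresponding XNNPACK flag:
// SAME -> XNN_FLAG_TENSORFLOW_SAME_PADDING, VALID -> no flag.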
static V1_3::ErrorStatus CalculatePadding(int padding, uint32_t* flags) {
switch (padding) {
case ANEURALNETWORKS_PADDING_SAME:
*flags = XNN_FLAG_TENSORFLOW_SAME_PADDING;
return V1_3::ErrorStatus::NONE;
case ANEURALNETWORKS_PADDING_VALID:
*flags = 0;
return V1_3::ErrorStatus::NONE;
default:
LOG(ERROR) << "invalid padding mode";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
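// Converts a fused activation code into the [outputMin, outputMax] clamp range
// that XNNPACK applies to the operator output.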
static V1_3::ErrorStatus ConvertActivationToOutputRange(int activation, float* outputMin,
float* outputMax) {
switch (activation) {
case ANEURALNETWORKS_FUSED_NONE:
*outputMin = -std::numeric_limits<float>::infinity();
*outputMax = +std::numeric_limits<float>::infinity();
return V1_3::ErrorStatus::NONE;
case ANEURALNETWORKS_FUSED_RELU:
*outputMin = 0.0f;
*outputMax = +std::numeric_limits<float>::infinity();
return V1_3::ErrorStatus::NONE;
case ANEURALNETWORKS_FUSED_RELU1:
*outputMin = -1.0f;
*outputMax = +1.0f;
return V1_3::ErrorStatus::NONE;
case ANEURALNETWORKS_FUSED_RELU6:
*outputMin = 0.0f;
*outputMax = 6.0f;
return V1_3::ErrorStatus::NONE;
default:
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
static V1_3::ErrorStatus CheckConvolutionParams(int32_t stride_width, int32_t stride_height,
int32_t dilation_width_factor,
int32_t dilation_height_factor) {
if (stride_width <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (stride_height <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (dilation_width_factor <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (dilation_height_factor <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckDepthwiseConvolutionParams(
int32_t stride_width, int32_t stride_height, int32_t dilation_width_factor,
int32_t dilation_height_factor, int32_t depth_multiplier, uint32_t output_channels) {
if (stride_width <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (stride_height <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (depth_multiplier <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (output_channels % depth_multiplier != 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (dilation_width_factor <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (dilation_height_factor <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckPoolingParams(int32_t stride_width, int32_t stride_height,
int32_t filter_width, int32_t filter_height) {
if (stride_width <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (stride_height <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (filter_width <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (filter_height <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (filter_width == 1 && filter_height == 1 && std::max(stride_width, stride_height) > 1) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckNumInputsAndOutputs(const V1_3::Operation& operation,
uint32_t expected_num_inputs,
uint32_t expected_num_outputs) {
if (operation.inputs.size() != expected_num_inputs) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (operation.outputs.size() != expected_num_outputs) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckTensorType(OperandType tensor_type, OperandType expected_type) {
if (tensor_type != expected_type) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckTensorFloatType(OperandType tensor_type) {
if (tensor_type != OperandType::TENSOR_FLOAT32) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckTensorShape(std::vector<uint32_t>& dimensions,
uint32_t min_num_dims, uint32_t max_num_dims) {
if (min_num_dims == max_num_dims) {
if (dimensions.size() != min_num_dims) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
} else {
if (dimensions.size() < min_num_dims || dimensions.size() > max_num_dims) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
for (size_t i = 0; i < dimensions.size(); i++) {
if (dimensions[i] <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckTensorShape(std::vector<uint32_t>& dimensions,
int expected_num_dims) {
return CheckTensorShape(dimensions, expected_num_dims, expected_num_dims);
}
static V1_3::ErrorStatus CheckSlopeTensorShape(std::vector<uint32_t>& dimensions) {
if (dimensions.size() < 1) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
// Validate that all non-channel dimensions (if any) are exactly 1.
for (size_t i = 0; i < dimensions.size() - 1; i++) {
if (dimensions[i] != 1) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckAxesTensorShape(std::vector<uint32_t>& dimensions) {
if (dimensions.size() != 1) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckShapeTensorShape(std::vector<uint32_t>& dimensions) {
if (dimensions.size() != 1) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus CheckTensorStaticAllocation(Operand::LifeTime lifetime) {
if (lifetime != Operand::LifeTime::CONSTANT_COPY &&
lifetime != Operand::LifeTime::CONSTANT_REFERENCE &&
lifetime != Operand::LifeTime::POINTER) {
VLOG(DRIVER) << "CheckTensorStaticAllocation: " << toString(convertToV1_3(lifetime));
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
return V1_3::ErrorStatus::NONE;
}
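// Dispatches a single NNAPI operation to its translator. When `subgraph` is
// null the visitors only run their validation checks, so this also serves as
// the "is this operation supported?" query.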
static V1_3::ErrorStatus VisitNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
switch (operation.type) {
case V1_3::OperationType::ABS:
return VisitAbsNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::ADD:
return VisitAddNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::AVERAGE_POOL_2D:
return VisitAveragePool2DNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::CONV_2D:
return VisitConv2DNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::DEPTHWISE_CONV_2D:
return VisitDepthwiseConv2DNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::DIV:
return VisitDivNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::FLOOR:
return VisitFloorNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::FULLY_CONNECTED:
return VisitFullyConnectedNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::HARD_SWISH:
return VisitHardSwishNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::LOGISTIC:
return VisitLogisticNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::MAX_POOL_2D:
return VisitMaxPool2DNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::MAXIMUM:
return VisitMaximumNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::MEAN:
return VisitMeanNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::MINIMUM:
return VisitMinimumNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::MUL:
return VisitMulNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::NEG:
return VisitNegNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::PAD:
return VisitPadNode(subgraph, operation, operands, 0.0f, xnnpackTensors);
case V1_3::OperationType::PAD_V2:
return VisitPadV2Node(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::RESHAPE:
return VisitReshapeNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::RESIZE_BILINEAR:
return VisitResizeBilinearNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::PRELU:
return VisitPreluNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::RELU:
return VisitReluNode(subgraph, operation, operands, 0.0f,
std::numeric_limits<float>::infinity(), xnnpackTensors);
case V1_3::OperationType::RELU1:
return VisitReluNode(subgraph, operation, operands, -1.0f, 1.0f, xnnpackTensors);
case V1_3::OperationType::RELU6:
return VisitReluNode(subgraph, operation, operands, 0.0f, 6.0f, xnnpackTensors);
case V1_3::OperationType::SQRT:
return VisitSqrtNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::SUB:
return VisitSubNode(subgraph, operation, operands, xnnpackTensors);
case V1_3::OperationType::SOFTMAX:
return VisitSoftmaxNode(subgraph, operation, operands, xnnpackTensors);
default:
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
static V1_3::ErrorStatus VisitAbsNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_abs(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_abs FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitAddNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
int activation = getScalarData<int32_t>(operands[ins[2]]);
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_add2(subgraph, outputMin, outputMax,
/*input1_id=*/xnnpackTensors[ins[0]],
/*input2_id=*/xnnpackTensors[ins[1]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_add2 FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
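// AVERAGE_POOL_2D. Handles both the explicit-padding form (>= 10 inputs:
// four paddings, strides, filter size, activation, optional layout) and the
// implicit-padding form (7 or 8 inputs). A 1x1 filter degenerates to a clamp.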
static V1_3::ErrorStatus VisitAveragePool2DNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
// Make sure all scalar params are constant.
for (uint32_t i = 1; i < ins.size(); i++) {
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
}
bool use_nchw = false;
if (ins.size() == 8) {
use_nchw = getScalarData<bool>(operands[ins[7]]);
}
if (ins.size() == 11) {
use_nchw = getScalarData<bool>(operands[ins[10]]);
}
if (use_nchw) {
VLOG(DRIVER) << "XNNPACK VisitAveragePool2DNode FAILED: only NHWC layout is supported";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
int32_t stride_width, stride_height, filter_width, filter_height, activation;
uint32_t input_padding_top = 0;
uint32_t input_padding_right = 0;
uint32_t input_padding_bottom = 0;
uint32_t input_padding_left = 0;
uint32_t flags = 0;
if (ins.size() >= 10) {
// Explicit padding
input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[1]]));
input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[2]]));
input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
stride_width = getScalarData<int32_t>(operands[ins[5]]);
stride_height = getScalarData<int32_t>(operands[ins[6]]);
filter_width = getScalarData<int32_t>(operands[ins[7]]);
filter_height = getScalarData<int32_t>(operands[ins[8]]);
activation = getScalarData<int32_t>(operands[ins[9]]);
} else {
// Implicit padding
int padding_implicit = getScalarData<int32_t>(operands[ins[1]]);
NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
stride_width = getScalarData<int32_t>(operands[ins[2]]);
stride_height = getScalarData<int32_t>(operands[ins[3]]);
filter_width = getScalarData<int32_t>(operands[ins[4]]);
filter_height = getScalarData<int32_t>(operands[ins[5]]);
activation = getScalarData<int32_t>(operands[ins[6]]);
}
NN_DRIVER_RETURN_IF_ERROR(
CheckPoolingParams(stride_width, stride_height, filter_width, filter_height));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
xnn_status status = xnn_status_success;
if (filter_width == 1 && filter_height == 1) {
status = xnn_define_clamp(subgraph, outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
} else {
status = xnn_define_average_pooling_2d(
subgraph, input_padding_top, input_padding_right, input_padding_bottom,
input_padding_left, static_cast<uint32_t>(filter_height),
static_cast<uint32_t>(filter_width), static_cast<uint32_t>(stride_height),
static_cast<uint32_t>(stride_width), outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], flags);
}
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_average_pooling_2d FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
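// CONV_2D with an NHWC input and an [output_channels, kernel_h, kernel_w,
// input_channels] filter. The explicit- and implicit-padding forms are told
// apart by the input count and the type of input 7.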
static V1_3::ErrorStatus VisitConv2DNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
// Make sure all scalar params are constant.
for (uint32_t i = 3; i < ins.size(); i++) {
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
}
bool use_nchw = false;
if (ins.size() >= 8 && operands[ins[7]].type == OperandType::BOOL) {
use_nchw = getScalarData<bool>(operands[ins[7]]);
}
if (ins.size() >= 11) {
use_nchw = getScalarData<bool>(operands[ins[10]]);
}
if (use_nchw) {
VLOG(DRIVER) << "XNNPACK VisitConv2DNode FAILED: only NHWC layout is supported";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
int32_t stride_width, stride_height, activation;
int32_t dilation_width_factor = 1;
int32_t dilation_height_factor = 1;
uint32_t input_padding_top = 0;
uint32_t input_padding_right = 0;
uint32_t input_padding_bottom = 0;
uint32_t input_padding_left = 0;
uint32_t flags = 0;
if (ins.size() >= 10 && operands[ins[7]].type != OperandType::BOOL) {
// Explicit padding
input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[5]]));
input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[6]]));
stride_width = getScalarData<int32_t>(operands[ins[7]]);
stride_height = getScalarData<int32_t>(operands[ins[8]]);
activation = getScalarData<int32_t>(operands[ins[9]]);
if (ins.size() == 13) {
dilation_width_factor = getScalarData<int32_t>(operands[ins[11]]);
dilation_height_factor = getScalarData<int32_t>(operands[ins[12]]);
}
} else {
// Implicit padding
int padding_implicit = getScalarData<int32_t>(operands[ins[3]]);
NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
stride_width = getScalarData<int32_t>(operands[ins[4]]);
stride_height = getScalarData<int32_t>(operands[ins[5]]);
activation = getScalarData<int32_t>(operands[ins[6]]);
if (ins.size() == 10) {
dilation_width_factor = getScalarData<int32_t>(operands[ins[8]]);
dilation_height_factor = getScalarData<int32_t>(operands[ins[9]]);
}
}
NN_DRIVER_RETURN_IF_ERROR(CheckConvolutionParams(
stride_width, stride_height, dilation_width_factor, dilation_height_factor));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
const RunTimeOperandInfo& filter = operands[ins[1]];
const uint32_t output_channels = filter.dimensions[0];
const uint32_t kernel_height = filter.dimensions[1];
const uint32_t kernel_width = filter.dimensions[2];
const uint32_t input_channels = filter.dimensions[3];
if (subgraph != nullptr) {
const xnn_status status = xnn_define_convolution_2d(
subgraph, input_padding_top, input_padding_right, input_padding_bottom,
input_padding_left, static_cast<uint32_t>(kernel_height),
static_cast<uint32_t>(kernel_width), static_cast<uint32_t>(stride_height),
static_cast<uint32_t>(stride_width),
static_cast<uint32_t>(dilation_height_factor),
static_cast<uint32_t>(dilation_width_factor),
/*groups=*/1, static_cast<size_t>(input_channels),
static_cast<size_t>(output_channels), outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*filter_id=*/xnnpackTensors[ins[1]],
/*bias_id=*/xnnpackTensors[ins[2]],
/*output_id=*/xnnpackTensors[outs[0]], flags);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_convolution_2d FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
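// DEPTHWISE_CONV_2D with a [1, kernel_h, kernel_w, output_channels] filter.
// XNNPACK expects the number of input channels, which is derived here as
// output_channels / depth_multiplier.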
static V1_3::ErrorStatus VisitDepthwiseConv2DNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
// Make sure all scalar params are constant.
for (uint32_t i = 3; i < ins.size(); i++) {
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
}
bool use_nchw = false;
if (ins.size() >= 9 && operands[ins[8]].type == OperandType::BOOL) {
use_nchw = getScalarData<bool>(operands[ins[8]]);
}
if (ins.size() >= 12) {
use_nchw = getScalarData<bool>(operands[ins[11]]);
}
if (use_nchw) {
VLOG(DRIVER)
<< "XNNPACK VisitDepthwiseConv2DNode FAILED: only NHWC layout is supported";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
int32_t stride_width, stride_height, depth_multiplier, activation;
int32_t dilation_width_factor = 1;
int32_t dilation_height_factor = 1;
uint32_t input_padding_top = 0;
uint32_t input_padding_right = 0;
uint32_t input_padding_bottom = 0;
uint32_t input_padding_left = 0;
uint32_t flags = 0;
if (ins.size() >= 11 && operands[ins[8]].type != OperandType::BOOL) {
// Explicit padding
input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[5]]));
input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[6]]));
stride_width = getScalarData<int32_t>(operands[ins[7]]);
stride_height = getScalarData<int32_t>(operands[ins[8]]);
depth_multiplier = getScalarData<int32_t>(operands[ins[9]]);
activation = getScalarData<int32_t>(operands[ins[10]]);
if (ins.size() == 14) {
dilation_width_factor = getScalarData<int32_t>(operands[ins[12]]);
dilation_height_factor = getScalarData<int32_t>(operands[ins[13]]);
}
} else {
// Implicit padding
int padding_implicit = getScalarData<int32_t>(operands[ins[3]]);
NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
stride_width = getScalarData<int32_t>(operands[ins[4]]);
stride_height = getScalarData<int32_t>(operands[ins[5]]);
depth_multiplier = getScalarData<int32_t>(operands[ins[6]]);
activation = getScalarData<int32_t>(operands[ins[7]]);
if (ins.size() == 11) {
dilation_width_factor = getScalarData<int32_t>(operands[ins[9]]);
dilation_height_factor = getScalarData<int32_t>(operands[ins[10]]);
}
}
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
const RunTimeOperandInfo& filter = operands[ins[1]];
const uint32_t output_channels = filter.dimensions[3];
const uint32_t kernel_height = filter.dimensions[1];
const uint32_t kernel_width = filter.dimensions[2];
NN_DRIVER_RETURN_IF_ERROR(CheckDepthwiseConvolutionParams(
stride_width, stride_height, dilation_width_factor, dilation_height_factor,
depth_multiplier, output_channels));
if (subgraph != nullptr) {
const xnn_status status = xnn_define_depthwise_convolution_2d(
subgraph, input_padding_top, input_padding_right, input_padding_bottom,
input_padding_left, static_cast<uint32_t>(kernel_height),
static_cast<uint32_t>(kernel_width), static_cast<uint32_t>(stride_height),
static_cast<uint32_t>(stride_width),
static_cast<uint32_t>(dilation_height_factor),
static_cast<uint32_t>(dilation_width_factor),
static_cast<uint32_t>(depth_multiplier),
/*input_channels=*/
static_cast<uint32_t>(output_channels / depth_multiplier), outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*filter_id=*/xnnpackTensors[ins[1]],
/*bias_id=*/xnnpackTensors[ins[2]],
/*output_id=*/xnnpackTensors[outs[0]], flags);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_depthwise_convolution_2d FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitDivNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
int activation = getScalarData<int32_t>(operands[ins[2]]);
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_divide(subgraph, outputMin, outputMax,
/*input1_id=*/xnnpackTensors[ins[0]],
/*input2_id=*/xnnpackTensors[ins[1]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_divide FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitFullyConnectedNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[2]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[3]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
int activation = getScalarData<int32_t>(operands[ins[3]]);
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_fully_connected(subgraph, outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*filter_id=*/xnnpackTensors[ins[1]],
/*bias_id=*/xnnpackTensors[ins[2]],
/*output_id=*/xnnpackTensors[outs[0]],
/*flags=*/XNN_FLAG_TENSORFLOW_RESHAPE_2D);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_fully_connected FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitFloorNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_floor(subgraph,
/*input1_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_floor FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitHardSwishNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_hardswish(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_hardswish FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitLogisticNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_sigmoid(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_sigmoid FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitMaxPool2DNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
// Make sure all scalar params are constant.
for (uint32_t i = 1; i < ins.size(); i++) {
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
}
bool use_nchw = false;
if (ins.size() == 8) {
use_nchw = getScalarData<bool>(operands[ins[7]]);
}
if (ins.size() == 11) {
use_nchw = getScalarData<bool>(operands[ins[10]]);
}
if (use_nchw) {
VLOG(DRIVER) << "XNNPACK VisitMaxPool2DNode FAILED: only NHWC layout is supported";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
int32_t stride_width, stride_height, filter_width, filter_height, activation;
uint32_t input_padding_top = 0;
uint32_t input_padding_right = 0;
uint32_t input_padding_bottom = 0;
uint32_t input_padding_left = 0;
uint32_t flags = 0;
if (ins.size() >= 10) {
// Explicit padding
input_padding_left = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[1]]));
input_padding_right = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[2]]));
input_padding_top = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[3]]));
input_padding_bottom = static_cast<uint32_t>(getScalarData<int32_t>(operands[ins[4]]));
stride_width = getScalarData<int32_t>(operands[ins[5]]);
stride_height = getScalarData<int32_t>(operands[ins[6]]);
filter_width = getScalarData<int32_t>(operands[ins[7]]);
filter_height = getScalarData<int32_t>(operands[ins[8]]);
activation = getScalarData<int32_t>(operands[ins[9]]);
} else {
// Implicit padding
int padding_implicit = getScalarData<int32_t>(operands[ins[1]]);
NN_DRIVER_RETURN_IF_ERROR(CalculatePadding(padding_implicit, &flags));
stride_width = getScalarData<int32_t>(operands[ins[2]]);
stride_height = getScalarData<int32_t>(operands[ins[3]]);
filter_width = getScalarData<int32_t>(operands[ins[4]]);
filter_height = getScalarData<int32_t>(operands[ins[5]]);
activation = getScalarData<int32_t>(operands[ins[6]]);
}
NN_DRIVER_RETURN_IF_ERROR(
CheckPoolingParams(stride_width, stride_height, filter_width, filter_height));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
xnn_status status = xnn_status_success;
if (filter_width == 1 && filter_height == 1) {
status = xnn_define_clamp(subgraph, outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
} else {
status = xnn_define_max_pooling_2d(
subgraph, input_padding_top, input_padding_right, input_padding_bottom,
input_padding_left, static_cast<uint32_t>(filter_height),
static_cast<uint32_t>(filter_width), static_cast<uint32_t>(stride_height),
static_cast<uint32_t>(stride_width), /*dilation_height=*/1,
/*dilation_width=*/1, outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], flags);
}
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_max_pooling_2d FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitMaximumNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
int activation = getScalarData<int32_t>(operands[ins[2]]);
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_maximum2(subgraph,
/*input1_id=*/xnnpackTensors[ins[0]],
/*input2_id=*/xnnpackTensors[ins[1]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_maximum2 FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
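// MEAN is only supported as a mean over the spatial axes {1, 2} of a 4-D NHWC
// tensor with keep_dims set, which maps directly onto XNNPACK's global
// average pooling.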
static V1_3::ErrorStatus VisitMeanNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[ins[0]].dimensions, 4));
NN_DRIVER_RETURN_IF_ERROR(CheckAxesTensorShape(operands[ins[1]].dimensions));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[outs[0]].dimensions, 4));
int keep_dims = getScalarData<int32_t>(operands[ins[2]]);
if (keep_dims <= 0) {
LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: only support keep_dims";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
const int32_t* axes_buffer = reinterpret_cast<const int32_t*>(operands[ins[1]].buffer);
if (operands[ins[1]].dimensions[0] != 2) {
LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: unsupported axes";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (std::min(axes_buffer[0], axes_buffer[1]) != 1 ||
std::max(axes_buffer[0], axes_buffer[1]) != 2) {
LOG(ERROR) << "XNNPACK VisitMeanNode FAILED: unsupported axes";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (subgraph != nullptr) {
const xnn_status status = xnn_define_global_average_pooling_2d(
subgraph,
/*outputMin=*/-std::numeric_limits<float>::infinity(),
/*outputMax=*/+std::numeric_limits<float>::infinity(),
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_global_average_pooling_2d FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitMinimumNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
int activation = getScalarData<int32_t>(operands[ins[2]]);
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_minimum2(subgraph,
/*input1_id=*/xnnpackTensors[ins[0]],
/*input2_id=*/xnnpackTensors[ins[1]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_minimum2 FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitMulNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
int activation = getScalarData<int32_t>(operands[ins[2]]);
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_multiply2(subgraph, outputMin, outputMax,
/*input1_id=*/xnnpackTensors[ins[0]],
/*input2_id=*/xnnpackTensors[ins[1]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_multiply2 FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitNegNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_negate(subgraph,
/*input1_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_negate FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitPreluNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(
CheckTensorShape(operands[ins[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckSlopeTensorShape(operands[ins[1]].dimensions));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
NN_DRIVER_RETURN_IF_ERROR(
CheckTensorShape(operands[outs[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_prelu(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
/*slope_id=*/xnnpackTensors[ins[1]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_prelu FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitPadNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands, float padding_value,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(
CheckTensorShape(operands[ins[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
NN_DRIVER_RETURN_IF_ERROR(
CheckTensorShape(operands[outs[0]].dimensions, 1, XNN_MAX_TENSOR_DIMS));
const int32_t* paddings_data = reinterpret_cast<const int32_t*>(operands[ins[1]].buffer);
for (size_t i = 0; i < operands[ins[1]].dimensions.size() * 2; i++) {
if (paddings_data[i] < 0) return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (subgraph != nullptr) {
std::array<size_t, XNN_MAX_TENSOR_DIMS> pre_paddings{};
std::array<size_t, XNN_MAX_TENSOR_DIMS> post_paddings{};
for (size_t i = 0; i < operands[ins[1]].dimensions.size(); i++) {
pre_paddings[i] = static_cast<size_t>(paddings_data[i * 2 + 0]);
post_paddings[i] = static_cast<size_t>(paddings_data[i * 2 + 1]);
}
const xnn_status status = xnn_define_static_constant_pad(
subgraph, pre_paddings.data(), post_paddings.data(), padding_value,
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_static_constant_pad FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitPadV2Node(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
if (operands[ins[2]].type != OperandType::FLOAT32) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
float padding_value = getScalarData<float>(operands[ins[2]]);
return VisitPadNode(subgraph, operation, operands, padding_value, xnnpackTensors);
}
static V1_3::ErrorStatus VisitReshapeNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(
CheckTensorShape(operands[ins[0]].dimensions, 0, XNN_MAX_TENSOR_DIMS));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
NN_DRIVER_RETURN_IF_ERROR(
CheckTensorShape(operands[outs[0]].dimensions, 0, XNN_MAX_TENSOR_DIMS));
if (subgraph != nullptr) {
std::array<size_t, XNN_MAX_TENSOR_DIMS> new_shape;
for (uint32_t i = 0; i < operands[outs[0]].dimensions.size(); i++) {
new_shape[i] = static_cast<size_t>(operands[outs[0]].dimensions[i]);
}
const xnn_status status = xnn_define_static_reshape(
subgraph, static_cast<size_t>(operands[outs[0]].dimensions.size()),
new_shape.data(),
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_static_reshape FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
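// RESIZE_BILINEAR. The output size is given either as explicit INT32
// width/height or as FLOAT32 scale factors; the optional align_corners and
// half_pixel_centers inputs are translated into XNNPACK resize flags.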
static V1_3::ErrorStatus VisitResizeBilinearNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[ins[0]].dimensions, 4));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorShape(operands[outs[0]].dimensions, 4));
// Make sure all scalar params are constant.
for (uint32_t i = 1; i < ins.size(); i++) {
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[i]].lifetime));
}
if (ins.size() >= 4) {
bool use_nchw = getScalarData<bool>(operands[ins[3]]);
if (use_nchw) {
VLOG(DRIVER)
<< "XNNPACK VisitResizeBilinearNode FAILED: only NHWC layout is supported";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
size_t new_height, new_width;
if (operands[ins[1]].type == OperandType::INT32) {
// Output dimensions are specified explicitly.
new_width = static_cast<size_t>(getScalarData<int32_t>(operands[ins[1]]));
new_height = static_cast<size_t>(getScalarData<int32_t>(operands[ins[2]]));
} else if (operands[ins[1]].type == OperandType::FLOAT32) {
// Output dimensions are specified as scaling factors.
float width_scale = getScalarData<float>(operands[ins[1]]);
float height_scale = getScalarData<float>(operands[ins[2]]);
if (width_scale <= 0 || height_scale <= 0) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
new_height = static_cast<size_t>(operands[ins[0]].dimensions[1] * height_scale);
new_width = static_cast<size_t>(operands[ins[0]].dimensions[2] * width_scale);
} else {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
bool align_corners = false;
bool half_pixel_centers = false;
if (ins.size() == 6) {
align_corners = getScalarData<bool>(operands[ins[4]]);
half_pixel_centers = getScalarData<bool>(operands[ins[5]]);
}
if (align_corners && !half_pixel_centers) {
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (subgraph != nullptr) {
uint32_t flags = 0;
if (align_corners) {
flags |= XNN_FLAG_ALIGN_CORNERS;
} else if (!half_pixel_centers) {
flags |= XNN_FLAG_TENSORFLOW_LEGACY_MODE;
}
const xnn_status status = xnn_define_static_resize_bilinear_2d(
subgraph, new_height, new_width,
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], flags);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_static_resize_bilinear_2d FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitReluNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands, float outputMin,
float outputMax,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_clamp(subgraph, outputMin, outputMax,
/*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_clamp FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitSqrtNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_square_root(subgraph,
/*input1_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_bankers_rounding FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
static V1_3::ErrorStatus VisitSubNode(xnn_subgraph_t subgraph, const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[1]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
float outputMin = -std::numeric_limits<float>::infinity();
float outputMax = +std::numeric_limits<float>::infinity();
int activation = getScalarData<int32_t>(operands[ins[2]]);
NN_DRIVER_RETURN_IF_ERROR(
ConvertActivationToOutputRange(activation, &outputMin, &outputMax));
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_subtract(subgraph, outputMin, outputMax,
/*input1_id=*/xnnpackTensors[ins[0]],
/*input2_id=*/xnnpackTensors[ins[1]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_subtract FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
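    // Translates NNAPI SOFTMAX into an XNNPACK softmax node; only beta == 1
    // and the innermost axis are supported.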
static V1_3::ErrorStatus VisitSoftmaxNode(xnn_subgraph_t subgraph,
const V1_3::Operation& operation,
RunTimeOperandInfo* operands,
const std::vector<uint32_t>& xnnpackTensors) {
const hardware::hidl_vec<uint32_t>& ins = operation.inputs;
const hardware::hidl_vec<uint32_t>& outs = operation.outputs;
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[ins[0]].type));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[1]].lifetime));
NN_DRIVER_RETURN_IF_ERROR(CheckTensorFloatType(operands[outs[0]].type));
float beta = getScalarData<float>(operands[ins[1]]);
if (beta != 1.0f) {
LOG(ERROR) << "XNNPACK VisitSoftmaxNode FAILED, unsupported beta value: " << beta;
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (ins.size() >= 3) {
NN_DRIVER_RETURN_IF_ERROR(CheckTensorStaticAllocation(operands[ins[2]].lifetime));
int axis = getScalarData<int32_t>(operands[ins[2]]);
if (axis != -1) {
LOG(ERROR) << "XNNPACK VisitSoftmaxNode FAILED, unsupported axis value: " << axis;
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
}
if (subgraph != nullptr) {
const xnn_status status =
xnn_define_softmax(subgraph, /*input_id=*/xnnpackTensors[ins[0]],
/*output_id=*/xnnpackTensors[outs[0]], /*flags=*/0);
if (status != xnn_status_success) {
LOG(ERROR) << "XNNPACK xnn_define_softmax FAILED";
return V1_3::ErrorStatus::GENERAL_FAILURE;
}
}
return V1_3::ErrorStatus::NONE;
}
private:
Subgraph(xnn_runtime_t runtime, std::unordered_set<uint32_t>&& externals,
bool useStaticBuffer = false)
: mRuntime(runtime, &xnn_delete_runtime),
          mExternals(std::move(externals)),
mUseStaticBuffer(useStaticBuffer) {}
// XNNPACK Runtime (subgraph + workspace) with smart-pointer for lifetime
// management.
std::unique_ptr<xnn_runtime, decltype(&xnn_delete_runtime)> mRuntime{nullptr,
&xnn_delete_runtime};
std::unordered_set<uint32_t> mExternals;
bool mFirstRun = true;
bool mUseStaticBuffer;
};
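// A prepared model that lowers the main subgraph of an NNAPI model to an
// XNNPACK runtime and executes requests through it.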
class SamplePreparedModelXNNPACK : public SamplePreparedModel {
public:
SamplePreparedModelXNNPACK(const V1_3::Model& model, const SampleDriver* driver,
V1_1::ExecutionPreference preference, uid_t userId,
V1_3::Priority priority)
: SamplePreparedModel(model, driver, preference, userId, priority),
mSubgraph(nullptr),
mThreadpool(nullptr) {}
~SamplePreparedModelXNNPACK() {
delete mSubgraph;
pthreadpool_destroy(mThreadpool);
    }
bool initialize();
hardware::Return<V1_0::ErrorStatus> execute(
const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) override;
hardware::Return<V1_0::ErrorStatus> execute_1_2(
const V1_0::Request& request, V1_2::MeasureTiming measure,
const sp<V1_2::IExecutionCallback>& callback) override;
hardware::Return<V1_3::ErrorStatus> execute_1_3(
const V1_3::Request& request, V1_2::MeasureTiming measure,
const V1_3::OptionalTimePoint& deadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
const sp<V1_3::IExecutionCallback>& callback) override;
hardware::Return<void> executeSynchronously(const V1_0::Request& request,
V1_2::MeasureTiming measure,
executeSynchronously_cb cb) override;
hardware::Return<void> executeSynchronously_1_3(
const V1_3::Request& request, V1_2::MeasureTiming measure,
const V1_3::OptionalTimePoint& deadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
executeSynchronously_1_3_cb cb) override;
hardware::Return<void> configureExecutionBurst(
const sp<V1_2::IBurstCallback>& callback,
const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
configureExecutionBurst_cb cb) override;
hardware::Return<void> executeFenced(const V1_3::Request& request,
const hardware::hidl_vec<hardware::hidl_handle>& wait_for,
V1_2::MeasureTiming measure,
const V1_3::OptionalTimePoint& deadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
const V1_3::OptionalTimeoutDuration& duration,
executeFenced_cb callback) override;
private:
Subgraph* mSubgraph;
std::vector<RunTimeOperandInfo> mOperands;
pthreadpool* mThreadpool;
};
hardware::Return<void> SamplePreparedModelXNNPACK::configureExecutionBurst(
const sp<V1_2::IBurstCallback>& callback,
const MQDescriptorSync<V1_2::FmqRequestDatum>& requestChannel,
const MQDescriptorSync<V1_2::FmqResultDatum>& resultChannel,
configureExecutionBurst_cb cb) {
VLOG(DRIVER) << "SamplePreparedModelXNNPACK::configureExecutionBurst not supported";
cb(V1_0::ErrorStatus::GENERAL_FAILURE, {});
return hardware::Void();
}
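// Builds the XNNPACK subgraph and worker threadpool for the prepared model.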
bool SamplePreparedModelXNNPACK::initialize() {
auto status = SamplePreparedModel::initialize();
mThreadpool = pthreadpool_create(kNumOfWorkerThreads);
if (mThreadpool == nullptr) {
VLOG(DRIVER) << "SamplePreparedModelXNNPACK::initialize failed to create pthreadpool, "
"fallback to single threaded execution";
}
const V1_3::Model* model = getModel();
mOperands = initializeRunTimeInfo(model->main, mPoolInfos, &model->operandValues);
mSubgraph = Subgraph::Create(model->main.operations, mOperands, model->main.inputIndexes,
model->main.outputIndexes, mThreadpool);
    return status && mSubgraph != nullptr;
}
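// Runs a request on the XNNPACK subgraph and reports the result through the
// provided execution callback.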
template <typename T_IExecutionCallback>
void asyncExecuteXNNPACK(Subgraph* subgraph, RunTimeOperandInfo* operands,
const V1_3::Request& request, V1_2::MeasureTiming measure,
const V1_3::Model& model, const LegacyOptionalTimePoint& deadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
const sp<T_IExecutionCallback>& callback) {
std::vector<RunTimePoolInfo> requestPoolInfos;
    if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
        notify(callback, V1_3::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming);
        return;
    }
updateForArguments(model.main.inputIndexes, request.inputs, requestPoolInfos, operands);
updateForArguments(model.main.outputIndexes, request.outputs, requestPoolInfos, operands);
auto status = subgraph->Invoke(operands);
VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
if (status == V1_3::ErrorStatus::NONE) {
VLOG(DRIVER) << "Completed run normally";
for (auto& runtimeInfo : requestPoolInfos) {
runtimeInfo.flush();
}
}
notify(callback, status, {}, kNoTiming);
}
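// Validates the request, then launches an asynchronous XNNPACK execution on a
// detached thread; results are delivered through the execution callback.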
template <typename T_IExecutionCallback>
V1_3::ErrorStatus executeXNNPACKBase(Subgraph* subgraph, RunTimeOperandInfo* operands,
const V1_3::Request& request, V1_2::MeasureTiming measure,
const V1_3::Model& model,
const V1_3::OptionalTimePoint& halDeadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
const sp<T_IExecutionCallback>& callback) {
VLOG(DRIVER) << "executeXNNPACKBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
if (callback.get() == nullptr) {
LOG(ERROR) << "invalid callback passed to executeXNNPACKBase";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (!validateRequest(request, model, /*allowUnspecifiedOutput=*/false)) {
notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming);
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
const auto deadline = makeDeadline(halDeadline);
if (hasDeadlinePassed(deadline)) {
notify(callback, V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming);
return V1_3::ErrorStatus::NONE;
}
// This thread is intentionally detached because the sample driver service
// is expected to live forever.
    // Capture the subgraph and operand pointers by value so they remain valid
    // after this function returns.
    std::thread([subgraph, operands, &model, request, measure, deadline, loopTimeoutDuration,
                 callback] {
asyncExecuteXNNPACK(subgraph, operands, request, measure, model, deadline,
loopTimeoutDuration, callback);
}).detach();
return V1_3::ErrorStatus::NONE;
}
hardware::Return<V1_0::ErrorStatus> SamplePreparedModelXNNPACK::execute(
const V1_0::Request& request, const sp<V1_0::IExecutionCallback>& callback) {
const V1_3::Model* model = getModel();
const V1_3::ErrorStatus status =
executeXNNPACKBase(mSubgraph, mOperands.data(), convertToV1_3(request),
V1_2::MeasureTiming::NO, *model, {}, {}, callback);
return convertToV1_0(status);
}
hardware::Return<V1_0::ErrorStatus> SamplePreparedModelXNNPACK::execute_1_2(
const V1_0::Request& request, V1_2::MeasureTiming measure,
const sp<V1_2::IExecutionCallback>& callback) {
const V1_3::Model* model = getModel();
const V1_3::ErrorStatus status = executeXNNPACKBase(
mSubgraph, mOperands.data(), convertToV1_3(request), measure, *model, {}, {}, callback);
return convertToV1_0(status);
}
hardware::Return<V1_3::ErrorStatus> SamplePreparedModelXNNPACK::execute_1_3(
const V1_3::Request& request, V1_2::MeasureTiming measure,
const V1_3::OptionalTimePoint& deadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
const sp<V1_3::IExecutionCallback>& callback) {
const V1_3::Model* model = getModel();
return executeXNNPACKBase(mSubgraph, mOperands.data(), request, measure, *model, deadline,
loopTimeoutDuration, callback);
}
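// Synchronous counterpart of executeXNNPACKBase: runs the request inline and
// returns the status, output shapes, and timing directly.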
static std::tuple<V1_3::ErrorStatus, hardware::hidl_vec<V1_2::OutputShape>, V1_2::Timing>
executeSynchronouslyXNNPACKBase(Subgraph* subgraph, RunTimeOperandInfo* operands,
const V1_3::Request& request, V1_2::MeasureTiming measure,
const V1_3::Model& model,
const V1_3::OptionalTimePoint& halDeadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration) {
VLOG(DRIVER) << "executeSynchronouslyXNNPACKBase(" << SHOW_IF_DEBUG(toString(request)) << ")";
if (!validateRequest(request, model, /*allowUnspecifiedOutput=*/false)) {
return {V1_3::ErrorStatus::INVALID_ARGUMENT, {}, kNoTiming};
}
const auto deadline = makeDeadline(halDeadline);
if (hasDeadlinePassed(deadline)) {
return {V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, {}, kNoTiming};
}
std::vector<RunTimePoolInfo> requestPoolInfos;
if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
return {V1_3::ErrorStatus::GENERAL_FAILURE, {}, kNoTiming};
}
updateForArguments(model.main.inputIndexes, request.inputs, requestPoolInfos, operands);
updateForArguments(model.main.outputIndexes, request.outputs, requestPoolInfos, operands);
VLOG(DRIVER) << "XNNPACK subgraph invoke started";
auto status = subgraph->Invoke(operands);
VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
if (status == V1_3::ErrorStatus::NONE) {
VLOG(DRIVER) << "Completed run normally";
for (auto& runtimeInfo : requestPoolInfos) {
runtimeInfo.flush();
}
}
return {status, {}, kNoTiming};
}
hardware::Return<void> SamplePreparedModelXNNPACK::executeSynchronously(
const V1_0::Request& request, V1_2::MeasureTiming measure, executeSynchronously_cb cb) {
const V1_3::Model* model = getModel();
auto [status, outputShapes, timing] = executeSynchronouslyXNNPACKBase(
mSubgraph, mOperands.data(), convertToV1_3(request), measure, *model, {}, {});
cb(convertToV1_0(status), std::move(outputShapes), timing);
return hardware::Void();
}
hardware::Return<void> SamplePreparedModelXNNPACK::executeSynchronously_1_3(
const V1_3::Request& request, V1_2::MeasureTiming measure,
const V1_3::OptionalTimePoint& deadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration, executeSynchronously_1_3_cb cb) {
const V1_3::Model* model = getModel();
auto [status, outputShapes, timing] = executeSynchronouslyXNNPACKBase(
mSubgraph, mOperands.data(), request, measure, *model, deadline, loopTimeoutDuration);
cb(status, std::move(outputShapes), timing);
return hardware::Void();
}
// The sample driver will finish the execution and then return.
hardware::Return<void> SamplePreparedModelXNNPACK::executeFenced(
const V1_3::Request& request, const hardware::hidl_vec<hardware::hidl_handle>& waitFor,
V1_2::MeasureTiming measure, const V1_3::OptionalTimePoint& halDeadline,
const V1_3::OptionalTimeoutDuration& loopTimeoutDuration,
const V1_3::OptionalTimeoutDuration& duration, executeFenced_cb cb) {
VLOG(DRIVER) << "executeFenced(" << SHOW_IF_DEBUG(toString(request)) << ")";
const V1_3::Model* model = getModel();
if (!validateRequest(request, *model, /*allowUnspecifiedOutput=*/false)) {
cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hardware::hidl_handle(nullptr), nullptr);
return hardware::Void();
}
const auto deadline = makeDeadline(halDeadline);
if (hasDeadlinePassed(deadline)) {
cb(V1_3::ErrorStatus::MISSED_DEADLINE_PERSISTENT, hardware::hidl_handle(nullptr), nullptr);
return hardware::Void();
}
// Wait for the dependent events to signal
for (const auto& fenceHandle : waitFor) {
if (!fenceHandle.getNativeHandle()) {
cb(V1_3::ErrorStatus::INVALID_ARGUMENT, hardware::hidl_handle(nullptr), nullptr);
return hardware::Void();
}
int syncFenceFd = fenceHandle.getNativeHandle()->data[0];
if (syncWait(syncFenceFd, -1) != FenceState::SIGNALED) {
LOG(ERROR) << "syncWait failed";
cb(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr), nullptr);
return hardware::Void();
}
}
std::vector<RunTimePoolInfo> requestPoolInfos;
    if (!setRunTimePoolInfosFromMemoryPools(&requestPoolInfos, uncheckedConvert(request.pools))) {
        cb(V1_3::ErrorStatus::GENERAL_FAILURE, hardware::hidl_handle(nullptr), nullptr);
        return hardware::Void();
    }
updateForArguments(model->main.inputIndexes, request.inputs, requestPoolInfos,
mOperands.data());
updateForArguments(model->main.outputIndexes, request.outputs, requestPoolInfos,
mOperands.data());
auto status = mSubgraph->Invoke(mOperands.data());
VLOG(DRIVER) << "XNNPACK subgraph invoke returned " << toString(status);
if (status == V1_3::ErrorStatus::NONE) {
VLOG(DRIVER) << "Completed run normally";
for (auto& runtimeInfo : requestPoolInfos) {
runtimeInfo.flush();
}
}
sp<SampleFencedExecutionCallback> fencedExecutionCallback =
new SampleFencedExecutionCallback(kNoTiming, kNoTiming, status);
cb(status, hardware::hidl_handle(nullptr), fencedExecutionCallback);
return hardware::Void();
}
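// HAL driver entry point that advertises float32 capabilities and prepares
// models backed by the XNNPACK subgraph runtime.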
class SampleDriverFloatXNNPACK : public SampleDriverPartial {
public:
SampleDriverFloatXNNPACK() : SampleDriverPartial("nnapi-sample_float_xnnpack") {}
hardware::Return<void> getCapabilities_1_3(getCapabilities_1_3_cb cb) override;
hardware::Return<V1_0::ErrorStatus> prepareModel(
const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) override;
hardware::Return<V1_0::ErrorStatus> prepareModel_1_1(
const V1_1::Model& model, V1_1::ExecutionPreference preference,
const sp<V1_0::IPreparedModelCallback>& callback) override;
hardware::Return<V1_0::ErrorStatus> prepareModel_1_2(
const V1_2::Model& model, V1_1::ExecutionPreference preference,
const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
const sp<V1_2::IPreparedModelCallback>& callback) override;
hardware::Return<V1_3::ErrorStatus> prepareModel_1_3(
const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
const V1_3::OptionalTimePoint& deadline,
const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
const sp<V1_3::IPreparedModelCallback>& callback) override;
hardware::Return<void> allocate(
const V1_3::BufferDesc& desc,
const hardware::hidl_vec<sp<V1_3::IPreparedModel>>& preparedModels,
const hardware::hidl_vec<V1_3::BufferRole>& inputRoles,
const hardware::hidl_vec<V1_3::BufferRole>& outputRoles, allocate_cb cb) override;
private:
std::vector<bool> getSupportedOperationsImpl(const V1_3::Model& model) const override;
};
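// Validates the model and asynchronously builds a SamplePreparedModelXNNPACK,
// notifying the callback from a detached thread once preparation finishes.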
template <typename T_Model, typename T_IPreparedModelCallback>
V1_3::ErrorStatus prepareModelXNNPACK(const T_Model& model, const SampleDriver* driver,
V1_1::ExecutionPreference preference, V1_3::Priority priority,
const V1_3::OptionalTimePoint& deadline,
const sp<T_IPreparedModelCallback>& callback) {
const uid_t userId = hardware::IPCThreadState::self()->getCallingUid();
if (callback.get() == nullptr) {
LOG(ERROR) << "invalid callback passed to prepareModelBase";
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
if (VLOG_IS_ON(DRIVER)) {
VLOG(DRIVER) << "prepareModelBase";
logModelToInfo(model);
}
if (!validateModel(model) || !validateExecutionPreference(preference) ||
!validatePriority(priority)) {
notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
return V1_3::ErrorStatus::INVALID_ARGUMENT;
}
// asynchronously prepare the model from a new, detached thread
std::thread([model, driver, preference, userId, priority, callback] {
sp<SamplePreparedModelXNNPACK> preparedModel = new SamplePreparedModelXNNPACK(
convertToV1_3(model), driver, preference, userId, priority);
if (!preparedModel->initialize()) {
notify(callback, V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr);
return;
}
notify(callback, V1_3::ErrorStatus::NONE, preparedModel);
}).detach();
return V1_3::ErrorStatus::NONE;
}
hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel(
const V1_0::Model& model, const sp<V1_0::IPreparedModelCallback>& callback) {
const V1_3::ErrorStatus status =
prepareModelXNNPACK(model, this, V1_1::ExecutionPreference::FAST_SINGLE_ANSWER,
kDefaultPriority13, {}, callback);
return convertToV1_0(status);
}
hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_1(
const V1_1::Model& model, V1_1::ExecutionPreference preference,
const sp<V1_0::IPreparedModelCallback>& callback) {
const V1_3::ErrorStatus status =
prepareModelXNNPACK(model, this, preference, kDefaultPriority13, {}, callback);
return convertToV1_0(status);
}
hardware::Return<V1_0::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_2(
const V1_2::Model& model, V1_1::ExecutionPreference preference,
const hardware::hidl_vec<hardware::hidl_handle>&,
const hardware::hidl_vec<hardware::hidl_handle>&, const HalCacheToken&,
const sp<V1_2::IPreparedModelCallback>& callback) {
const V1_3::ErrorStatus status =
prepareModelXNNPACK(model, this, preference, kDefaultPriority13, {}, callback);
return convertToV1_0(status);
}
hardware::Return<V1_3::ErrorStatus> SampleDriverFloatXNNPACK::prepareModel_1_3(
const V1_3::Model& model, V1_1::ExecutionPreference preference, V1_3::Priority priority,
const V1_3::OptionalTimePoint& deadline,
const hardware::hidl_vec<hardware::hidl_handle>& modelCache,
const hardware::hidl_vec<hardware::hidl_handle>& dataCache, const HalCacheToken& token,
const sp<V1_3::IPreparedModelCallback>& callback) {
return prepareModelXNNPACK(model, this, preference, priority, deadline, callback);
}
hardware::Return<void> SampleDriverFloatXNNPACK::getCapabilities_1_3(getCapabilities_1_3_cb cb) {
android::nn::initVLogMask();
VLOG(DRIVER) << "SampleDriverFloatXNNPACK::getCapabilities()";
V1_3::Capabilities capabilities = {
.relaxedFloat32toFloat16PerformanceScalar = {.execTime = 0.7f, .powerUsage = 1.1f},
.relaxedFloat32toFloat16PerformanceTensor = {.execTime = 0.7f, .powerUsage = 1.1f},
.operandPerformance = nonExtensionOperandPerformance<HalVersion::V1_3>({1.0f, 1.0f}),
.ifPerformance = {.execTime = 1.0f, .powerUsage = 1.0f},
.whilePerformance = {.execTime = 1.0f, .powerUsage = 1.0f}};
update(&capabilities.operandPerformance, V1_3::OperandType::TENSOR_FLOAT32,
{.execTime = 0.8f, .powerUsage = 1.2f});
update(&capabilities.operandPerformance, V1_3::OperandType::FLOAT32,
{.execTime = 0.8f, .powerUsage = 1.2f});
cb(V1_3::ErrorStatus::NONE, capabilities);
return hardware::Void();
}
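// An operation is reported as supported if the corresponding Visit*Node check
// passes with a null XNNPACK subgraph (validation-only mode).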
std::vector<bool> SampleDriverFloatXNNPACK::getSupportedOperationsImpl(
const V1_3::Model& model) const {
std::vector<RunTimePoolInfo> poolInfos;
setRunTimePoolInfosFromCanonicalMemories(&poolInfos, uncheckedConvert(model.pools));
auto operands = initializeRunTimeInfo(model.main, poolInfos, &model.operandValues);
const size_t count = model.main.operations.size();
std::vector<bool> supported(count);
for (size_t i = 0; i < count; i++) {
bool isSupportedOp = false;
const V1_3::Operation& operation = model.main.operations[i];
if (Subgraph::VisitNode(/*subgraph=*/nullptr, operation, operands.data(), {}) ==
V1_3::ErrorStatus::NONE) {
isSupportedOp = true;
}
supported[i] = isSupportedOp;
}
return supported;
}
hardware::Return<void> SampleDriverFloatXNNPACK::allocate(
const V1_3::BufferDesc& desc,
const hardware::hidl_vec<sp<V1_3::IPreparedModel>>& preparedModels,
const hardware::hidl_vec<V1_3::BufferRole>& inputRoles,
const hardware::hidl_vec<V1_3::BufferRole>& outputRoles, allocate_cb cb) {
VLOG(DRIVER) << "SampleDriverFloatXNNPACK::allocate not supported";
constexpr uint32_t kInvalidBufferToken = 0;
cb(V1_3::ErrorStatus::INVALID_ARGUMENT, nullptr, kInvalidBufferToken);
return hardware::Void();
}
} // namespace sample_driver
} // namespace nn
} // namespace android
using android::sp;
using android::nn::sample_driver::SampleDriverFloatXNNPACK;
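// Initializes XNNPACK before starting the sample driver service.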
int main() {
sp<SampleDriverFloatXNNPACK> driver(new SampleDriverFloatXNNPACK());
    const xnn_status status = xnn_initialize(/*allocator=*/nullptr);
    if (status != xnn_status_success) {
        LOG(ERROR) << "Failed to initialize XNNPACK";
        return 1;
    }
return driver->run();
}