You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

459 lines
21 KiB

/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#define LOG_TAG "Operations"
#include <vector>
#include "OperationResolver.h"
#include "Tracing.h"
#include "nnapi/Validation.h"
#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
#include <tensorflow/lite/kernels/internal/optimized/optimized_ops.h>
#include <tensorflow/lite/kernels/internal/reference/integer_ops/pooling.h>
#include "CpuOperationUtils.h"
#endif // NN_INCLUDE_CPU_IMPLEMENTATION
namespace android {
namespace nn {
namespace pooling {
// Index of the data tensor among the operation's inputs.
constexpr uint32_t kInputTensor = 0;
// Number of outputs a pooling operation must declare (checked in validate()).
constexpr uint32_t kNumOutputs = 1;
// Index of the result tensor among the operation's outputs.
constexpr uint32_t kOutputTensor = 0;
#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
namespace {
struct PoolingParam {
int32_t padding_left, padding_right;
int32_t padding_top, padding_bottom;
int32_t stride_width, stride_height;
int32_t filter_width, filter_height;
int32_t activation;
bool useNchw = false;
bool initialize(const IOperationExecutionContext* context) {
uint32_t inCount = context->getNumInputs();
int32_t padding_implicit = 0;
if (inCount >= 10) {
padding_left = context->getInputValue<int32_t>(1);
padding_right = context->getInputValue<int32_t>(2);
padding_top = context->getInputValue<int32_t>(3);
padding_bottom = context->getInputValue<int32_t>(4);
stride_width = context->getInputValue<int32_t>(5);
stride_height = context->getInputValue<int32_t>(6);
filter_width = context->getInputValue<int32_t>(7);
filter_height = context->getInputValue<int32_t>(8);
activation = context->getInputValue<int32_t>(9);
if (inCount == 11) {
useNchw = context->getInputValue<bool>(10);
}
} else {
padding_implicit = context->getInputValue<int32_t>(1);
stride_width = context->getInputValue<int32_t>(2);
stride_height = context->getInputValue<int32_t>(3);
filter_width = context->getInputValue<int32_t>(4);
filter_height = context->getInputValue<int32_t>(5);
activation = context->getInputValue<int32_t>(6);
if (inCount == 8) {
useNchw = context->getInputValue<bool>(7);
}
}
if (inCount <= 8) {
Shape inputShape = context->getInputShape(kInputTensor);
int32_t input_height = getSizeOfDimension(inputShape, useNchw ? 2 : 1);
int32_t input_width = getSizeOfDimension(inputShape, useNchw ? 3 : 2);
calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
&padding_left, &padding_right);
calculateExplicitPadding(input_height, stride_height, filter_height, padding_implicit,
&padding_top, &padding_bottom);
}
NN_RET_CHECK_GE(padding_left, 0);
NN_RET_CHECK_GE(padding_right, 0);
NN_RET_CHECK_GE(padding_top, 0);
NN_RET_CHECK_GE(padding_bottom, 0);
NN_RET_CHECK_GT(stride_width, 0);
NN_RET_CHECK_GT(stride_height, 0);
NN_RET_CHECK_GT(filter_width, 0);
NN_RET_CHECK_GT(filter_height, 0);
NN_RET_CHECK_GE(activation, 0);
NN_RET_CHECK_GT(filter_width, padding_left);
NN_RET_CHECK_GT(filter_width, padding_right);
NN_RET_CHECK_GT(filter_height, padding_top);
NN_RET_CHECK_GT(filter_height, padding_bottom);
return true;
}
tflite::PoolParams toTfliteParam(const Shape& output) const {
tflite::PoolParams params = {
.padding_values = {.width = static_cast<int16_t>(padding_left),
.height = static_cast<int16_t>(padding_top),
.width_offset = 0,
.height_offset = 0},
.stride_height = stride_height,
.stride_width = stride_width,
.filter_height = filter_height,
.filter_width = filter_width,
};
if (output.type == OperandType::TENSOR_QUANT8_ASYMM) {
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeUint8(activation, output, &output_activation_min,
&output_activation_max);
params.quantized_activation_min = output_activation_min;
params.quantized_activation_max = output_activation_max;
} else if (output.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
int32_t output_activation_min = 0;
int32_t output_activation_max = 0;
CalculateActivationRangeInt8(activation, output, &output_activation_min,
&output_activation_max);
params.quantized_activation_min = output_activation_min;
params.quantized_activation_max = output_activation_max;
} else {
float output_activation_min, output_activation_max;
CalculateActivationRangeFloat(activation, &output_activation_min,
&output_activation_max);
params.float_activation_min = output_activation_min;
params.float_activation_max = output_activation_max;
}
return params;
}
};
bool averagePoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
float* outputData, const Shape& outputShape) {
NNTRACE_TRANS("averagePoolFloat32");
auto op_params = param.toTfliteParam(outputShape);
NNTRACE_COMP_SWITCH("optimized_ops::AveragePool");
tflite::optimized_ops::AveragePool(op_params, convertShapeToTflshape(inputShape), inputData,
convertShapeToTflshape(outputShape), outputData);
return true;
}
// Float16 average pooling over NHWC data, computed in float32.
//
// The input is widened into a temporary float32 buffer, pooled by the float32 overload, and
// the result is narrowed back into |outputData|. Returns false if the float32 kernel fails.
bool averagePoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
                     _Float16* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("averagePoolFloat16");
    std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
    std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));

    convertFloat16ToFloat32(inputData, &inputDataFloat32);
    // Do not convert an output buffer the kernel did not produce: propagate its failure.
    if (!averagePoolNhwc(inputDataFloat32.data(), inputShape, param, outputDataFloat32.data(),
                         outputShape)) {
        return false;
    }
    convertFloat32ToFloat16(outputDataFloat32, outputData);
    return true;
}
bool averagePoolNhwc(const uint8_t* inputData, const Shape& inputShape, const PoolingParam& param,
uint8_t* outputData, const Shape& outputShape) {
NNTRACE_TRANS("averagePoolQuant8");
auto op_params = param.toTfliteParam(outputShape);
NNTRACE_COMP_SWITCH("optimized_ops::AveragePool");
tflite::optimized_ops::AveragePool(op_params, convertShapeToTflshape(inputShape), inputData,
convertShapeToTflshape(outputShape), outputData);
return true;
}
// Signed int8 average pooling over NHWC data. Always reports success.
bool averagePoolNhwc(const int8_t* inputData, const Shape& inputShape, const PoolingParam& param,
                     int8_t* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("averagePoolQuant8Signed");
    auto op_params = param.toTfliteParam(outputShape);
    // The trace label names the kernel that is actually invoked below.
    NNTRACE_COMP_SWITCH("reference_integer_ops::AveragePool");
    // We are using reference implementation of the AveragePool op because the
    // optimized version fails to pass some of the quantization coupling tests.
    tflite::reference_integer_ops::AveragePool(op_params, convertShapeToTflshape(inputShape),
                                               inputData, convertShapeToTflshape(outputShape),
                                               outputData);
    return true;
}
bool l2PoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
float* outputData, const Shape& outputShape) {
NNTRACE_TRANS("l2PoolFloat32");
auto op_params = param.toTfliteParam(outputShape);
NNTRACE_COMP_SWITCH("optimized_ops::L2Pool");
tflite::optimized_ops::L2Pool(op_params, convertShapeToTflshape(inputShape), inputData,
convertShapeToTflshape(outputShape), outputData);
return true;
}
// Float16 L2 pooling over NHWC data, computed in float32.
//
// The input is widened into a temporary float32 buffer, pooled by the float32 overload, and
// the result is narrowed back into |outputData|. Returns false if the float32 kernel fails.
bool l2PoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
                _Float16* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("l2PoolFloat16");
    std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
    std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));

    convertFloat16ToFloat32(inputData, &inputDataFloat32);
    // Do not convert an output buffer the kernel did not produce: propagate its failure.
    if (!l2PoolNhwc(inputDataFloat32.data(), inputShape, param, outputDataFloat32.data(),
                    outputShape)) {
        return false;
    }
    convertFloat32ToFloat16(outputDataFloat32, outputData);
    return true;
}
bool maxPoolNhwc(const float* inputData, const Shape& inputShape, const PoolingParam& param,
float* outputData, const Shape& outputShape) {
NNTRACE_TRANS("maxPoolFloat32");
auto op_params = param.toTfliteParam(outputShape);
NNTRACE_COMP_SWITCH("optimized_ops::MaxPool");
tflite::optimized_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
convertShapeToTflshape(outputShape), outputData);
return true;
}
bool maxPoolNhwc(const uint8_t* inputData, const Shape& inputShape, const PoolingParam& param,
uint8_t* outputData, const Shape& outputShape) {
NNTRACE_TRANS("maxPoolQuant8");
auto op_params = param.toTfliteParam(outputShape);
NNTRACE_COMP_SWITCH("optimized_ops::MaxPool");
tflite::optimized_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
convertShapeToTflshape(outputShape), outputData);
return true;
}
// Signed int8 max pooling over NHWC data. Always reports success.
bool maxPoolNhwc(const int8_t* inputData, const Shape& inputShape, const PoolingParam& param,
                 int8_t* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("maxPoolQuant8Signed");
    auto op_params = param.toTfliteParam(outputShape);
    // The trace label names the kernel that is actually invoked below.
    NNTRACE_COMP_SWITCH("reference_integer_ops::MaxPool");
    // We are using reference implementation of the MaxPool op because the
    // optimized version fails to pass some of the quantization coupling tests.
    tflite::reference_integer_ops::MaxPool(op_params, convertShapeToTflshape(inputShape), inputData,
                                           convertShapeToTflshape(outputShape), outputData);
    return true;
}
// Float16 max pooling over NHWC data, computed in float32.
//
// The input is widened into a temporary float32 buffer, pooled by the float32 overload, and
// the result is narrowed back into |outputData|. Returns false if the float32 kernel fails.
bool maxPoolNhwc(const _Float16* inputData, const Shape& inputShape, const PoolingParam& param,
                 _Float16* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("maxPoolFloat16");
    // Local names follow the other Float16 wrappers in this file.
    std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
    std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));

    convertFloat16ToFloat32(inputData, &inputDataFloat32);
    // Do not convert an output buffer the kernel did not produce: propagate its failure.
    if (!maxPoolNhwc(inputDataFloat32.data(), inputShape, param, outputDataFloat32.data(),
                     outputShape)) {
        return false;
    }
    convertFloat32ToFloat16(outputDataFloat32, outputData);
    return true;
}
// Layout-aware entry point for average pooling.
//
// Wraps the caller's buffers in InputWithLayout/OutputWithLayout (constructed with
// param.useNchw), runs the averagePoolNhwc overload matching T on the NHWC buffers and shapes
// they expose, then commits the output wrapper. Any failing step aborts via NN_RET_CHECK.
// NOTE(review): the layout helpers are defined elsewhere; presumably they convert between
// NCHW and NHWC when the flag is set - confirm.
template <typename T>
bool averagePool(const T* inputData, const Shape& inputShape, const PoolingParam& param,
T* outputData, const Shape& outputShape) {
InputWithLayout<T> input(param.useNchw);
OutputWithLayout<T> output(param.useNchw);
NN_RET_CHECK(input.initialize(inputData, inputShape));
NN_RET_CHECK(output.initialize(outputData, outputShape));
NN_RET_CHECK(averagePoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
output.getNhwcBuffer(), output.getNhwcShape()));
// The kernel wrote into the wrapper's NHWC buffer; commit() finalizes the caller's output.
NN_RET_CHECK(output.commit());
return true;
}
// Layout-aware entry point for L2 pooling.
//
// Wraps the caller's buffers in InputWithLayout/OutputWithLayout (constructed with
// param.useNchw), runs the l2PoolNhwc overload matching T on the NHWC buffers and shapes they
// expose, then commits the output wrapper. Any failing step aborts via NN_RET_CHECK.
// NOTE(review): the layout helpers are defined elsewhere; presumably they convert between
// NCHW and NHWC when the flag is set - confirm.
template <typename T>
bool l2Pool(const T* inputData, const Shape& inputShape, const PoolingParam& param, T* outputData,
const Shape& outputShape) {
InputWithLayout<T> input(param.useNchw);
OutputWithLayout<T> output(param.useNchw);
NN_RET_CHECK(input.initialize(inputData, inputShape));
NN_RET_CHECK(output.initialize(outputData, outputShape));
NN_RET_CHECK(l2PoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
output.getNhwcBuffer(), output.getNhwcShape()));
// The kernel wrote into the wrapper's NHWC buffer; commit() finalizes the caller's output.
NN_RET_CHECK(output.commit());
return true;
}
// Layout-aware entry point for max pooling.
//
// Wraps the caller's buffers in InputWithLayout/OutputWithLayout (constructed with
// param.useNchw), runs the maxPoolNhwc overload matching T on the NHWC buffers and shapes they
// expose, then commits the output wrapper. Any failing step aborts via NN_RET_CHECK.
// NOTE(review): the layout helpers are defined elsewhere; presumably they convert between
// NCHW and NHWC when the flag is set - confirm.
template <typename T>
bool maxPool(const T* inputData, const Shape& inputShape, const PoolingParam& param, T* outputData,
const Shape& outputShape) {
InputWithLayout<T> input(param.useNchw);
OutputWithLayout<T> output(param.useNchw);
NN_RET_CHECK(input.initialize(inputData, inputShape));
NN_RET_CHECK(output.initialize(outputData, outputShape));
NN_RET_CHECK(maxPoolNhwc(input.getNhwcBuffer(), input.getNhwcShape(), param,
output.getNhwcBuffer(), output.getNhwcShape()));
// The kernel wrote into the wrapper's NHWC buffer; commit() finalizes the caller's output.
NN_RET_CHECK(output.commit());
return true;
}
} // namespace
#endif // NN_INCLUDE_CPU_IMPLEMENTATION
// Checks the operand signature of a pooling operation and reports the minimum version that
// supports it.
//
// Accepted input counts are 7, 8, 10 and 11: the data tensor, then six (implicit padding) or
// nine (explicit padding) INT32 scalars, optionally followed by a BOOL layout flag. The single
// output must have the same type as the data tensor. Quantized tensors are rejected for
// L2_POOL_2D.
Result<Version> validate(OperationType opType, const IOperationValidationContext* context) {
    NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
    const auto inputCount = context->getNumInputs();
    NN_RET_CHECK(inputCount == 11 || inputCount == 10 || inputCount == 8 || inputCount == 7);

    const auto inputType = context->getInputType(kInputTensor);
    const bool quantizedAllowed = opType != OperationType::L2_POOL_2D;

    // Classify the data tensor type: is it supported, and since which version?
    auto minSupportedVersion = Version::ANDROID_OC_MR1;
    bool typeSupported = true;
    switch (inputType) {
        case OperandType::TENSOR_FLOAT32:
            minSupportedVersion = Version::ANDROID_OC_MR1;
            break;
        case OperandType::TENSOR_FLOAT16:
            minSupportedVersion = Version::ANDROID_Q;
            break;
        case OperandType::TENSOR_QUANT8_ASYMM:
            typeSupported = quantizedAllowed;
            minSupportedVersion = Version::ANDROID_OC_MR1;
            break;
        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
            typeSupported = quantizedAllowed;
            minSupportedVersion = Version::ANDROID_R;
            break;
        default:
            typeSupported = false;
            break;
    }
    if (!typeSupported) {
        NN_RET_CHECK_FAIL() << "Unsupported input tensor type for operation " << opType;
    }

    // Expected operand types: the tensor itself, the INT32 scalars, then the optional flag.
    const bool hasExplicitPadding = inputCount >= 10;
    const bool hasLayoutFlag = inputCount == 11 || inputCount == 8;
    std::vector<OperandType> inExpectedTypes = {inputType};
    inExpectedTypes.insert(inExpectedTypes.end(), hasExplicitPadding ? 9u : 6u,
                           OperandType::INT32);
    if (hasLayoutFlag) {
        inExpectedTypes.push_back(OperandType::BOOL);
    }

    // The layout flag raises the requirement to at least ANDROID_Q.
    minSupportedVersion = combineVersions(
            minSupportedVersion, hasLayoutFlag ? Version::ANDROID_Q : Version::ANDROID_OC_MR1);

    NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    NN_RET_CHECK(validateOutputTypes(context, {inputType}));
    return minSupportedVersion;
}
#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
// Computes the output shape of a pooling operation and stores it in |context|.
//
// The input must be 4-D with non-zero height, width and channel extents (only the batch
// dimension may be zero). Returns false if a check fails or the output shape is not accepted.
bool prepare(IOperationExecutionContext* context) {
    Shape input = context->getInputShape(kInputTensor);
    NN_RET_CHECK_EQ(getNumberOfDimensions(input), 4);

    PoolingParam param;
    NN_RET_CHECK(param.initialize(context));

    // Axis positions depend on the data layout: NCHW when useNchw is set, NHWC otherwise.
    const uint32_t heightAxis = param.useNchw ? 2 : 1;
    const uint32_t widthAxis = param.useNchw ? 3 : 2;
    const uint32_t channelAxis = param.useNchw ? 1 : 3;

    // Only batches can be zero.
    uint32_t batches = getSizeOfDimension(input, 0);
    uint32_t height = getSizeOfDimension(input, heightAxis);
    uint32_t width = getSizeOfDimension(input, widthAxis);
    uint32_t channels = getSizeOfDimension(input, channelAxis);
    NN_RET_CHECK_GT(height, 0);
    NN_RET_CHECK_GT(width, 0);
    NN_RET_CHECK_GT(channels, 0);

    const uint32_t pooledWidth = computeOutSize(width, param.filter_width, param.stride_width,
                                                param.padding_left, param.padding_right);
    const uint32_t pooledHeight = computeOutSize(height, param.filter_height, param.stride_height,
                                                 param.padding_top, param.padding_bottom);

    // The output inherits everything from the input except the spatial extents.
    Shape pooledShape = input;
    if (!param.useNchw) {
        pooledShape.dimensions = {batches, pooledHeight, pooledWidth, channels};
    } else {
        pooledShape.dimensions = {batches, channels, pooledHeight, pooledWidth};
    }
    return context->setOutputShape(kOutputTensor, pooledShape);
}
// Expands to one `case` of a switch over the input OperandType: for OperandType::<type> it
// returns the result of <name>() called with the input/output buffers typed as <cppType> and
// the corresponding shapes. The expansion relies on `context` and `param` being in scope at
// the point of use, and the caller supplies the trailing semicolon.
#define POOLING_DISPATCH_INPUT_TYPE(name, type, cppType) \
case OperandType::type: \
return name(context->getInputBuffer<cppType>(kInputTensor), \
context->getInputShape(kInputTensor), param, \
context->getOutputBuffer<cppType>(kOutputTensor), \
context->getOutputShape(kOutputTensor))
// Runs AVERAGE_POOL_2D on the CPU, dispatching on the input tensor type.
// Returns true immediately when the output tensor holds no elements.
bool executeAveragePool(IOperationExecutionContext* context) {
    // Bypass execution in the case of zero-sized input.
    const bool nothingToCompute =
            getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0;
    if (nothingToCompute) {
        return true;
    }

    PoolingParam param;
    NN_RET_CHECK(param.initialize(context));

    // Each dispatch line is a `case` that returns the kernel's result.
    switch (context->getInputType(kInputTensor)) {
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_FLOAT32, float);
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_FLOAT16, _Float16);
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_QUANT8_ASYMM, uint8_t);
        POOLING_DISPATCH_INPUT_TYPE(averagePool, TENSOR_QUANT8_ASYMM_SIGNED, int8_t);
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation AVERAGE_POOL_2D";
    }
}
// Runs L2_POOL_2D on the CPU, dispatching on the input tensor type (float types only).
// Returns true immediately when the output tensor holds no elements.
bool executeL2Pool(IOperationExecutionContext* context) {
    // Bypass execution in the case of zero-sized input.
    const bool nothingToCompute =
            getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0;
    if (nothingToCompute) {
        return true;
    }

    PoolingParam param;
    NN_RET_CHECK(param.initialize(context));

    // Each dispatch line is a `case` that returns the kernel's result.
    switch (context->getInputType(kInputTensor)) {
        POOLING_DISPATCH_INPUT_TYPE(l2Pool, TENSOR_FLOAT32, float);
        POOLING_DISPATCH_INPUT_TYPE(l2Pool, TENSOR_FLOAT16, _Float16);
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation L2_POOL_2D";
    }
}
// Runs MAX_POOL_2D on the CPU, dispatching on the input tensor type.
// Returns true immediately when the output tensor holds no elements.
bool executeMaxPool(IOperationExecutionContext* context) {
    // Bypass execution in the case of zero-sized input.
    const bool nothingToCompute =
            getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0;
    if (nothingToCompute) {
        return true;
    }

    PoolingParam param;
    NN_RET_CHECK(param.initialize(context));

    // Each dispatch line is a `case` that returns the kernel's result.
    switch (context->getInputType(kInputTensor)) {
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_FLOAT32, float);
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_FLOAT16, _Float16);
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_QUANT8_ASYMM, uint8_t);
        POOLING_DISPATCH_INPUT_TYPE(maxPool, TENSOR_QUANT8_ASYMM_SIGNED, int8_t);
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation MAX_POOL_2D";
    }
}
#endif // NN_INCLUDE_CPU_IMPLEMENTATION
#undef POOLING_DISPATCH_INPUT_TYPE
} // namespace pooling
// Registration of the three pooling operations. They share pooling::validate (with the
// specific OperationType bound as its first argument, leaving the validation context as the
// only parameter) and pooling::prepare, and differ only in their execute function. All three
// are registered with .allowZeroSizedInput = true.
using std::placeholders::_1;
NN_REGISTER_OPERATION(AVERAGE_POOL_2D, "AVERAGE_POOL_2D",
std::bind(pooling::validate, OperationType::AVERAGE_POOL_2D, _1),
pooling::prepare, pooling::executeAveragePool, .allowZeroSizedInput = true);
NN_REGISTER_OPERATION(L2_POOL_2D, "L2_POOL_2D",
std::bind(pooling::validate, OperationType::L2_POOL_2D, _1), pooling::prepare,
pooling::executeL2Pool, .allowZeroSizedInput = true);
NN_REGISTER_OPERATION(MAX_POOL_2D, "MAX_POOL_2D",
std::bind(pooling::validate, OperationType::MAX_POOL_2D, _1),
pooling::prepare, pooling::executeMaxPool, .allowZeroSizedInput = true);
} // namespace nn
} // namespace android