/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Operations"

#include <vector>

#include "OperationResolver.h"
#include "Tracing.h"

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
#include <tensorflow/lite/kernels/internal/optimized/legacy_optimized_ops.h>
#include <tensorflow/lite/kernels/internal/reference/integer_ops/fully_connected.h>
#include <tensorflow/lite/kernels/internal/reference/reference_ops.h>
#include <tensorflow/lite/kernels/internal/types.h>

#include "CpuOperationUtils.h"
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

namespace android {
namespace nn {
namespace fully_connected {

constexpr char kOperationName[] = "FULLY_CONNECTED";

constexpr uint32_t kNumInputs = 4;
constexpr uint32_t kInputTensor = 0;
constexpr uint32_t kWeightsTensor = 1;
constexpr uint32_t kBiasTensor = 2;
constexpr uint32_t kActivationScalar = 3;

constexpr uint32_t kNumOutputs = 1;
constexpr uint32_t kOutputTensor = 0;

namespace {

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
// executionMutex is used to guard concurrent access to non-thread-safe resources
// like gemmlowp::GemmContext.
// std::mutex is safe for pthreads on Android.
static std::mutex executionMutex;

bool fullyConnectedFloat32(const float* inputData, const Shape& inputShape,
                           const float* weightsData, const Shape& weightsShape,
                           const float* biasData, const Shape& biasShape, int32_t activation,
                           float* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("fullyConnectedFloat32");
    float output_activation_min, output_activation_max;
    CalculateActivationRangeFloat(activation, &output_activation_min, &output_activation_max);

    // b/80425683: the optimized implementation produces incorrect results when the
    // number of input elements is the square of batch_size.
    uint32_t batch_size = getSizeOfDimension(outputShape, 0);
    uint32_t input_n_elements = getNumberOfElements(inputShape);
    if (batch_size * batch_size == input_n_elements) {
        NNTRACE_COMP_SWITCH("reference_ops::FullyConnected");
        tflite::reference_ops::FullyConnected(inputData, convertShapeToDims(inputShape),
                                              weightsData, convertShapeToDims(weightsShape),
                                              biasData, convertShapeToDims(biasShape),
                                              output_activation_min, output_activation_max,
                                              outputData, convertShapeToDims(outputShape));
    } else {
        NNTRACE_COMP_SWITCH("optimized_ops::FullyConnected");
        tflite::optimized_ops::FullyConnected(inputData, convertShapeToDims(inputShape),
                                              weightsData, convertShapeToDims(weightsShape),
                                              biasData, convertShapeToDims(biasShape),
                                              output_activation_min, output_activation_max,
                                              outputData, convertShapeToDims(outputShape));
    }
    return true;
}

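// Note: the FP16 path below has no dedicated kernel. It converts the input, weights, and
// bias tensors to FP32, reuses fullyConnectedFloat32() above, and converts the result back
// to FP16.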
bool fullyConnectedFloat16(const _Float16* inputData, const Shape& inputShape,
                           const _Float16* weightsData, const Shape& weightsShape,
                           const _Float16* biasData, const Shape& biasShape, int32_t activation,
                           _Float16* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("fullyConnectedFloat16");
    std::vector<float> inputDataFloat32(getNumberOfElements(inputShape));
    convertFloat16ToFloat32(inputData, &inputDataFloat32);
    std::vector<float> weightsDataFloat32(getNumberOfElements(weightsShape));
    convertFloat16ToFloat32(weightsData, &weightsDataFloat32);
    std::vector<float> biasDataFloat32(getNumberOfElements(biasShape));
    convertFloat16ToFloat32(biasData, &biasDataFloat32);

    std::vector<float> outputDataFloat32(getNumberOfElements(outputShape));
    fullyConnectedFloat32(inputDataFloat32.data(), inputShape, weightsDataFloat32.data(),
                          weightsShape, biasDataFloat32.data(), biasShape, activation,
                          outputDataFloat32.data(), outputShape);
    convertFloat32ToFloat16(outputDataFloat32, outputData);

    return true;
}

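// Requantization note (a sketch of the usual TFLite scheme, assuming the semantics of the
// helpers used below): GetQuantizedConvolutionMultipler() computes the real scale ratio
//     realMultiplier = inputScale * weightsScale / outputScale,
// and QuantizeMultiplier() approximates it as a 32-bit fixed-point multiplier plus a
// power-of-two exponent, so that realMultiplier ~ outputMultiplier * 2^exponent
// (with outputShift = -exponent in the unsigned path).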
bool fullyConnectedQuant8(const uint8_t* inputData, const Shape& inputShape,
                          const uint8_t* weightsData, const Shape& weightsShape,
                          const int32_t* biasData, const Shape& biasShape, int32_t activation,
                          uint8_t* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("fullyConnectedQuant8");
    int32_t inputOffset = -inputShape.offset;
    int32_t weightsOffset = -weightsShape.offset;
    int32_t outputOffset = outputShape.offset;

    double realMultiplier = 0.0;
    int32_t outputMultiplier = 0;
    int32_t outputShift = 0;
    int32_t outputActivationMin = 0;
    int32_t outputActivationMax = 0;

    NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, weightsShape, biasShape, outputShape,
                                                  &realMultiplier));
    int exponent;
    NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &exponent));
    outputShift = -exponent;
    CalculateActivationRangeUint8(activation, outputShape, &outputActivationMin,
                                  &outputActivationMax);

    static gemmlowp::GemmContext gemmContext;

    // Prevent concurrent executions that access gemmContext.
    std::unique_lock<std::mutex> lock(executionMutex);
    // Allow gemmlowp to automatically decide how many threads to use.
    gemmContext.set_max_num_threads(0);

    NNTRACE_COMP_SWITCH("optimized_ops::FullyConnected");
    tflite::optimized_ops::FullyConnected(inputData, convertShapeToDims(inputShape), inputOffset,
                                          weightsData, convertShapeToDims(weightsShape),
                                          weightsOffset, biasData, convertShapeToDims(biasShape),
                                          outputOffset, outputMultiplier, outputShift,
                                          outputActivationMin, outputActivationMax, outputData,
                                          convertShapeToDims(outputShape), &gemmContext);

    return true;
}

bool fullyConnectedQuant8(const int8_t* inputData, const Shape& inputShape,
                          const int8_t* weightsData, const Shape& weightsShape,
                          const int32_t* biasData, const Shape& biasShape, int32_t activation,
                          int8_t* outputData, const Shape& outputShape) {
    NNTRACE_TRANS("fullyConnectedQuant8Signed");

    double realMultiplier = 0.0;
    int32_t outputMultiplier = 0;
    int32_t outputShift = 0;
    int32_t outputActivationMin = 0;
    int32_t outputActivationMax = 0;

    NN_RET_CHECK(GetQuantizedConvolutionMultipler(inputShape, weightsShape, biasShape, outputShape,
                                                  &realMultiplier));
    NN_RET_CHECK(QuantizeMultiplier(realMultiplier, &outputMultiplier, &outputShift));
    CalculateActivationRangeInt8(activation, outputShape, &outputActivationMin,
                                 &outputActivationMax);

    tflite::FullyConnectedParams params;
    params.input_offset = -inputShape.offset;
    params.weights_offset = -weightsShape.offset;
    params.output_offset = outputShape.offset;
    params.output_multiplier = outputMultiplier;
    params.output_shift = outputShift;
    params.quantized_activation_min = outputActivationMin;
    params.quantized_activation_max = outputActivationMax;

    NNTRACE_COMP_SWITCH("reference_integer_ops::FullyConnected");
    tflite::reference_integer_ops::FullyConnected(
            params, convertShapeToTflshape(inputShape), inputData,
            convertShapeToTflshape(weightsShape), weightsData, convertShapeToTflshape(biasShape),
            biasData, convertShapeToTflshape(outputShape), outputData);

    return true;
}
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

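// Shape/layout reminder, as used by the checks below: the input is treated as a 2-D matrix
// [batch_size, input_size] regardless of its declared rank (2 to 4), the weights tensor is
// [num_units, input_size], the bias is [num_units], and the output is [batch_size, num_units];
// batch_size is therefore derived as getNumberOfElements(input) / input_size.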
bool validateShapes(const Shape& input, const Shape& weights, const Shape& bias,
                    Shape* output = nullptr) {
    // Check that all tensor parameters are consistent with each other and with the
    // input configuration.
    NN_RET_CHECK(weights.type == input.type);
    if (input.type == OperandType::TENSOR_QUANT8_ASYMM ||
        input.type == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        NN_RET_CHECK(bias.type == OperandType::TENSOR_INT32);
    } else {
        NN_RET_CHECK(bias.type == input.type);
    }
    // The TensorFlow fully connected layer specification says that the input should have
    // at least rank 2, so we check that here. TFLite does not check it.
    NN_RET_CHECK_GE(getNumberOfDimensions(input), 2);
    NN_RET_CHECK_LE(getNumberOfDimensions(input), 4);
    NN_RET_CHECK_EQ(getNumberOfDimensions(weights), 2);
    NN_RET_CHECK_EQ(getNumberOfDimensions(bias), 1);
    uint32_t input_n_elements = getNumberOfElements(input);
    uint32_t num_units = getSizeOfDimension(weights, 0);
    uint32_t input_size = getSizeOfDimension(weights, 1);
    uint32_t bias_len = getSizeOfDimension(bias, 0);
    uint32_t batch_size = input_size == 0 ? 0 : input_n_elements / input_size;
    if (batch_size != 0) {
        NN_RET_CHECK_EQ(input_size * batch_size, input_n_elements);
    }
    if (num_units != 0 && bias_len != 0) {
        NN_RET_CHECK_EQ(bias_len, num_units);
    }
    if (output != nullptr) {
        // Only batch_size can be 0.
        NN_RET_CHECK_GT(num_units, 0);
        NN_RET_CHECK_GT(input_size, 0);
        output->type = input.type;
        output->dimensions = {batch_size, num_units};
    }
    return true;
}

}  // namespace

Result<Version> validate(const IOperationValidationContext* context) {
    NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
    NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
    auto inputType = context->getInputType(kInputTensor);
    std::vector<OperandType> inExpectedTypes;
    std::vector<OperandType> outExpectedTypes;
    auto minSupportedVersion = Version::ANDROID_OC_MR1;
    if (inputType == OperandType::TENSOR_FLOAT32) {
        minSupportedVersion = Version::ANDROID_OC_MR1;
        inExpectedTypes = {
                OperandType::TENSOR_FLOAT32,
                OperandType::TENSOR_FLOAT32,
                OperandType::TENSOR_FLOAT32,
                OperandType::INT32,
        };
    } else if (inputType == OperandType::TENSOR_FLOAT16) {
        minSupportedVersion = Version::ANDROID_Q;
        inExpectedTypes = {
                OperandType::TENSOR_FLOAT16,
                OperandType::TENSOR_FLOAT16,
                OperandType::TENSOR_FLOAT16,
                OperandType::INT32,
        };
    } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
        // NeuralNetworks.h specifies that ANEURALNETWORKS_FULLY_CONNECTED's output must
        // meet "outputScale > inputScale * weightsScale" for the operand type
        // ANEURALNETWORKS_TENSOR_QUANT8_ASYMM before API level 29.
        const float inputScale = context->getInputShape(kInputTensor).scale;
        const float weightsScale = context->getInputShape(kWeightsTensor).scale;
        const float outputScale = context->getOutputShape(kOutputTensor).scale;
        bool meetsQuantizedScaleConstraintBeforeV1_2 = (outputScale > inputScale * weightsScale);

        if (!meetsQuantizedScaleConstraintBeforeV1_2) {
            minSupportedVersion = Version::ANDROID_Q;
        } else {
            minSupportedVersion = Version::ANDROID_OC_MR1;
        }

        inExpectedTypes = {
                OperandType::TENSOR_QUANT8_ASYMM,
                OperandType::TENSOR_QUANT8_ASYMM,
                OperandType::TENSOR_INT32,
                OperandType::INT32,
        };
    } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM_SIGNED) {
        minSupportedVersion = Version::ANDROID_R;

        inExpectedTypes = {
                OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
                OperandType::TENSOR_QUANT8_ASYMM_SIGNED,
                OperandType::TENSOR_INT32,
                OperandType::INT32,
        };
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported input tensor type for operation " << kOperationName;
    }
    NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    NN_RET_CHECK(validateOutputTypes(context, {inputType}));

    Shape input = context->getInputShape(kInputTensor);
    Shape weights = context->getInputShape(kWeightsTensor);
    Shape bias = context->getInputShape(kBiasTensor);
    if (hasKnownRank(input) && hasKnownRank(weights) && hasKnownRank(bias)) {
        NN_RET_CHECK(validateShapes(input, weights, bias));
    }

    return minSupportedVersion;
}

#ifdef NN_INCLUDE_CPU_IMPLEMENTATION
bool prepare(IOperationExecutionContext* context) {
    Shape input = context->getInputShape(kInputTensor);
    Shape weights = context->getInputShape(kWeightsTensor);
    Shape bias = context->getInputShape(kBiasTensor);
    Shape output = context->getOutputShape(kOutputTensor);
    NN_RET_CHECK(validateShapes(input, weights, bias, &output));
    return context->setOutputShape(kOutputTensor, output);
}

bool execute(IOperationExecutionContext* context) {
    // Bypass execution in the case of zero-sized input.
    if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
    switch (context->getInputType(kInputTensor)) {
        case OperandType::TENSOR_FLOAT32:
            return fullyConnectedFloat32(context->getInputBuffer<float>(kInputTensor),
                                         context->getInputShape(kInputTensor),
                                         context->getInputBuffer<float>(kWeightsTensor),
                                         context->getInputShape(kWeightsTensor),
                                         context->getInputBuffer<float>(kBiasTensor),
                                         context->getInputShape(kBiasTensor),
                                         context->getInputValue<int32_t>(kActivationScalar),
                                         context->getOutputBuffer<float>(kOutputTensor),
                                         context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_FLOAT16:
            return fullyConnectedFloat16(context->getInputBuffer<_Float16>(kInputTensor),
                                         context->getInputShape(kInputTensor),
                                         context->getInputBuffer<_Float16>(kWeightsTensor),
                                         context->getInputShape(kWeightsTensor),
                                         context->getInputBuffer<_Float16>(kBiasTensor),
                                         context->getInputShape(kBiasTensor),
                                         context->getInputValue<int32_t>(kActivationScalar),
                                         context->getOutputBuffer<_Float16>(kOutputTensor),
                                         context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_QUANT8_ASYMM:
            return fullyConnectedQuant8(context->getInputBuffer<uint8_t>(kInputTensor),
                                        context->getInputShape(kInputTensor),
                                        context->getInputBuffer<uint8_t>(kWeightsTensor),
                                        context->getInputShape(kWeightsTensor),
                                        context->getInputBuffer<int32_t>(kBiasTensor),
                                        context->getInputShape(kBiasTensor),
                                        context->getInputValue<int32_t>(kActivationScalar),
                                        context->getOutputBuffer<uint8_t>(kOutputTensor),
                                        context->getOutputShape(kOutputTensor));
        case OperandType::TENSOR_QUANT8_ASYMM_SIGNED:
            return fullyConnectedQuant8(context->getInputBuffer<int8_t>(kInputTensor),
                                        context->getInputShape(kInputTensor),
                                        context->getInputBuffer<int8_t>(kWeightsTensor),
                                        context->getInputShape(kWeightsTensor),
                                        context->getInputBuffer<int32_t>(kBiasTensor),
                                        context->getInputShape(kBiasTensor),
                                        context->getInputValue<int32_t>(kActivationScalar),
                                        context->getOutputBuffer<int8_t>(kOutputTensor),
                                        context->getOutputShape(kOutputTensor));
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}
#endif  // NN_INCLUDE_CPU_IMPLEMENTATION

}  // namespace fully_connected

NN_REGISTER_OPERATION(FULLY_CONNECTED, fully_connected::kOperationName, fully_connected::validate,
                      fully_connected::prepare, fully_connected::execute,
                      .allowZeroSizedInput = true);

}  // namespace nn
}  // namespace android