v811_spc009/packages/modules/NeuralNetworks/common/operations/SVDF.cpp

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "Operations"

#include "SVDF.h"

#include <algorithm>
#include <vector>

#include "CpuExecutor.h"
#include "CpuOperationUtils.h"
#include "Tracing.h"

namespace android {
namespace nn {

// Binds the operands of an SVDF operation to this instance and reads its scalar parameters.
//
// Only operand lookups and scalar reads happen here; the shape consistency checks live in
// Prepare().
SVDF::SVDF(const Operation& operation, RunTimeOperandInfo* operands) {
    NNTRACE_TRANS("SVDF::SVDF");

    // Input tensors. The bias operand is looked up like the others; Eval/EvalFloat32 test it
    // with IsNullInput() before using it.
    input_ = GetInput(operation, operands, kInputTensor);
    weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);
    weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);
    bias_ = GetInput(operation, operands, kBiasTensor);
    state_in_ = GetInput(operation, operands, kStateInTensor);

    // Output tensors.
    state_out_ = GetOutput(operation, operands, kStateOutTensor);
    output_ = GetOutput(operation, operands, kOutputTensor);

    // Scalar parameters, read with fallback values of 0 (rank) and kTfLiteActNone (activation).
    params_.rank_ = getScalarDataWithDefault<int>(*GetInput(operation, operands, kRankParam), 0);
    const int activationCode =
            getScalarDataWithDefault<int>(*GetInput(operation, operands, kActivationParam),
                                          TfLiteFusedActivation::kTfLiteActNone);
    params_.activation_ = static_cast<TfLiteFusedActivation>(activationCode);
}

bool SVDF::Prepare(const Operation& operation, RunTimeOperandInfo* operands, Shape* stateShape,
                   Shape* outputShape) {
    NNTRACE_TRANS("SVDF::Prepare");
    // Check we have all the inputs and outputs we need.
    const int num_inputs = NumInputsWithValues(operation, operands);

    NN_CHECK(num_inputs == 6 || num_inputs == 7);
    constexpr int requiredInputs[] = {
            kInputTensor, kWeightsFeatureTensor, kWeightsTimeTensor, kStateInTensor,
            kRankParam,   kActivationParam,
    };
    for (const int requiredInput : requiredInputs) {
        NN_RET_CHECK(!IsNullInput(GetInput(operation, operands, requiredInput)))
                << "required input " << requiredInput << " is omitted";
    }
    NN_CHECK_EQ(NumOutputs(operation), 2);

    // Check that the scalar operands' buffers are large enough.
    const auto& rankOperand = *GetInput(operation, operands, kRankParam);
    NN_RET_CHECK(rankOperand.length >= sizeof(int));
    const auto& activationOperand = *GetInput(operation, operands, kActivationParam);
    NN_RET_CHECK(activationOperand.length >= sizeof(int));

    const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);
    const RunTimeOperandInfo* weights_feature =
            GetInput(operation, operands, SVDF::kWeightsFeatureTensor);
    const RunTimeOperandInfo* weights_time =
            GetInput(operation, operands, SVDF::kWeightsTimeTensor);

    // Check all the parameters of tensor match within themselves and match the
    // input configuration.
    const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));
    const uint32_t batch_size = SizeOfDimension(input, 0);
    const uint32_t num_filters = SizeOfDimension(weights_feature, 0);
    NN_CHECK_EQ(num_filters % rank, 0);
    const uint32_t num_units = num_filters / rank;
    const uint32_t memory_size = SizeOfDimension(weights_time, 1);
    NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));
    NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);

    const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);
    if (!IsNullInput(bias)) {
        NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);
    }

    // Resize state.
    const Shape& inputShape = input->shape();
    stateShape->type = inputShape.type;
    stateShape->dimensions = {batch_size, memory_size * num_filters};
    stateShape->offset = inputShape.offset;
    stateShape->scale = inputShape.scale;

    // Resize output.
    outputShape->type = inputShape.type;
    outputShape->dimensions = {batch_size, num_units};
    outputShape->offset = inputShape.offset;
    outputShape->scale = inputShape.scale;

    return true;
}

bool SVDF::Eval() {
    NNTRACE_TRANS("SVDF::Eval");
    switch (input_->type) {
        case OperandType::TENSOR_FLOAT16: {
            std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);
            std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),
                                    &inputStateDataFloat32);
            std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));
            if (!IsNullInput(bias_)) {
                convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),
                                        &biasDataFloat32);
            }
            std::vector<float> weightsFeatureDataFloat32(
                    getNumberOfElements(weights_feature_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),
                                    &weightsFeatureDataFloat32);
            std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));
            convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),
                                    &weightsTimeDataFloat32);
            std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));
            std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));

            EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),
                        biasDataFloat32.data(), weightsFeatureDataFloat32.data(),
                        weightsTimeDataFloat32.data(), outputDataFloat32.data(),
                        outputStateDataFloat32.data());
            convertFloat32ToFloat16(outputDataFloat32,
                                    reinterpret_cast<_Float16*>(output_->buffer));
            convertFloat32ToFloat16(outputStateDataFloat32,
                                    reinterpret_cast<_Float16*>(state_out_->buffer));
            break;
        }
        case OperandType::TENSOR_FLOAT32: {
            EvalFloat32(reinterpret_cast<float*>(input_->buffer),
                        reinterpret_cast<float*>(state_in_->buffer),
                        reinterpret_cast<float*>(bias_->buffer),
                        reinterpret_cast<float*>(weights_feature_->buffer),
                        reinterpret_cast<float*>(weights_time_->buffer),
                        reinterpret_cast<float*>(output_->buffer),
                        reinterpret_cast<float*>(state_out_->buffer));
            break;
        }
        default: {
            LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);
            return false;
        }
    }
    return true;
}

// Float32 implementation of one SVDF step.
//
// Parameters (all buffers are indexed as flat arrays):
//   inputData          - input values; batch_size * input_size elements are consumed.
//   inputStateData     - incoming state; batch_size * num_filters * memory_size elements.
//   biasData           - bias values; only read when bias_ is not a null input.
//   weightsFeatureData - feature weights passed to the matrix/batch-vector multiply.
//   weightsTimeData    - time weights passed to the per-batch dot product.
//   outputData         - receives batch_size * num_units results.
//   outputStateData    - receives the updated state, same size as inputStateData.
//
// Sizes are taken from the operand shapes captured by the constructor, and
// num_units = num_filters / rank.
// NOTE(review): this function divides by params_.rank_ and indexes slot memory_size - 1, so it
// relies on the caller having validated that rank is nonzero and memory_size >= 1.
void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,
                       const float* weightsFeatureData, const float* weightsTimeData,
                       float* outputData, float* outputStateData) {
    NNTRACE_COMP("SVDF::EvalFloat32");

    const int rank = params_.rank_;
    const int batch_size = SizeOfDimension(input_, 0);
    const int input_size = SizeOfDimension(input_, 1);
    const int num_filters = SizeOfDimension(weights_feature_, 0);
    const int num_units = num_filters / rank;
    const int memory_size = SizeOfDimension(weights_time_, 1);

    // Start from the incoming state. The newest slot (index memory_size - 1) of every
    // (batch, filter) row is overwritten below with the freshly computed activation, so it does
    // not need to be cleared first.
    memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);

    // Compute conv1d(inputs, weights_feature).
    // The scratch buffer lives on the heap rather than in a variable-length array: VLAs are not
    // standard C++ and their stack footprint would be dictated by the model's dimensions. The
    // buffer is zero-initialised because the matmul is accumulative.
    std::vector<float> scratch(static_cast<size_t>(batch_size) * num_filters, 0.0f);
    tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(
            weightsFeatureData, num_filters, input_size, inputData, batch_size, scratch.data());

    // Copy the latest activation from scratch into activation_state:
    // The last, i.e. (memory_size-1)th entry for each batch, and filter.
    for (int i = 0; i < batch_size * num_filters; ++i) {
        outputStateData[i * memory_size + memory_size - 1] = scratch[i];
    }

    // Begin ApplyTimeWeightsBiasAndActivation
    // Compute matmul(state, weights_time). Each batch's slice of scratch is reused as the
    // destination of the per-filter dot products.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        float* scratch_ptr_batch = scratch.data() + b * num_filters;
        tflite::tensor_utils::BatchVectorBatchVectorDotProduct(
                weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch);
    }

    // Reduction sum: collapse every `rank` consecutive scratch entries into one output value.
    tflite::tensor_utils::ReductionSumVector(scratch.data(), outputData, batch_size * num_units,
                                             rank);

    // Add bias if provided.
    if (!IsNullInput(bias_)) {
        tflite::tensor_utils::VectorBatchVectorAdd(biasData, num_units, batch_size, outputData);
    }

    // Apply activation.
    tflite::tensor_utils::ApplyActivationToVector(outputData, batch_size * num_units,
                                                  params_.activation_, outputData);
    // Finished ApplyTimeWeightsBiasAndActivation

    // Advance the state: within every (batch, filter) row, move entries 1..memory_size-1 down
    // to 0..memory_size-2 (dropping the entry at index 0) and clear the last slot.
    for (int b = 0; b < batch_size; b++) {
        float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;
        for (int f = 0; f < num_filters; f++) {
            std::copy(state_out_ptr_batch + 1, state_out_ptr_batch + memory_size,
                      state_out_ptr_batch);
            state_out_ptr_batch[memory_size - 1] = 0.0;
            state_out_ptr_batch += memory_size;
        }
    }
}

}  // namespace nn
}  // namespace android
v811_spc009_project 4 months ago			`/*`
			`* Copyright (C) 2017 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#define LOG_TAG "Operations"`

			`#include "SVDF.h"`

			`#include <algorithm>`
			`#include <vector>`

			`#include "CpuExecutor.h"`
			`#include "CpuOperationUtils.h"`
			`#include "Tracing.h"`

			`namespace android {`
			`namespace nn {`

			`SVDF::SVDF(const Operation& operation, RunTimeOperandInfo* operands) {`
			`NNTRACE_TRANS("SVDF::SVDF");`
			`input_ = GetInput(operation, operands, kInputTensor);`
			`weights_feature_ = GetInput(operation, operands, kWeightsFeatureTensor);`
			`weights_time_ = GetInput(operation, operands, kWeightsTimeTensor);`
			`bias_ = GetInput(operation, operands, kBiasTensor);`
			`state_in_ = GetInput(operation, operands, kStateInTensor);`

			`const auto& rankOperand = *GetInput(operation, operands, kRankParam);`
			`params_.rank_ = getScalarDataWithDefault<int>(rankOperand, 0);`
			`const auto& activationOperand = *GetInput(operation, operands, kActivationParam);`
			`params_.activation_ = static_cast<TfLiteFusedActivation>(getScalarDataWithDefault<int>(`
			`activationOperand, TfLiteFusedActivation::kTfLiteActNone));`

			`state_out_ = GetOutput(operation, operands, kStateOutTensor);`
			`output_ = GetOutput(operation, operands, kOutputTensor);`
			`}`

			`bool SVDF::Prepare(const Operation& operation, RunTimeOperandInfo* operands, Shape* stateShape,`
			`Shape* outputShape) {`
			`NNTRACE_TRANS("SVDF::Prepare");`
			`// Check we have all the inputs and outputs we need.`
			`const int num_inputs = NumInputsWithValues(operation, operands);`

			`NN_CHECK(num_inputs == 6 \|\| num_inputs == 7);`
			`constexpr int requiredInputs[] = {`
			`kInputTensor, kWeightsFeatureTensor, kWeightsTimeTensor, kStateInTensor,`
			`kRankParam, kActivationParam,`
			`};`
			`for (const int requiredInput : requiredInputs) {`
			`NN_RET_CHECK(!IsNullInput(GetInput(operation, operands, requiredInput)))`
			`<< "required input " << requiredInput << " is omitted";`
			`}`
			`NN_CHECK_EQ(NumOutputs(operation), 2);`

			`// Check that the scalar operands' buffers are large enough.`
			`const auto& rankOperand = *GetInput(operation, operands, kRankParam);`
			`NN_RET_CHECK(rankOperand.length >= sizeof(int));`
			`const auto& activationOperand = *GetInput(operation, operands, kActivationParam);`
			`NN_RET_CHECK(activationOperand.length >= sizeof(int));`

			`const RunTimeOperandInfo* input = GetInput(operation, operands, SVDF::kInputTensor);`
			`const RunTimeOperandInfo* weights_feature =`
			`GetInput(operation, operands, SVDF::kWeightsFeatureTensor);`
			`const RunTimeOperandInfo* weights_time =`
			`GetInput(operation, operands, SVDF::kWeightsTimeTensor);`

			`// Check all the parameters of tensor match within themselves and match the`
			`// input configuration.`
			`const int rank = getScalarData<int>(*GetInput(operation, operands, kRankParam));`
			`const uint32_t batch_size = SizeOfDimension(input, 0);`
			`const uint32_t num_filters = SizeOfDimension(weights_feature, 0);`
			`NN_CHECK_EQ(num_filters % rank, 0);`
			`const uint32_t num_units = num_filters / rank;`
			`const uint32_t memory_size = SizeOfDimension(weights_time, 1);`
			`NN_CHECK_EQ(SizeOfDimension(input, 1), SizeOfDimension(weights_feature, 1));`
			`NN_CHECK_EQ(SizeOfDimension(weights_time, 0), num_filters);`

			`const RunTimeOperandInfo* bias = GetInput(operation, operands, kBiasTensor);`
			`if (!IsNullInput(bias)) {`
			`NN_CHECK_EQ(SizeOfDimension(bias, 0), num_units);`
			`}`

			`// Resize state.`
			`const Shape& inputShape = input->shape();`
			`stateShape->type = inputShape.type;`
			`stateShape->dimensions = {batch_size, memory_size * num_filters};`
			`stateShape->offset = inputShape.offset;`
			`stateShape->scale = inputShape.scale;`

			`// Resize output.`
			`outputShape->type = inputShape.type;`
			`outputShape->dimensions = {batch_size, num_units};`
			`outputShape->offset = inputShape.offset;`
			`outputShape->scale = inputShape.scale;`

			`return true;`
			`}`

			`bool SVDF::Eval() {`
			`NNTRACE_TRANS("SVDF::Eval");`
			`switch (input_->type) {`
			`case OperandType::TENSOR_FLOAT16: {`
			`std::vector<float> inputDataFloat32(getNumberOfElements(input_->shape()));`
			`convertFloat16ToFloat32(reinterpret_cast<_Float16*>(input_->buffer), &inputDataFloat32);`
			`std::vector<float> inputStateDataFloat32(getNumberOfElements(state_in_->shape()));`
			`convertFloat16ToFloat32(reinterpret_cast<_Float16*>(state_in_->buffer),`
			`&inputStateDataFloat32);`
			`std::vector<float> biasDataFloat32(getNumberOfElements(bias_->shape()));`
			`if (!IsNullInput(bias_)) {`
			`convertFloat16ToFloat32(reinterpret_cast<_Float16*>(bias_->buffer),`
			`&biasDataFloat32);`
			`}`
			`std::vector<float> weightsFeatureDataFloat32(`
			`getNumberOfElements(weights_feature_->shape()));`
			`convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_feature_->buffer),`
			`&weightsFeatureDataFloat32);`
			`std::vector<float> weightsTimeDataFloat32(getNumberOfElements(weights_time_->shape()));`
			`convertFloat16ToFloat32(reinterpret_cast<_Float16*>(weights_time_->buffer),`
			`&weightsTimeDataFloat32);`
			`std::vector<float> outputDataFloat32(getNumberOfElements(output_->shape()));`
			`std::vector<float> outputStateDataFloat32(getNumberOfElements(state_out_->shape()));`

			`EvalFloat32(inputDataFloat32.data(), inputStateDataFloat32.data(),`
			`biasDataFloat32.data(), weightsFeatureDataFloat32.data(),`
			`weightsTimeDataFloat32.data(), outputDataFloat32.data(),`
			`outputStateDataFloat32.data());`
			`convertFloat32ToFloat16(outputDataFloat32,`
			`reinterpret_cast<_Float16*>(output_->buffer));`
			`convertFloat32ToFloat16(outputStateDataFloat32,`
			`reinterpret_cast<_Float16*>(state_out_->buffer));`
			`break;`
			`}`
			`case OperandType::TENSOR_FLOAT32: {`
			`EvalFloat32(reinterpret_cast<float*>(input_->buffer),`
			`reinterpret_cast<float*>(state_in_->buffer),`
			`reinterpret_cast<float*>(bias_->buffer),`
			`reinterpret_cast<float*>(weights_feature_->buffer),`
			`reinterpret_cast<float*>(weights_time_->buffer),`
			`reinterpret_cast<float*>(output_->buffer),`
			`reinterpret_cast<float*>(state_out_->buffer));`
			`break;`
			`}`
			`default: {`
			`LOG(ERROR) << "Unsupported data type: " << static_cast<int>(input_->type);`
			`return false;`
			`}`
			`}`
			`return true;`
			`}`

			`void SVDF::EvalFloat32(const float* inputData, const float* inputStateData, const float* biasData,`
			`const float* weightsFeatureData, const float* weightsTimeData,`
			`float* outputData, float* outputStateData) {`
			`NNTRACE_COMP("SVDF::EvalFloat32");`

			`const int rank = params_.rank_;`
			`const int batch_size = SizeOfDimension(input_, 0);`
			`const int input_size = SizeOfDimension(input_, 1);`
			`const int num_filters = SizeOfDimension(weights_feature_, 0);`
			`const int num_units = num_filters / rank;`
			`const int memory_size = SizeOfDimension(weights_time_, 1);`

			`memcpy(outputStateData, inputStateData, sizeof(float) * batch_size * memory_size * num_filters);`
			`// Compute conv1d(inputs, weights_feature).`
			`for (int b = 0; b < batch_size; b++) {`
			`float* state_ptr_batch = outputStateData + b * memory_size * num_filters;`
			`for (int c = 0; c < num_filters; c++) {`
			`float* state_ptr = state_ptr_batch + c * memory_size;`
			`state_ptr[memory_size - 1] = 0.0;`
			`}`
			`}`

			`// Clear scratch (the matmul is accumulative).`
			`float scratch[batch_size * num_filters];`
			`std::fill_n(scratch, batch_size * num_filters, 0.0f);`
			`tflite::tensor_utils::MatrixBatchVectorMultiplyAccumulate(`
			`weightsFeatureData, num_filters, input_size, inputData, batch_size, scratch);`

			`// Copy the latest activation from scratch into activation_state:`
			`// The last, i.e. (memory_size-1)th entry for each batch, and filter.`
			`for (int i = 0; i < batch_size * num_filters; ++i) {`
			`outputStateData[i * memory_size + memory_size - 1] = scratch[i];`
			`}`

			`// Begin ApplyTimeWeightsBiasAndActivation`
			`// Compute matmul(state, weights_time).`
			`for (int b = 0; b < batch_size; b++) {`
			`float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;`
			`float* scratch_ptr_batch = scratch + b * num_filters;`
			`tflite::tensor_utils::BatchVectorBatchVectorDotProduct(`
			`weightsTimeData, state_out_ptr_batch, memory_size, num_filters, scratch_ptr_batch);`
			`}`

			`// Reduction sum`
			`tflite::tensor_utils::ReductionSumVector(scratch, outputData, batch_size * num_units, rank);`

			`// Add bias if provided.`
			`if (!IsNullInput(bias_)) {`
			`tflite::tensor_utils::VectorBatchVectorAdd(biasData, num_units, batch_size, outputData);`
			`}`

			`// Apply activation.`
			`tflite::tensor_utils::ApplyActivationToVector(outputData, batch_size * num_units,`
			`params_.activation_, outputData);`
			`// Finished ApplyTimeWeightsBiasAndActivation`

			`// Right shift the state.`
			`for (int b = 0; b < batch_size; b++) {`
			`float* state_out_ptr_batch = outputStateData + b * memory_size * num_filters;`
			`for (int f = 0; f < num_filters; f++) {`
			`std::copy(state_out_ptr_batch + 1, state_out_ptr_batch + memory_size,`
			`state_out_ptr_batch);`
			`state_out_ptr_batch[memory_size - 1] = 0.0;`
			`state_out_ptr_batch += memory_size;`
			`}`
			`}`
			`}`

			`} // namespace nn`
			`} // namespace android`