You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
187 lines
6.8 KiB
187 lines
6.8 KiB
4 months ago
|
/*
|
||
|
* Copyright (C) 2018 The Android Open Source Project
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
#include "annotator/model-executor.h"
|
||
|
|
||
|
#include "annotator/quantization.h"
|
||
|
#include "utils/base/logging.h"
|
||
|
|
||
|
namespace libtextclassifier3 {
|
||
|
|
||
|
TensorView<float> ModelExecutor::ComputeLogits(
|
||
|
const TensorView<float>& features, tflite::Interpreter* interpreter) const {
|
||
|
if (!interpreter) {
|
||
|
return TensorView<float>::Invalid();
|
||
|
}
|
||
|
interpreter->ResizeInputTensor(kInputIndexFeatures, features.shape());
|
||
|
if (interpreter->AllocateTensors() != kTfLiteOk) {
|
||
|
TC3_VLOG(1) << "Allocation failed.";
|
||
|
return TensorView<float>::Invalid();
|
||
|
}
|
||
|
|
||
|
SetInput<float>(kInputIndexFeatures, features, interpreter);
|
||
|
|
||
|
if (interpreter->Invoke() != kTfLiteOk) {
|
||
|
TC3_VLOG(1) << "Interpreter failed.";
|
||
|
return TensorView<float>::Invalid();
|
||
|
}
|
||
|
|
||
|
return OutputView<float>(kOutputIndexLogits, interpreter);
|
||
|
}
|
||
|
|
||
|
std::unique_ptr<TFLiteEmbeddingExecutor> TFLiteEmbeddingExecutor::FromBuffer(
|
||
|
const flatbuffers::Vector<uint8_t>* model_spec_buffer, int embedding_size,
|
||
|
int quantization_bits,
|
||
|
const Model_::EmbeddingPruningMask* embedding_pruning_mask) {
|
||
|
std::unique_ptr<TfLiteModelExecutor> executor =
|
||
|
TfLiteModelExecutor::FromBuffer(model_spec_buffer);
|
||
|
if (!executor) {
|
||
|
TC3_LOG(ERROR) << "Could not load TFLite model for embeddings.";
|
||
|
return nullptr;
|
||
|
}
|
||
|
|
||
|
std::unique_ptr<tflite::Interpreter> interpreter =
|
||
|
executor->CreateInterpreter();
|
||
|
if (!interpreter) {
|
||
|
TC3_LOG(ERROR) << "Could not build TFLite interpreter for embeddings.";
|
||
|
return nullptr;
|
||
|
}
|
||
|
|
||
|
if (interpreter->tensors_size() != 2) {
|
||
|
return nullptr;
|
||
|
}
|
||
|
const TfLiteTensor* embeddings = interpreter->tensor(0);
|
||
|
if (embeddings->dims->size != 2) {
|
||
|
return nullptr;
|
||
|
}
|
||
|
int num_buckets = embeddings->dims->data[0];
|
||
|
const TfLiteTensor* scales = interpreter->tensor(1);
|
||
|
if (scales->dims->size != 2 || scales->dims->data[0] != num_buckets ||
|
||
|
scales->dims->data[1] != 1) {
|
||
|
return nullptr;
|
||
|
}
|
||
|
int bytes_per_embedding = embeddings->dims->data[1];
|
||
|
if (!CheckQuantizationParams(bytes_per_embedding, quantization_bits,
|
||
|
embedding_size)) {
|
||
|
TC3_LOG(ERROR) << "Mismatch in quantization parameters.";
|
||
|
return nullptr;
|
||
|
}
|
||
|
|
||
|
return std::unique_ptr<TFLiteEmbeddingExecutor>(new TFLiteEmbeddingExecutor(
|
||
|
std::move(executor), quantization_bits, num_buckets, bytes_per_embedding,
|
||
|
embedding_size, scales, embeddings, std::move(interpreter),
|
||
|
embedding_pruning_mask));
|
||
|
}
|
||
|
|
||
|
TFLiteEmbeddingExecutor::TFLiteEmbeddingExecutor(
|
||
|
std::unique_ptr<TfLiteModelExecutor> executor, int quantization_bits,
|
||
|
int num_buckets, int bytes_per_embedding, int output_embedding_size,
|
||
|
const TfLiteTensor* scales, const TfLiteTensor* embeddings,
|
||
|
std::unique_ptr<tflite::Interpreter> interpreter,
|
||
|
const Model_::EmbeddingPruningMask* embedding_pruning_mask)
|
||
|
: executor_(std::move(executor)),
|
||
|
quantization_bits_(quantization_bits),
|
||
|
num_buckets_(num_buckets),
|
||
|
bytes_per_embedding_(bytes_per_embedding),
|
||
|
output_embedding_size_(output_embedding_size),
|
||
|
scales_(scales),
|
||
|
embeddings_(embeddings),
|
||
|
interpreter_(std::move(interpreter)) {
|
||
|
if ((embedding_pruning_mask != nullptr) &&
|
||
|
(embedding_pruning_mask->enabled())) {
|
||
|
for (int i = 0; i < embedding_pruning_mask->pruning_mask()->size(); i++) {
|
||
|
pruning_mask_.push_back((*(embedding_pruning_mask->pruning_mask()))[i]);
|
||
|
}
|
||
|
ComputePrefixCounts();
|
||
|
full_num_buckets_ = embedding_pruning_mask->full_num_buckets();
|
||
|
pruned_row_bucket_id_ = embedding_pruning_mask->pruned_row_bucket_id();
|
||
|
} else {
|
||
|
full_num_buckets_ = num_buckets;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void TFLiteEmbeddingExecutor::ComputePrefixCounts() {
|
||
|
// Pre-compute the prefix sums.
|
||
|
// For each i in {0, 1,...,pruning_mask_.size()-1}, we compute number of 1s
|
||
|
// in binary representations of the uint64 values in pruning_mask_ before
|
||
|
// index i. We set pruned_row_bucket_id_ to the total number of 1s
|
||
|
// in binary representations of all values in pruning_mask_.
|
||
|
int count = 0;
|
||
|
for (const uint64 mask : pruning_mask_) {
|
||
|
prefix_counts_.push_back(count);
|
||
|
count += __builtin_popcountll(mask);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
int TFLiteEmbeddingExecutor::PruneBucketId(int bucket_id) const {
|
||
|
// Implements auxiliary data structure for computing the pruned index of a
|
||
|
// given bucket_id.
|
||
|
// If bucket_id is present in pruning_mask_, we compute floor(bucket_id/64),
|
||
|
// look it up in the auxiliary array prefix_counts_, and add to it the number
|
||
|
// of 1s before before bucket_id % 64 in the 64-bit sequence
|
||
|
// pruning_mask_[floor(bucket_id/64)].
|
||
|
// If bucket_id is absent from pruning_mask_, we return pruned_row_bucket_id_.
|
||
|
const int bucket_id_major = bucket_id >> 6;
|
||
|
const int bucket_id_minor = bucket_id & 63;
|
||
|
uint64_t one = 1;
|
||
|
if (!(pruning_mask_[bucket_id_major] & (one << bucket_id_minor)))
|
||
|
return pruned_row_bucket_id_;
|
||
|
const uint64 zero = 0;
|
||
|
uint64 minor_mask;
|
||
|
if (bucket_id_minor == 0)
|
||
|
minor_mask = zero;
|
||
|
else
|
||
|
minor_mask = ((~zero) >> (64 - bucket_id_minor));
|
||
|
return prefix_counts_[bucket_id_major] +
|
||
|
__builtin_popcountll(pruning_mask_[bucket_id_major] & minor_mask);
|
||
|
}
|
||
|
|
||
|
bool TFLiteEmbeddingExecutor::AddEmbedding(
|
||
|
const TensorView<int>& sparse_features, float* dest, int dest_size) const {
|
||
|
if (dest_size != output_embedding_size_) {
|
||
|
TC3_LOG(ERROR) << "Mismatching dest_size and output_embedding_size: "
|
||
|
<< dest_size << " " << output_embedding_size_;
|
||
|
return false;
|
||
|
}
|
||
|
const int num_sparse_features = sparse_features.size();
|
||
|
for (int i = 0; i < num_sparse_features; ++i) {
|
||
|
const int bucket_id = sparse_features.data()[i];
|
||
|
int full_num_buckets;
|
||
|
if (!pruning_mask_.empty()) {
|
||
|
full_num_buckets = full_num_buckets_;
|
||
|
} else {
|
||
|
full_num_buckets = num_buckets_;
|
||
|
}
|
||
|
if (bucket_id >= full_num_buckets) {
|
||
|
return false;
|
||
|
}
|
||
|
int final_bucket_id;
|
||
|
if (!pruning_mask_.empty()) {
|
||
|
final_bucket_id = PruneBucketId(bucket_id);
|
||
|
} else {
|
||
|
final_bucket_id = bucket_id;
|
||
|
}
|
||
|
if (!DequantizeAdd(scales_->data.f, embeddings_->data.uint8,
|
||
|
bytes_per_embedding_, num_sparse_features,
|
||
|
quantization_bits_, final_bucket_id, dest, dest_size)) {
|
||
|
return false;
|
||
|
}
|
||
|
}
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
} // namespace libtextclassifier3
|