/**
 * Copyright 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H
#define COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H

#include "tensorflow/lite/delegates/gpu/delegate.h"
#include "tensorflow/lite/delegates/nnapi/nnapi_delegate.h"
#include "tensorflow/lite/interpreter.h"
#include "tensorflow/lite/model.h"

#include <unistd.h>

#include <functional>
#include <memory>
#include <optional>
#include <string>
#include <vector>

struct InferenceOutput {
  uint8_t* ptr;
  size_t size;
};

// Inputs and expected outputs for inference
struct InferenceInOut {
  // Input can either be directly specified as a pointer or indirectly with
  // the createInput callback. This is needed for large datasets where
  // allocating memory for all inputs at once is not feasible.
  uint8_t* input;
  size_t input_size;

  std::vector<InferenceOutput> outputs;
  std::function<bool(uint8_t*, size_t)> createInput;
};
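
// Illustrative usage sketch (not part of the original header): an entry can
// carry a preloaded buffer, or defer input generation to createInput for
// datasets too large to hold in memory at once. The buffer names below are
// hypothetical.
//
//   InferenceInOut entry{};
//   entry.input = preloadedInput;        // uint8_t* owned by the caller
//   entry.input_size = preloadedSize;
//   entry.outputs = {{goldenOutput, goldenOutputSize}};
//   // Alternatively, let the benchmark fill the input buffer lazily:
//   entry.createInput = [](uint8_t* buffer, size_t size) {
//     std::fill(buffer, buffer + size, 0);  // placeholder input generator
//     return true;
//   };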

// Inputs and expected outputs for an inference sequence.
using InferenceInOutSequence = std::vector<InferenceInOut>;

// Result of a single inference
struct InferenceResult {
  float computeTimeSec;
  // MSE for each output
  std::vector<float> meanSquareErrors;
  // Max single error for each output
  std::vector<float> maxSingleErrors;
  // Outputs
  std::vector<std::vector<uint8_t>> inferenceOutputs;
  int inputOutputSequenceIndex;
  int inputOutputIndex;
};

struct CompilationBenchmarkResult {
  std::vector<float> compileWithoutCacheTimeSec;
  // The following optional fields have no value if compilation caching is not supported.
  std::optional<std::vector<float>> saveToCacheTimeSec;
  std::optional<std::vector<float>> prepareFromCacheTimeSec;
  // The total size of cache files. It is zero if compilation caching is not supported.
  int cacheSizeBytes = 0;
};

/** Discard inference output in inference results. */
const int FLAG_DISCARD_INFERENCE_OUTPUT = 1 << 0;
/** Do not expect golden output for inference inputs. */
const int FLAG_IGNORE_GOLDEN_OUTPUT = 1 << 1;
/** Collect only 1 benchmark result every INFERENCE_OUT_SAMPLE_RATE inferences. */
const int FLAG_SAMPLE_BENCHMARK_RESULTS = 1 << 2;

const int INFERENCE_OUT_SAMPLE_RATE = 10;
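
// Illustrative flag combination (a sketch, not from the original header): the
// flags are bit masks, so they are combined with bitwise OR. With sampling
// enabled, only 1 of every INFERENCE_OUT_SAMPLE_RATE (i.e. 10) inferences
// contributes a result entry.
//
//   int flags = FLAG_DISCARD_INFERENCE_OUTPUT | FLAG_SAMPLE_BENCHMARK_RESULTS;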

enum class CompilationBenchmarkType {
  // Benchmark without cache
  WITHOUT_CACHE,
  // Benchmark cache miss
  SAVE_TO_CACHE,
  // Benchmark cache hit
  PREPARE_FROM_CACHE,
};

/** TFLite backend. */
constexpr int TFLITE_CPU = 0;
constexpr int TFLITE_NNAPI = 1;
constexpr int TFLITE_GPU = 2;

class BenchmarkModel {
 public:
  ~BenchmarkModel();

  static BenchmarkModel* create(const char* modelfile, int tfliteBackend,
                                bool enable_intermediate_tensors_dump,
                                int* nnapiErrno, const char* nnapi_device_name,
                                bool mmapModel, const char* nnapi_cache_dir);

  bool resizeInputTensors(std::vector<int> shape);
  bool setInput(const uint8_t* dataPtr, size_t length);
  bool runInference();
  // Resets TFLite states (RNN/LSTM states etc.).
  bool resetStates();

  bool benchmark(const std::vector<InferenceInOutSequence>& inOutData,
                 int seqInferencesMaxCount, float timeout, int flags,
                 std::vector<InferenceResult>* result);

  bool benchmarkCompilation(int maxNumIterations, float warmupTimeout, float runTimeout,
                            CompilationBenchmarkResult* result);

  bool dumpAllLayers(const char* path,
                     const std::vector<InferenceInOutSequence>& inOutData);

 private:
  BenchmarkModel() = default;
  bool init(const char* modelfile, int tfliteBackend,
            bool enable_intermediate_tensors_dump,
            int* nnapiErrno, const char* nnapi_device_name,
            /* flag to choose between memory mapping the model and initializing
               the model from the program's memory */
            bool mmapModel,
            const char* nnapi_cache_dir);

  void getOutputError(const uint8_t* dataPtr, size_t length,
                      InferenceResult* result, int output_index);
  void saveInferenceOutput(InferenceResult* result, int output_index);

  bool runCompilation(const char* cacheDir);
  bool benchmarkSingleTypeOfCompilation(CompilationBenchmarkType type, int maxNumIterations,
                                        float timeout, std::vector<float>* results);
  bool benchmarkSingleTypeOfCompilationWithWarmup(CompilationBenchmarkType type,
                                                  int maxNumIterations, float warmupTimeout,
                                                  float runTimeout, std::vector<float>* results);
  bool getCompilationCacheSize(int* cacheSizeBytes);

  std::string mModelBuffer;
  std::unique_ptr<tflite::FlatBufferModel> mTfliteModel;
  std::unique_ptr<tflite::Interpreter> mTfliteInterpreter;
  std::unique_ptr<tflite::StatefulNnApiDelegate> mTfliteNnapiDelegate;
  // Store indices of output tensors, used to dump intermediate tensors
  std::vector<int> outputs;

  // Parameters for compilation
  std::string mModelFile;
  std::optional<std::string> mCacheDir;
  std::string mNnApiDeviceName;
#if defined(NN_BENCHMARK_ENABLE_GPU)
  TfLiteDelegate* mGpuDelegate;
#endif  // defined(NN_BENCHMARK_ENABLE_GPU)
  int mTfliteBackend;
};
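
// Illustrative call sequence (a sketch; the model path, flag choices, and the
// inOutData vector of InferenceInOutSequence are hypothetical, prepared by
// the caller):
//
//   int nnapiErrno = 0;
//   std::unique_ptr<BenchmarkModel> model(BenchmarkModel::create(
//       "/data/local/tmp/model.tflite", TFLITE_NNAPI,
//       /*enable_intermediate_tensors_dump=*/false, &nnapiErrno,
//       /*nnapi_device_name=*/nullptr, /*mmapModel=*/true,
//       /*nnapi_cache_dir=*/nullptr));
//   if (model != nullptr) {
//     std::vector<InferenceResult> results;
//     model->benchmark(inOutData, /*seqInferencesMaxCount=*/100,
//                      /*timeout=*/10.0f, FLAG_DISCARD_INFERENCE_OUTPUT,
//                      &results);
//   }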

#endif  // COM_EXAMPLE_ANDROID_NN_BENCHMARK_RUN_TFLITE_H