You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
261 lines
10 KiB
261 lines
10 KiB
4 months ago
|
/*
|
||
|
* Copyright (C) 2017 The Android Open Source Project
|
||
|
*
|
||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||
|
* you may not use this file except in compliance with the License.
|
||
|
* You may obtain a copy of the License at
|
||
|
*
|
||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||
|
*
|
||
|
* Unless required by applicable law or agreed to in writing, software
|
||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||
|
* See the License for the specific language governing permissions and
|
||
|
* limitations under the License.
|
||
|
*/
|
||
|
|
||
|
#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
|
||
|
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
|
||
|
|
||
|
#include <LegacyUtils.h>
|
||
|
#include <android-base/macros.h>
|
||
|
#include <nnapi/IBurst.h>
|
||
|
#include <nnapi/IDevice.h>
|
||
|
#include <nnapi/Types.h>
|
||
|
|
||
|
#include <map>
|
||
|
#include <memory>
|
||
|
#include <string>
|
||
|
#include <tuple>
|
||
|
#include <unordered_set>
|
||
|
#include <utility>
|
||
|
#include <vector>
|
||
|
|
||
|
#include "ExecutionCallback.h"
|
||
|
#include "Memory.h"
|
||
|
|
||
|
namespace android {
|
||
|
namespace nn {
|
||
|
|
||
|
// Forward declaration
|
||
|
class Device;
|
||
|
class MetaModel;
|
||
|
class ModelArgumentInfo;
|
||
|
|
||
|
// A unified interface for a reusable execution with cached resources.
|
||
|
// This object provides no thread-safety guarantee. The caller must guarantee there is at most one
|
||
|
// call to RuntimeExecution::compute or RuntimeExecution::computeFenced on the same RuntimeExecution
|
||
|
// object in flight at a time.
|
||
|
class RuntimeExecution {
|
||
|
DISALLOW_COPY_AND_ASSIGN(RuntimeExecution);
|
||
|
|
||
|
public:
|
||
|
RuntimeExecution() = default;
|
||
|
virtual ~RuntimeExecution() = default;
|
||
|
|
||
|
virtual std::tuple<int, std::vector<OutputShape>, Timing> compute(
|
||
|
const SharedBurst& burstController, const OptionalTimePoint& deadline) const = 0;
|
||
|
|
||
|
// The returned timing information is only valid if the callback is nullptr.
|
||
|
// Returns error_code, sync_fence, callback and timing.
|
||
|
virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
|
||
|
const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
|
||
|
const OptionalDuration& timeoutDurationAfterFence) const = 0;
|
||
|
};
|
||
|
|
||
|
// A unified interface for actual driver prepared model as well as the CPU.
|
||
|
class RuntimePreparedModel {
|
||
|
DISALLOW_COPY_AND_ASSIGN(RuntimePreparedModel);
|
||
|
|
||
|
public:
|
||
|
RuntimePreparedModel() = default;
|
||
|
virtual ~RuntimePreparedModel() = default;
|
||
|
|
||
|
virtual const Device* getDevice() const = 0;
|
||
|
virtual SharedPreparedModel getInterface() const = 0;
|
||
|
|
||
|
// Perform computation with given input/output argument info and memory pools.
|
||
|
virtual std::tuple<int, std::vector<OutputShape>, Timing> execute(
|
||
|
const std::vector<ModelArgumentInfo>& inputs,
|
||
|
const std::vector<ModelArgumentInfo>& outputs,
|
||
|
const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
|
||
|
MeasureTiming measure, const OptionalTimePoint& deadline,
|
||
|
const OptionalDuration& loopTimeoutDuration) const = 0;
|
||
|
|
||
|
// Perform fenced computation with given input/output argument info and memory pools.
|
||
|
// The returned timing information is only valid if the callback is nullptr.
|
||
|
// Returns error_code, sync_fence, callback and timing.
|
||
|
virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
|
||
|
const std::vector<ModelArgumentInfo>& inputs,
|
||
|
const std::vector<ModelArgumentInfo>& outputs,
|
||
|
const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
|
||
|
MeasureTiming measure, const OptionalTimePoint& deadline,
|
||
|
const OptionalDuration& loopTimeoutDuration,
|
||
|
const OptionalDuration& timeoutDurationAfterFence) const = 0;
|
||
|
|
||
|
// Create a reusable execution with given input/output argument info and memory pools.
|
||
|
virtual std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
|
||
|
const std::vector<ModelArgumentInfo>& inputs,
|
||
|
const std::vector<ModelArgumentInfo>& outputs,
|
||
|
const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
|
||
|
const OptionalDuration& loopTimeoutDuration) const = 0;
|
||
|
|
||
|
virtual GeneralResult<SharedBurst> configureExecutionBurst() const = 0;
|
||
|
|
||
|
virtual MemoryPreference getMemoryPreference() const = 0;
|
||
|
};
|
||
|
|
||
|
// Callable taking no arguments and returning a Model by value.
// Device::prepareModel() accepts one as its `makeModel` parameter.
using ModelFactory = std::function<Model()>;
|
||
|
|
||
|
struct CacheHandles {
|
||
|
std::vector<SharedHandle> modelCache;
|
||
|
std::vector<SharedHandle> dataCache;
|
||
|
};
|
||
|
|
||
|
// Cache location expressed as a string.
// NOTE(review): presumably a filesystem directory path, judging by the name -- confirm.
using CacheDir = std::string;
|
||
|
|
||
|
struct CacheInfo {
|
||
|
std::variant<CacheDir, CacheHandles> variant;
|
||
|
};
|
||
|
|
||
|
// A unified interface for actual driver devices as well as the CPU
|
||
|
class Device {
|
||
|
DISALLOW_COPY_AND_ASSIGN(Device);
|
||
|
|
||
|
public:
|
||
|
Device() = default;
|
||
|
virtual ~Device() = default;
|
||
|
|
||
|
// Introspection methods returning device information
|
||
|
virtual const std::string& getName() const = 0;
|
||
|
virtual const std::string& getVersionString() const = 0;
|
||
|
virtual int64_t getFeatureLevel() const = 0;
|
||
|
virtual int32_t getType() const = 0;
|
||
|
virtual bool isUpdatable() const = 0;
|
||
|
virtual const std::vector<Extension>& getSupportedExtensions() const = 0;
|
||
|
|
||
|
// See the MetaModel class in MetaModel.h for more details.
|
||
|
virtual std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const = 0;
|
||
|
|
||
|
virtual const Capabilities& getCapabilities() const = 0;
|
||
|
virtual Capabilities::PerformanceInfo getPerformance(OperandType type) const = 0;
|
||
|
virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
|
||
|
virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
|
||
|
virtual Capabilities::PerformanceInfo getIfPerformance() const = 0;
|
||
|
virtual Capabilities::PerformanceInfo getWhilePerformance() const = 0;
|
||
|
virtual std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const = 0;
|
||
|
virtual bool isCachingSupported() const = 0;
|
||
|
virtual int wait() const = 0;
|
||
|
|
||
|
virtual std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
|
||
|
const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
|
||
|
const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
|
||
|
const std::optional<CacheToken>& maybeToken) const = 0;
|
||
|
|
||
|
// The caller is responsible for making sure the MemoryDescriptor only contains
|
||
|
// PreparedModels from the same Device.
|
||
|
virtual std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
|
||
|
OperandType type) const = 0;
|
||
|
};
|
||
|
|
||
|
// Manages the NN HAL devices. Only one instance of this class will exist.
|
||
|
// Use get() to retrieve it.
|
||
|
class DeviceManager {
|
||
|
public:
|
||
|
const std::vector<std::shared_ptr<Device>>& getDrivers() const {
|
||
|
if (mSetCpuOnly || mDebugNNCpuOnly) {
|
||
|
return mDevicesCpuOnly;
|
||
|
}
|
||
|
return mDevices;
|
||
|
}
|
||
|
|
||
|
// For testing only:
|
||
|
void setUseCpuOnly(bool useCpuOnly) { mSetCpuOnly = useCpuOnly; }
|
||
|
bool getUseCpuOnly() const { return mSetCpuOnly; }
|
||
|
|
||
|
bool syncExecCpu() const { return mSyncExecCpu; }
|
||
|
bool syncExecRuntime() const { return mSyncExecRuntime; }
|
||
|
|
||
|
// How to handle graph partitioning?
|
||
|
// 0 - Don't do graph partitioning.
|
||
|
// 1 - Do graph partitioning; but fall back to non-partitioned
|
||
|
// execution if there is a partitioning failure.
|
||
|
// 2 - Do graph partitioning, and rely on it; there is no fallback.
|
||
|
enum { kPartitioningNo = 0, kPartitioningWithFallback = 1, kPartitioningWithoutFallback = 2 };
|
||
|
uint32_t getPartitioning() const { return mPartitioning; }
|
||
|
static bool partitioningAllowsFallback(uint32_t partitioning) {
|
||
|
return partitioning == kPartitioningWithFallback;
|
||
|
}
|
||
|
|
||
|
bool strictSlicing() const { return mStrictSlicing; }
|
||
|
|
||
|
// Returns the singleton manager.
|
||
|
static DeviceManager* get();
|
||
|
|
||
|
// Returns the singleton Cpu device.
|
||
|
static std::shared_ptr<Device> getCpuDevice();
|
||
|
|
||
|
// The forTest_* functions below are solely intended for use by unit tests.
|
||
|
|
||
|
// Returns all devices (ignores the cpu-only flags).
|
||
|
std::vector<std::shared_ptr<Device>> forTest_getDevices() const { return mDevices; }
|
||
|
|
||
|
// Sets the device list (does not affect cpu-only queries).
|
||
|
void forTest_setDevices(std::vector<std::shared_ptr<Device>> devices) {
|
||
|
mDevices = std::move(devices);
|
||
|
}
|
||
|
|
||
|
// Register a test device.
|
||
|
void forTest_registerDevice(const SharedDevice& device) { registerDevice(device); }
|
||
|
|
||
|
// Re-initialize the list of available devices.
|
||
|
void forTest_reInitializeDeviceList() {
|
||
|
mDevices.clear();
|
||
|
mDevicesCpuOnly.clear();
|
||
|
findAvailableDevices();
|
||
|
}
|
||
|
|
||
|
// Make a test device
|
||
|
static std::shared_ptr<Device> forTest_makeDriverDevice(const SharedDevice& device);
|
||
|
|
||
|
bool forTest_isCpuDevice(const ANeuralNetworksDevice* device) const {
|
||
|
return reinterpret_cast<const Device*>(device) == getCpuDevice().get();
|
||
|
}
|
||
|
|
||
|
private:
|
||
|
// Builds the list of available drivers and queries their capabilities.
|
||
|
DeviceManager();
|
||
|
|
||
|
// Adds a device for the manager to use.
|
||
|
void registerDevice(const SharedDevice& device);
|
||
|
|
||
|
void findAvailableDevices();
|
||
|
|
||
|
// List of all the devices we discovered (including CpuDevice).
|
||
|
std::vector<std::shared_ptr<Device>> mDevices;
|
||
|
|
||
|
// We set this one to have CpuDevice only. To be used when m*CpuOnly is true.
|
||
|
std::vector<std::shared_ptr<Device>> mDevicesCpuOnly;
|
||
|
|
||
|
// If either of these is true, we'll ignore the drivers that are
|
||
|
// on the device and run everything on the CPU.
|
||
|
bool mSetCpuOnly = false; // set by setUseCpuOnly()
|
||
|
bool mDebugNNCpuOnly = false; // derived from system property debug.nn.cpuonly
|
||
|
|
||
|
// synchronous execution
|
||
|
bool mSyncExecCpu = true;
|
||
|
bool mSyncExecRuntime = false;
|
||
|
|
||
|
static const uint32_t kPartitioningDefault = kPartitioningWithFallback;
|
||
|
uint32_t mPartitioning = kPartitioningDefault;
|
||
|
|
||
|
bool mStrictSlicing = false;
|
||
|
};
|
||
|
|
||
|
// Returns a list of SharedDevice objects, by value.
// NOTE(review): declared here only; which devices are included (and in what
// order) is determined by the definition elsewhere -- confirm there.
std::vector<SharedDevice> getDevices();
|
||
|
|
||
|
} // namespace nn
|
||
|
} // namespace android
|
||
|
|
||
|
#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
|