You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

261 lines
10 KiB

/*
* Copyright (C) 2017 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
#include <LegacyUtils.h>
#include <android-base/macros.h>
#include <nnapi/IBurst.h>
#include <nnapi/IDevice.h>
#include <nnapi/Types.h>
#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>
#include "ExecutionCallback.h"
#include "Memory.h"
namespace android {
namespace nn {
// Forward declaration
class Device;
class MetaModel;
class ModelArgumentInfo;
// A unified interface for a reusable execution with cached resources.
// This object provides no thread-safety guarantee. The caller must guarantee there is at most one
// call to RuntimeExecution::compute or RuntimeExecution::computeFenced on the same RuntimeExecution
// object in flight at a time.
class RuntimeExecution {
DISALLOW_COPY_AND_ASSIGN(RuntimeExecution);
public:
RuntimeExecution() = default;
virtual ~RuntimeExecution() = default;
virtual std::tuple<int, std::vector<OutputShape>, Timing> compute(
const SharedBurst& burstController, const OptionalTimePoint& deadline) const = 0;
// The returned timing information is only valid if the callback is nullptr.
// Returns error_code, sync_fence, callback and timing.
virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
const OptionalDuration& timeoutDurationAfterFence) const = 0;
};
// A unified interface for actual driver prepared model as well as the CPU.
class RuntimePreparedModel {
DISALLOW_COPY_AND_ASSIGN(RuntimePreparedModel);
public:
RuntimePreparedModel() = default;
virtual ~RuntimePreparedModel() = default;
virtual const Device* getDevice() const = 0;
virtual SharedPreparedModel getInterface() const = 0;
// Perform computation with given input/output argument info and memory pools.
virtual std::tuple<int, std::vector<OutputShape>, Timing> execute(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs,
const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
MeasureTiming measure, const OptionalTimePoint& deadline,
const OptionalDuration& loopTimeoutDuration) const = 0;
// Perform fenced computation with given input/output argument info and memory pools.
// The returned timing information is only valid if the callback is nullptr.
// Returns error_code, sync_fence, callback and timing.
virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs,
const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
MeasureTiming measure, const OptionalTimePoint& deadline,
const OptionalDuration& loopTimeoutDuration,
const OptionalDuration& timeoutDurationAfterFence) const = 0;
// Create a reusable execution with given input/output argument info and memory pools.
virtual std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
const std::vector<ModelArgumentInfo>& inputs,
const std::vector<ModelArgumentInfo>& outputs,
const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
const OptionalDuration& loopTimeoutDuration) const = 0;
virtual GeneralResult<SharedBurst> configureExecutionBurst() const = 0;
virtual MemoryPreference getMemoryPreference() const = 0;
};
using ModelFactory = std::function<Model()>;
struct CacheHandles {
std::vector<SharedHandle> modelCache;
std::vector<SharedHandle> dataCache;
};
using CacheDir = std::string;
struct CacheInfo {
std::variant<CacheDir, CacheHandles> variant;
};
// A unified interface for actual driver devices as well as the CPU
class Device {
DISALLOW_COPY_AND_ASSIGN(Device);
public:
Device() = default;
virtual ~Device() = default;
// Introspection methods returning device information
virtual const std::string& getName() const = 0;
virtual const std::string& getVersionString() const = 0;
virtual int64_t getFeatureLevel() const = 0;
virtual int32_t getType() const = 0;
virtual bool isUpdatable() const = 0;
virtual const std::vector<Extension>& getSupportedExtensions() const = 0;
// See the MetaModel class in MetaModel.h for more details.
virtual std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const = 0;
virtual const Capabilities& getCapabilities() const = 0;
virtual Capabilities::PerformanceInfo getPerformance(OperandType type) const = 0;
virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
virtual Capabilities::PerformanceInfo getIfPerformance() const = 0;
virtual Capabilities::PerformanceInfo getWhilePerformance() const = 0;
virtual std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const = 0;
virtual bool isCachingSupported() const = 0;
virtual int wait() const = 0;
virtual std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
const std::optional<CacheToken>& maybeToken) const = 0;
// The caller is responsible for making sure the MemoryDescriptor only contains
// PreparedModels from the same Device.
virtual std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
OperandType type) const = 0;
};
// Manages the NN HAL devices. Only one instance of this class will exist.
// Use get() to retrieve it.
class DeviceManager {
public:
const std::vector<std::shared_ptr<Device>>& getDrivers() const {
if (mSetCpuOnly || mDebugNNCpuOnly) {
return mDevicesCpuOnly;
}
return mDevices;
}
// For testing only:
void setUseCpuOnly(bool useCpuOnly) { mSetCpuOnly = useCpuOnly; }
bool getUseCpuOnly() const { return mSetCpuOnly; }
bool syncExecCpu() const { return mSyncExecCpu; }
bool syncExecRuntime() const { return mSyncExecRuntime; }
// How to handle graph partitioning?
// 0 - Don't do graph partitioning.
// 1 - Do graph partitioning; but fall back to non-partitioned
// execution if there is a partitioning failure.
// 2 - Do graph partitioning, and rely on it; there is no fallback.
enum { kPartitioningNo = 0, kPartitioningWithFallback = 1, kPartitioningWithoutFallback = 2 };
uint32_t getPartitioning() const { return mPartitioning; }
static bool partitioningAllowsFallback(uint32_t partitioning) {
return partitioning == kPartitioningWithFallback;
}
bool strictSlicing() const { return mStrictSlicing; }
// Returns the singleton manager.
static DeviceManager* get();
// Returns the singleton Cpu device.
static std::shared_ptr<Device> getCpuDevice();
// The forTest_* functions below are solely intended for use by unit tests.
// Returns all devices (ignores the cpu-only flags).
std::vector<std::shared_ptr<Device>> forTest_getDevices() const { return mDevices; }
// Sets the device list (does not affect cpu-only queries).
void forTest_setDevices(std::vector<std::shared_ptr<Device>> devices) {
mDevices = std::move(devices);
}
// Register a test device.
void forTest_registerDevice(const SharedDevice& device) { registerDevice(device); }
// Re-initialize the list of available devices.
void forTest_reInitializeDeviceList() {
mDevices.clear();
mDevicesCpuOnly.clear();
findAvailableDevices();
}
// Make a test device
static std::shared_ptr<Device> forTest_makeDriverDevice(const SharedDevice& device);
bool forTest_isCpuDevice(const ANeuralNetworksDevice* device) const {
return reinterpret_cast<const Device*>(device) == getCpuDevice().get();
}
private:
// Builds the list of available drivers and queries their capabilities.
DeviceManager();
// Adds a device for the manager to use.
void registerDevice(const SharedDevice& device);
void findAvailableDevices();
// List of all the devices we discovered (including CpuDevice).
std::vector<std::shared_ptr<Device>> mDevices;
// We set this one to have CpuDevice only. To be used when m*CpuOnly is true.
std::vector<std::shared_ptr<Device>> mDevicesCpuOnly;
// If either of these is true, we'll ignore the drivers that are
// on the device and run everything on the CPU.
bool mSetCpuOnly = false; // set by setUseCpuOnly()
bool mDebugNNCpuOnly = false; // derived from system property debug.nn.cpuonly
// synchronous execution
bool mSyncExecCpu = true;
bool mSyncExecRuntime = false;
static const uint32_t kPartitioningDefault = kPartitioningWithFallback;
uint32_t mPartitioning = kPartitioningDefault;
bool mStrictSlicing = false;
};
std::vector<SharedDevice> getDevices();
} // namespace nn
} // namespace android
#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H