v811_spc009/packages/modules/NeuralNetworks/runtime/Manager.h

/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H

#include <LegacyUtils.h>
#include <android-base/macros.h>
#include <nnapi/IBurst.h>
#include <nnapi/IDevice.h>
#include <nnapi/Types.h>

#include <map>
#include <memory>
#include <string>
#include <tuple>
#include <unordered_set>
#include <utility>
#include <vector>

#include "ExecutionCallback.h"
#include "Memory.h"

namespace android {
namespace nn {

// Forward declaration
class Device;
class MetaModel;
class ModelArgumentInfo;

// A unified interface for a reusable execution with cached resources.
// This object provides no thread-safety guarantee. The caller must guarantee there is at most one
// call to RuntimeExecution::compute or RuntimeExecution::computeFenced on the same RuntimeExecution
// object in flight at a time.
class RuntimeExecution {
    DISALLOW_COPY_AND_ASSIGN(RuntimeExecution);

   public:
    RuntimeExecution() = default;
    virtual ~RuntimeExecution() = default;

    virtual std::tuple<int, std::vector<OutputShape>, Timing> compute(
            const SharedBurst& burstController, const OptionalTimePoint& deadline) const = 0;

    // The returned timing information is only valid if the callback is nullptr.
    // Returns error_code, sync_fence, callback and timing.
    virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(
            const std::vector<int>& waitFor, const OptionalTimePoint& deadline,
            const OptionalDuration& timeoutDurationAfterFence) const = 0;
};

// A unified interface for actual driver prepared model as well as the CPU.
class RuntimePreparedModel {
    DISALLOW_COPY_AND_ASSIGN(RuntimePreparedModel);

   public:
    RuntimePreparedModel() = default;
    virtual ~RuntimePreparedModel() = default;

    virtual const Device* getDevice() const = 0;
    virtual SharedPreparedModel getInterface() const = 0;

    // Perform computation with given input/output argument info and memory pools.
    virtual std::tuple<int, std::vector<OutputShape>, Timing> execute(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration) const = 0;

    // Perform fenced computation with given input/output argument info and memory pools.
    // The returned timing information is only valid if the callback is nullptr.
    // Returns error_code, sync_fence, callback and timing.
    virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,
            MeasureTiming measure, const OptionalTimePoint& deadline,
            const OptionalDuration& loopTimeoutDuration,
            const OptionalDuration& timeoutDurationAfterFence) const = 0;

    // Create a reusable execution with given input/output argument info and memory pools.
    virtual std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(
            const std::vector<ModelArgumentInfo>& inputs,
            const std::vector<ModelArgumentInfo>& outputs,
            const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,
            const OptionalDuration& loopTimeoutDuration) const = 0;

    virtual GeneralResult<SharedBurst> configureExecutionBurst() const = 0;

    virtual MemoryPreference getMemoryPreference() const = 0;
};

using ModelFactory = std::function<Model()>;

struct CacheHandles {
    std::vector<SharedHandle> modelCache;
    std::vector<SharedHandle> dataCache;
};

using CacheDir = std::string;

struct CacheInfo {
    std::variant<CacheDir, CacheHandles> variant;
};

// A unified interface for actual driver devices as well as the CPU
class Device {
    DISALLOW_COPY_AND_ASSIGN(Device);

   public:
    Device() = default;
    virtual ~Device() = default;

    // Introspection methods returning device information
    virtual const std::string& getName() const = 0;
    virtual const std::string& getVersionString() const = 0;
    virtual int64_t getFeatureLevel() const = 0;
    virtual int32_t getType() const = 0;
    virtual bool isUpdatable() const = 0;
    virtual const std::vector<Extension>& getSupportedExtensions() const = 0;

    // See the MetaModel class in MetaModel.h for more details.
    virtual std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const = 0;

    virtual const Capabilities& getCapabilities() const = 0;
    virtual Capabilities::PerformanceInfo getPerformance(OperandType type) const = 0;
    virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;
    virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;
    virtual Capabilities::PerformanceInfo getIfPerformance() const = 0;
    virtual Capabilities::PerformanceInfo getWhilePerformance() const = 0;
    virtual std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const = 0;
    virtual bool isCachingSupported() const = 0;
    virtual int wait() const = 0;

    virtual std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(
            const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,
            const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,
            const std::optional<CacheToken>& maybeToken) const = 0;

    // The caller is responsible for making sure the MemoryDescriptor only contains
    // PreparedModels from the same Device.
    virtual std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,
                                                                    OperandType type) const = 0;
};

// Manages the NN HAL devices.  Only one instance of this class will exist.
// Use get() to retrieve it.
class DeviceManager {
   public:
    const std::vector<std::shared_ptr<Device>>& getDrivers() const {
        if (mSetCpuOnly || mDebugNNCpuOnly) {
            return mDevicesCpuOnly;
        }
        return mDevices;
    }

    // For testing only:
    void setUseCpuOnly(bool useCpuOnly) { mSetCpuOnly = useCpuOnly; }
    bool getUseCpuOnly() const { return mSetCpuOnly; }

    bool syncExecCpu() const { return mSyncExecCpu; }
    bool syncExecRuntime() const { return mSyncExecRuntime; }

    // How to handle graph partitioning?
    // 0 - Don't do graph partitioning.
    // 1 - Do graph partitioning; but fall back to non-partitioned
    //     execution if there is a partitioning failure.
    // 2 - Do graph partitioning, and rely on it; there is no fallback.
    enum { kPartitioningNo = 0, kPartitioningWithFallback = 1, kPartitioningWithoutFallback = 2 };
    uint32_t getPartitioning() const { return mPartitioning; }
    static bool partitioningAllowsFallback(uint32_t partitioning) {
        return partitioning == kPartitioningWithFallback;
    }

    bool strictSlicing() const { return mStrictSlicing; }

    // Returns the singleton manager.
    static DeviceManager* get();

    // Returns the singleton Cpu device.
    static std::shared_ptr<Device> getCpuDevice();

    // The forTest_* functions below are solely intended for use by unit tests.

    // Returns all devices (ignores the cpu-only flags).
    std::vector<std::shared_ptr<Device>> forTest_getDevices() const { return mDevices; }

    // Sets the device list (does not affect cpu-only queries).
    void forTest_setDevices(std::vector<std::shared_ptr<Device>> devices) {
        mDevices = std::move(devices);
    }

    // Register a test device.
    void forTest_registerDevice(const SharedDevice& device) { registerDevice(device); }

    // Re-initialize the list of available devices.
    void forTest_reInitializeDeviceList() {
        mDevices.clear();
        mDevicesCpuOnly.clear();
        findAvailableDevices();
    }

    // Make a test device
    static std::shared_ptr<Device> forTest_makeDriverDevice(const SharedDevice& device);

    bool forTest_isCpuDevice(const ANeuralNetworksDevice* device) const {
        return reinterpret_cast<const Device*>(device) == getCpuDevice().get();
    }

   private:
    // Builds the list of available drivers and queries their capabilities.
    DeviceManager();

    // Adds a device for the manager to use.
    void registerDevice(const SharedDevice& device);

    void findAvailableDevices();

    // List of all the devices we discovered (including CpuDevice).
    std::vector<std::shared_ptr<Device>> mDevices;

    // We set this one to have CpuDevice only. To be used when m*CpuOnly is true.
    std::vector<std::shared_ptr<Device>> mDevicesCpuOnly;

    // If either of these is true, we'll ignore the drivers that are
    // on the device and run everything on the CPU.
    bool mSetCpuOnly = false;      // set by setUseCpuOnly()
    bool mDebugNNCpuOnly = false;  // derived from system property debug.nn.cpuonly

    // synchronous execution
    bool mSyncExecCpu = true;
    bool mSyncExecRuntime = false;

    static const uint32_t kPartitioningDefault = kPartitioningWithFallback;
    uint32_t mPartitioning = kPartitioningDefault;

    bool mStrictSlicing = false;
};

std::vector<SharedDevice> getDevices();

}  // namespace nn
}  // namespace android

#endif  // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H
v811_spc009_project 4 months ago			`/*`
			`* Copyright (C) 2017 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#ifndef ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H`
			`#define ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H`

			`#include <LegacyUtils.h>`
			`#include <android-base/macros.h>`
			`#include <nnapi/IBurst.h>`
			`#include <nnapi/IDevice.h>`
			`#include <nnapi/Types.h>`

			`#include <map>`
			`#include <memory>`
			`#include <string>`
			`#include <tuple>`
			`#include <unordered_set>`
			`#include <utility>`
			`#include <vector>`

			`#include "ExecutionCallback.h"`
			`#include "Memory.h"`

			`namespace android {`
			`namespace nn {`

			`// Forward declaration`
			`class Device;`
			`class MetaModel;`
			`class ModelArgumentInfo;`

			`// A unified interface for a reusable execution with cached resources.`
			`// This object provides no thread-safety guarantee. The caller must guarantee there is at most one`
			`// call to RuntimeExecution::compute or RuntimeExecution::computeFenced on the same RuntimeExecution`
			`// object in flight at a time.`
			`class RuntimeExecution {`
			`DISALLOW_COPY_AND_ASSIGN(RuntimeExecution);`

			`public:`
			`RuntimeExecution() = default;`
			`virtual ~RuntimeExecution() = default;`

			`virtual std::tuple<int, std::vector<OutputShape>, Timing> compute(`
			`const SharedBurst& burstController, const OptionalTimePoint& deadline) const = 0;`

			`// The returned timing information is only valid if the callback is nullptr.`
			`// Returns error_code, sync_fence, callback and timing.`
			`virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> computeFenced(`
			`const std::vector<int>& waitFor, const OptionalTimePoint& deadline,`
			`const OptionalDuration& timeoutDurationAfterFence) const = 0;`
			`};`

			`// A unified interface for actual driver prepared model as well as the CPU.`
			`class RuntimePreparedModel {`
			`DISALLOW_COPY_AND_ASSIGN(RuntimePreparedModel);`

			`public:`
			`RuntimePreparedModel() = default;`
			`virtual ~RuntimePreparedModel() = default;`

			`virtual const Device* getDevice() const = 0;`
			`virtual SharedPreparedModel getInterface() const = 0;`

			`// Perform computation with given input/output argument info and memory pools.`
			`virtual std::tuple<int, std::vector<OutputShape>, Timing> execute(`
			`const std::vector<ModelArgumentInfo>& inputs,`
			`const std::vector<ModelArgumentInfo>& outputs,`
			`const std::vector<const RuntimeMemory*>& memories, const SharedBurst& burstController,`
			`MeasureTiming measure, const OptionalTimePoint& deadline,`
			`const OptionalDuration& loopTimeoutDuration) const = 0;`

			`// Perform fenced computation with given input/output argument info and memory pools.`
			`// The returned timing information is only valid if the callback is nullptr.`
			`// Returns error_code, sync_fence, callback and timing.`
			`virtual std::tuple<int, int, ExecuteFencedInfoCallback, Timing> executeFenced(`
			`const std::vector<ModelArgumentInfo>& inputs,`
			`const std::vector<ModelArgumentInfo>& outputs,`
			`const std::vector<const RuntimeMemory*>& memories, const std::vector<int>& waitFor,`
			`MeasureTiming measure, const OptionalTimePoint& deadline,`
			`const OptionalDuration& loopTimeoutDuration,`
			`const OptionalDuration& timeoutDurationAfterFence) const = 0;`

			`// Create a reusable execution with given input/output argument info and memory pools.`
			`virtual std::pair<int, std::shared_ptr<RuntimeExecution>> createReusableExecution(`
			`const std::vector<ModelArgumentInfo>& inputs,`
			`const std::vector<ModelArgumentInfo>& outputs,`
			`const std::vector<const RuntimeMemory*>& memories, MeasureTiming measure,`
			`const OptionalDuration& loopTimeoutDuration) const = 0;`

			`virtual GeneralResult<SharedBurst> configureExecutionBurst() const = 0;`

			`virtual MemoryPreference getMemoryPreference() const = 0;`
			`};`

			`using ModelFactory = std::function<Model()>;`

			`struct CacheHandles {`
			`std::vector<SharedHandle> modelCache;`
			`std::vector<SharedHandle> dataCache;`
			`};`

			`using CacheDir = std::string;`

			`struct CacheInfo {`
			`std::variant<CacheDir, CacheHandles> variant;`
			`};`

			`// A unified interface for actual driver devices as well as the CPU`
			`class Device {`
			`DISALLOW_COPY_AND_ASSIGN(Device);`

			`public:`
			`Device() = default;`
			`virtual ~Device() = default;`

			`// Introspection methods returning device information`
			`virtual const std::string& getName() const = 0;`
			`virtual const std::string& getVersionString() const = 0;`
			`virtual int64_t getFeatureLevel() const = 0;`
			`virtual int32_t getType() const = 0;`
			`virtual bool isUpdatable() const = 0;`
			`virtual const std::vector<Extension>& getSupportedExtensions() const = 0;`

			`// See the MetaModel class in MetaModel.h for more details.`
			`virtual std::vector<bool> getSupportedOperations(const MetaModel& metaModel) const = 0;`

			`virtual const Capabilities& getCapabilities() const = 0;`
			`virtual Capabilities::PerformanceInfo getPerformance(OperandType type) const = 0;`
			`virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceScalar() const = 0;`
			`virtual Capabilities::PerformanceInfo getRelaxedFloat32toFloat16PerformanceTensor() const = 0;`
			`virtual Capabilities::PerformanceInfo getIfPerformance() const = 0;`
			`virtual Capabilities::PerformanceInfo getWhilePerformance() const = 0;`
			`virtual std::pair<uint32_t, uint32_t> getNumberOfCacheFilesNeeded() const = 0;`
			`virtual bool isCachingSupported() const = 0;`
			`virtual int wait() const = 0;`

			`virtual std::pair<int, std::shared_ptr<RuntimePreparedModel>> prepareModel(`
			`const ModelFactory& makeModel, ExecutionPreference preference, Priority priority,`
			`const OptionalTimePoint& deadline, const CacheInfo& cacheInfo,`
			`const std::optional<CacheToken>& maybeToken) const = 0;`

			`// The caller is responsible for making sure the MemoryDescriptor only contains`
			`// PreparedModels from the same Device.`
			`virtual std::pair<int, std::unique_ptr<RuntimeMemory>> allocate(const MemoryDescriptor& desc,`
			`OperandType type) const = 0;`
			`};`

			`// Manages the NN HAL devices. Only one instance of this class will exist.`
			`// Use get() to retrieve it.`
			`class DeviceManager {`
			`public:`
			`const std::vector<std::shared_ptr<Device>>& getDrivers() const {`
			`if (mSetCpuOnly \|\| mDebugNNCpuOnly) {`
			`return mDevicesCpuOnly;`
			`}`
			`return mDevices;`
			`}`

			`// For testing only:`
			`void setUseCpuOnly(bool useCpuOnly) { mSetCpuOnly = useCpuOnly; }`
			`bool getUseCpuOnly() const { return mSetCpuOnly; }`

			`bool syncExecCpu() const { return mSyncExecCpu; }`
			`bool syncExecRuntime() const { return mSyncExecRuntime; }`

			`// How to handle graph partitioning?`
			`// 0 - Don't do graph partitioning.`
			`// 1 - Do graph partitioning; but fall back to non-partitioned`
			`// execution if there is a partitioning failure.`
			`// 2 - Do graph partitioning, and rely on it; there is no fallback.`
			`enum { kPartitioningNo = 0, kPartitioningWithFallback = 1, kPartitioningWithoutFallback = 2 };`
			`uint32_t getPartitioning() const { return mPartitioning; }`
			`static bool partitioningAllowsFallback(uint32_t partitioning) {`
			`return partitioning == kPartitioningWithFallback;`
			`}`

			`bool strictSlicing() const { return mStrictSlicing; }`

			`// Returns the singleton manager.`
			`static DeviceManager* get();`

			`// Returns the singleton Cpu device.`
			`static std::shared_ptr<Device> getCpuDevice();`

			`// The forTest_* functions below are solely intended for use by unit tests.`

			`// Returns all devices (ignores the cpu-only flags).`
			`std::vector<std::shared_ptr<Device>> forTest_getDevices() const { return mDevices; }`

			`// Sets the device list (does not affect cpu-only queries).`
			`void forTest_setDevices(std::vector<std::shared_ptr<Device>> devices) {`
			`mDevices = std::move(devices);`
			`}`

			`// Register a test device.`
			`void forTest_registerDevice(const SharedDevice& device) { registerDevice(device); }`

			`// Re-initialize the list of available devices.`
			`void forTest_reInitializeDeviceList() {`
			`mDevices.clear();`
			`mDevicesCpuOnly.clear();`
			`findAvailableDevices();`
			`}`

			`// Make a test device`
			`static std::shared_ptr<Device> forTest_makeDriverDevice(const SharedDevice& device);`

			`bool forTest_isCpuDevice(const ANeuralNetworksDevice* device) const {`
			`return reinterpret_cast<const Device*>(device) == getCpuDevice().get();`
			`}`

			`private:`
			`// Builds the list of available drivers and queries their capabilities.`
			`DeviceManager();`

			`// Adds a device for the manager to use.`
			`void registerDevice(const SharedDevice& device);`

			`void findAvailableDevices();`

			`// List of all the devices we discovered (including CpuDevice).`
			`std::vector<std::shared_ptr<Device>> mDevices;`

			`// We set this one to have CpuDevice only. To be used when m*CpuOnly is true.`
			`std::vector<std::shared_ptr<Device>> mDevicesCpuOnly;`

			`// If either of these is true, we'll ignore the drivers that are`
			`// on the device and run everything on the CPU.`
			`bool mSetCpuOnly = false; // set by setUseCpuOnly()`
			`bool mDebugNNCpuOnly = false; // derived from system property debug.nn.cpuonly`

			`// synchronous execution`
			`bool mSyncExecCpu = true;`
			`bool mSyncExecRuntime = false;`

			`static const uint32_t kPartitioningDefault = kPartitioningWithFallback;`
			`uint32_t mPartitioning = kPartitioningDefault;`

			`bool mStrictSlicing = false;`
			`};`

			`std::vector<SharedDevice> getDevices();`

			`} // namespace nn`
			`} // namespace android`

			`#endif // ANDROID_FRAMEWORKS_ML_NN_RUNTIME_MANAGER_H`