You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
346 lines
14 KiB
346 lines
14 KiB
/*
|
|
* Copyright (C) 2019 The Android Open Source Project
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
#ifndef ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
|
|
#define ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
|
|
|
|
#include <android-base/macros.h>
|
|
#include <android/hardware/neuralnetworks/1.0/types.h>
|
|
#include <android/hardware/neuralnetworks/1.1/types.h>
|
|
#include <android/hardware/neuralnetworks/1.2/IBurstCallback.h>
|
|
#include <android/hardware/neuralnetworks/1.2/IBurstContext.h>
|
|
#include <android/hardware/neuralnetworks/1.2/IPreparedModel.h>
|
|
#include <android/hardware/neuralnetworks/1.2/types.h>
|
|
#include <fmq/MessageQueue.h>
|
|
#include <hidl/MQDescriptor.h>
|
|
|
|
#include <atomic>
|
|
#include <chrono>
|
|
#include <map>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <stack>
|
|
#include <tuple>
|
|
#include <utility>
|
|
#include <vector>
|
|
|
|
namespace android::nn {
|
|
|
|
/**
|
|
* Number of elements in the FMQ.
|
|
*/
|
|
constexpr const size_t kExecutionBurstChannelLength = 1024;
|
|
|
|
/**
|
|
* Function to serialize a request.
|
|
*
|
|
* Prefer calling RequestChannelSender::send.
|
|
*
|
|
* @param request Request object without the pool information.
|
|
* @param measure Whether to collect timing information for the execution.
|
|
* @param memoryIds Slot identifiers corresponding to memory resources for the
|
|
* request.
|
|
* @return Serialized FMQ request data.
|
|
*/
|
|
std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum> serialize(
|
|
const hardware::neuralnetworks::V1_0::Request& request,
|
|
hardware::neuralnetworks::V1_2::MeasureTiming measure, const std::vector<int32_t>& slots);
|
|
|
|
/**
|
|
* Deserialize the FMQ result data.
|
|
*
|
|
* The three resulting fields are the status of the execution, the dynamic
|
|
* shapes of the output tensors, and the timing information of the execution.
|
|
*
|
|
* @param data Serialized FMQ result data.
|
|
* @return Result object if successfully deserialized, std::nullopt otherwise.
|
|
*/
|
|
std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus,
|
|
std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
|
|
hardware::neuralnetworks::V1_2::Timing>>
|
|
deserialize(const std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>& data);
|
|
|
|
/**
|
|
* Convert result code to error status.
|
|
*
|
|
* @param resultCode Result code to be converted.
|
|
* @return ErrorStatus Resultant error status.
|
|
*/
|
|
hardware::neuralnetworks::V1_0::ErrorStatus legacyConvertResultCodeToErrorStatus(int resultCode);
|
|
|
|
/**
|
|
* ResultChannelReceiver is responsible for waiting on the channel until the
|
|
* packet is available, extracting the packet from the channel, and
|
|
* deserializing the packet.
|
|
*
|
|
* Because the receiver can wait on a packet that may never come (e.g., because
|
|
* the sending side of the packet has been closed), this object can be
|
|
* invalidated, unblocking the receiver.
|
|
*/
|
|
class ResultChannelReceiver {
|
|
using FmqResultDescriptor =
|
|
hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqResultDatum>;
|
|
using FmqResultChannel = hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqResultDatum,
|
|
hardware::kSynchronizedReadWrite>;
|
|
|
|
public:
|
|
/**
|
|
* Create the receiving end of a result channel.
|
|
*
|
|
* Prefer this call over the constructor.
|
|
*
|
|
* @param channelLength Number of elements in the FMQ.
|
|
* @param pollingTimeWindow How much time (in microseconds) the
|
|
* ResultChannelReceiver is allowed to poll the FMQ before waiting on
|
|
* the blocking futex. Polling may result in lower latencies at the
|
|
* potential cost of more power usage.
|
|
* @return A pair of ResultChannelReceiver and the FMQ descriptor on
|
|
* successful creation, both nullptr otherwise.
|
|
*/
|
|
static std::pair<std::unique_ptr<ResultChannelReceiver>, const FmqResultDescriptor*> create(
|
|
size_t channelLength, std::chrono::microseconds pollingTimeWindow);
|
|
|
|
/**
|
|
* Get the result from the channel.
|
|
*
|
|
* This method will block until either:
|
|
* 1) The packet has been retrieved, or
|
|
* 2) The receiver has been invalidated
|
|
*
|
|
* @return Result object if successfully received, std::nullopt if error or
|
|
* if the receiver object was invalidated.
|
|
*/
|
|
std::optional<std::tuple<hardware::neuralnetworks::V1_0::ErrorStatus,
|
|
std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
|
|
hardware::neuralnetworks::V1_2::Timing>>
|
|
getBlocking();
|
|
|
|
/**
|
|
* Method to mark the channel as invalid, unblocking any current or future
|
|
* calls to ResultChannelReceiver::getBlocking.
|
|
*/
|
|
void invalidate();
|
|
|
|
// prefer calling ResultChannelReceiver::getBlocking
|
|
std::optional<std::vector<hardware::neuralnetworks::V1_2::FmqResultDatum>> getPacketBlocking();
|
|
|
|
ResultChannelReceiver(std::unique_ptr<FmqResultChannel> fmqResultChannel,
|
|
std::chrono::microseconds pollingTimeWindow);
|
|
|
|
private:
|
|
const std::unique_ptr<FmqResultChannel> mFmqResultChannel;
|
|
std::atomic<bool> mValid{true};
|
|
const std::chrono::microseconds kPollingTimeWindow;
|
|
};
|
|
|
|
/**
|
|
* RequestChannelSender is responsible for serializing the result packet of
|
|
* information, sending it on the result channel, and signaling that the data is
|
|
* available.
|
|
*/
|
|
class RequestChannelSender {
|
|
using FmqRequestDescriptor =
|
|
hardware::MQDescriptorSync<hardware::neuralnetworks::V1_2::FmqRequestDatum>;
|
|
using FmqRequestChannel =
|
|
hardware::MessageQueue<hardware::neuralnetworks::V1_2::FmqRequestDatum,
|
|
hardware::kSynchronizedReadWrite>;
|
|
|
|
public:
|
|
/**
|
|
* Create the sending end of a request channel.
|
|
*
|
|
* Prefer this call over the constructor.
|
|
*
|
|
* @param channelLength Number of elements in the FMQ.
|
|
* @return A pair of ResultChannelReceiver and the FMQ descriptor on
|
|
* successful creation, both nullptr otherwise.
|
|
*/
|
|
static std::pair<std::unique_ptr<RequestChannelSender>, const FmqRequestDescriptor*> create(
|
|
size_t channelLength);
|
|
|
|
/**
|
|
* Send the request to the channel.
|
|
*
|
|
* @param request Request object without the pool information.
|
|
* @param measure Whether to collect timing information for the execution.
|
|
* @param memoryIds Slot identifiers corresponding to memory resources for
|
|
* the request.
|
|
* @return 'true' on successful send, 'false' otherwise.
|
|
*/
|
|
bool send(const hardware::neuralnetworks::V1_0::Request& request,
|
|
hardware::neuralnetworks::V1_2::MeasureTiming measure,
|
|
const std::vector<int32_t>& slots);
|
|
|
|
/**
|
|
* Method to mark the channel as invalid, causing all future calls to
|
|
* RequestChannelSender::send to immediately return false without attempting
|
|
* to send a message across the FMQ.
|
|
*/
|
|
void invalidate();
|
|
|
|
// prefer calling RequestChannelSender::send
|
|
bool sendPacket(const std::vector<hardware::neuralnetworks::V1_2::FmqRequestDatum>& packet);
|
|
|
|
RequestChannelSender(std::unique_ptr<FmqRequestChannel> fmqRequestChannel);
|
|
|
|
private:
|
|
const std::unique_ptr<FmqRequestChannel> mFmqRequestChannel;
|
|
std::atomic<bool> mValid{true};
|
|
};
|
|
|
|
/**
|
|
* The ExecutionBurstController class manages both the serialization and
|
|
* deserialization of data across FMQ, making it appear to the runtime as a
|
|
* regular synchronous inference. Additionally, this class manages the burst's
|
|
* memory cache.
|
|
*/
|
|
class ExecutionBurstController {
|
|
DISALLOW_IMPLICIT_CONSTRUCTORS(ExecutionBurstController);
|
|
|
|
public:
|
|
/**
|
|
* NN runtime burst callback object and memory cache.
|
|
*
|
|
* ExecutionBurstCallback associates a hidl_memory object with a slot number
|
|
* to be passed across FMQ. The ExecutionBurstServer can use this callback
|
|
* to retrieve this hidl_memory corresponding to the slot via HIDL.
|
|
*
|
|
* Whenever a hidl_memory object is copied, it will duplicate the underlying
|
|
* file descriptor. Because the NN runtime currently copies the hidl_memory
|
|
* on each execution, it is difficult to associate hidl_memory objects with
|
|
* previously cached hidl_memory objects. For this reason, callers of this
|
|
* class must pair each hidl_memory object with an associated key. For
|
|
* efficiency, if two hidl_memory objects represent the same underlying
|
|
* buffer, they must use the same key.
|
|
*/
|
|
class ExecutionBurstCallback : public hardware::neuralnetworks::V1_2::IBurstCallback {
|
|
DISALLOW_COPY_AND_ASSIGN(ExecutionBurstCallback);
|
|
|
|
public:
|
|
ExecutionBurstCallback() = default;
|
|
|
|
hardware::Return<void> getMemories(const hardware::hidl_vec<int32_t>& slots,
|
|
getMemories_cb cb) override;
|
|
|
|
/**
|
|
* This function performs one of two different actions:
|
|
* 1) If a key corresponding to a memory resource is unrecognized by the
|
|
* ExecutionBurstCallback object, the ExecutionBurstCallback object
|
|
* will allocate a slot, bind the memory to the slot, and return the
|
|
* slot identifier.
|
|
* 2) If a key corresponding to a memory resource is recognized by the
|
|
* ExecutionBurstCallback object, the ExecutionBurstCallback object
|
|
* will return the existing slot identifier.
|
|
*
|
|
* @param memories Memory resources used in an inference.
|
|
* @param keys Unique identifiers where each element corresponds to a
|
|
* memory resource element in "memories".
|
|
* @return Unique slot identifiers where each returned slot element
|
|
* corresponds to a memory resource element in "memories".
|
|
*/
|
|
std::vector<int32_t> getSlots(const hardware::hidl_vec<hardware::hidl_memory>& memories,
|
|
const std::vector<intptr_t>& keys);
|
|
|
|
/*
|
|
* This function performs two different actions:
|
|
* 1) Removes an entry from the cache (if present), including the local
|
|
* storage of the hidl_memory object. Note that this call does not
|
|
* free any corresponding hidl_memory object in ExecutionBurstServer,
|
|
* which is separately freed via IBurstContext::freeMemory.
|
|
* 2) Return whether a cache entry was removed and which slot was removed if
|
|
* found. If the key did not to correspond to any entry in the cache, a
|
|
* slot number of 0 is returned. The slot number and whether the entry
|
|
* existed is useful so the same slot can be freed in the
|
|
* ExecutionBurstServer's cache via IBurstContext::freeMemory.
|
|
*/
|
|
std::pair<bool, int32_t> freeMemory(intptr_t key);
|
|
|
|
private:
|
|
int32_t getSlotLocked(const hardware::hidl_memory& memory, intptr_t key);
|
|
int32_t allocateSlotLocked();
|
|
|
|
std::mutex mMutex;
|
|
std::stack<int32_t, std::vector<int32_t>> mFreeSlots;
|
|
std::map<intptr_t, int32_t> mMemoryIdToSlot;
|
|
std::vector<hardware::hidl_memory> mMemoryCache;
|
|
};
|
|
|
|
/**
|
|
* Creates a burst controller on a prepared model.
|
|
*
|
|
* Prefer this over ExecutionBurstController's constructor.
|
|
*
|
|
* @param preparedModel Model prepared for execution to execute on.
|
|
* @param pollingTimeWindow How much time (in microseconds) the
|
|
* ExecutionBurstController is allowed to poll the FMQ before waiting on
|
|
* the blocking futex. Polling may result in lower latencies at the
|
|
* potential cost of more power usage.
|
|
* @return ExecutionBurstController Execution burst controller object.
|
|
*/
|
|
static std::unique_ptr<ExecutionBurstController> create(
|
|
const sp<hardware::neuralnetworks::V1_2::IPreparedModel>& preparedModel,
|
|
std::chrono::microseconds pollingTimeWindow);
|
|
|
|
// prefer calling ExecutionBurstController::create
|
|
ExecutionBurstController(const std::shared_ptr<RequestChannelSender>& requestChannelSender,
|
|
const std::shared_ptr<ResultChannelReceiver>& resultChannelReceiver,
|
|
const sp<hardware::neuralnetworks::V1_2::IBurstContext>& burstContext,
|
|
const sp<ExecutionBurstCallback>& callback,
|
|
const sp<hardware::hidl_death_recipient>& deathHandler = nullptr);
|
|
|
|
// explicit destructor to unregister the death recipient
|
|
~ExecutionBurstController();
|
|
|
|
/**
|
|
* Execute a request on a model.
|
|
*
|
|
* @param request Arguments to be executed on a model.
|
|
* @param measure Whether to collect timing measurements, either YES or NO
|
|
* @param memoryIds Identifiers corresponding to each memory object in the
|
|
* request's pools.
|
|
* @return A tuple of:
|
|
* - result code of the execution
|
|
* - dynamic output shapes from the execution
|
|
* - any execution time measurements of the execution
|
|
* - whether or not a failed burst execution should be re-run using a
|
|
* different path (e.g., IPreparedModel::executeSynchronously)
|
|
*/
|
|
std::tuple<int, std::vector<hardware::neuralnetworks::V1_2::OutputShape>,
|
|
hardware::neuralnetworks::V1_2::Timing, bool>
|
|
compute(const hardware::neuralnetworks::V1_0::Request& request,
|
|
hardware::neuralnetworks::V1_2::MeasureTiming measure,
|
|
const std::vector<intptr_t>& memoryIds);
|
|
|
|
/**
|
|
* Propagate a user's freeing of memory to the service.
|
|
*
|
|
* @param key Key corresponding to the memory object.
|
|
*/
|
|
void freeMemory(intptr_t key);
|
|
|
|
private:
|
|
std::mutex mMutex;
|
|
const std::shared_ptr<RequestChannelSender> mRequestChannelSender;
|
|
const std::shared_ptr<ResultChannelReceiver> mResultChannelReceiver;
|
|
const sp<hardware::neuralnetworks::V1_2::IBurstContext> mBurstContext;
|
|
const sp<ExecutionBurstCallback> mMemoryCache;
|
|
const sp<hardware::hidl_death_recipient> mDeathHandler;
|
|
};
|
|
|
|
} // namespace android::nn
|
|
|
|
#endif // ANDROID_FRAMEWORKS_ML_NN_COMMON_EXECUTION_BURST_CONTROLLER_H
|