You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
303 lines
12 KiB
303 lines
12 KiB
// Copyright 2020 The Chromium Authors. All rights reserved.
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
// found in the LICENSE file.
|
|
|
|
#ifndef CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_
|
|
#define CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_
|
|
|
|
#include <vpx/vpx_encoder.h>
|
|
#include <vpx/vpx_image.h>
|
|
|
|
#include <algorithm>
|
|
#include <condition_variable> // NOLINT
|
|
#include <functional>
|
|
#include <memory>
|
|
#include <mutex>
|
|
#include <queue>
|
|
#include <thread>
|
|
#include <vector>
|
|
|
|
#include "absl/base/thread_annotations.h"
|
|
#include "cast/streaming/frame_id.h"
|
|
#include "cast/streaming/rtp_time.h"
|
|
#include "platform/api/task_runner.h"
|
|
#include "platform/api/time.h"
|
|
|
|
namespace openscreen {
|
|
|
|
class TaskRunner;
|
|
|
|
namespace cast {
|
|
|
|
class Sender;
|
|
|
|
// Uses libvpx to encode VP8 video and streams it to a Sender. Includes
// extensive logic for fine-tuning the encoder parameters in real-time, to
// provide the best quality results given external, uncontrollable factors:
// CPU/network availability, and the complexity of the video frame content.
//
// Internally, a separate encode thread is created and used to prevent blocking
// the main thread while frames are being encoded. All public API methods are
// assumed to be called on the same sequence/thread as the main TaskRunner
// (injected via the constructor).
//
// Usage:
//
//   1. EncodeAndSend() is used to queue up video frames for encoding and
//      sending, which will be done on a best-effort basis.
//
//   2. The client is expected to call SetTargetBitrate() frequently based on
//      its own bandwidth estimates and congestion control logic. In addition,
//      a client may provide a callback for each frame's encode statistics,
//      which can be used to further optimize the user experience. For example,
//      the stats can be used as a signal to reduce the data volume (i.e.,
//      resolution and/or frame rate) coming from the video capture source.
|
|
class StreamingVp8Encoder {
|
|
public:
|
|
// Configurable parameters passed to the StreamingVp8Encoder constructor.
|
|
struct Parameters {
|
|
// Number of threads to parallelize frame encoding. This should be set based
|
|
// on the number of CPU cores available for encoding, but no more than 8.
|
|
int num_encode_threads =
|
|
std::min(std::max<int>(std::thread::hardware_concurrency(), 1), 8);
|
|
|
|
// Best-quality quantizer (lower is better quality). Range: [0,63]
|
|
int min_quantizer = 4;
|
|
|
|
// Worst-quality quantizer (lower is better quality). Range: [0,63]
|
|
int max_quantizer = 63;
|
|
|
|
// Worst-quality quantizer to use when the CPU is extremely constrained.
|
|
// Range: [min_quantizer,max_quantizer]
|
|
int max_cpu_saver_quantizer = 25;
|
|
|
|
// Maximum amount of wall-time a frame's encode can take, relative to the
|
|
// frame's duration, before the CPU-saver logic is activated. The default
|
|
// (70%) is appropriate for systems with four or more cores, but should be
|
|
// reduced (e.g., 50%) for systems with fewer than three cores.
|
|
//
|
|
// Example: For 30 FPS (continuous) video, the frame duration is ~33.3ms,
|
|
// and a value of 0.5 here would mean that the CPU-saver logic starts
|
|
// sacrificing quality when frame encodes start taking longer than ~16.7ms.
|
|
double max_time_utilization = 0.7;
|
|
};
|
|
|
|
// Represents an input VideoFrame, passed to EncodeAndSend().
|
|
struct VideoFrame {
|
|
// Image width and height.
|
|
int width;
|
|
int height;
|
|
|
|
// I420 format image pointers and row strides (the number of bytes between
|
|
// the start of successive rows). The pointers only need to remain valid
|
|
// until the EncodeAndSend() call returns.
|
|
const uint8_t* yuv_planes[3];
|
|
int yuv_strides[3];
|
|
|
|
// How long this frame will be held before the next frame will be displayed,
|
|
// or zero if unknown. The frame duration is passed to the VP8 codec,
|
|
// affecting a number of important behaviors, including: per-frame
|
|
// bandwidth, CPU time spent encoding, temporal quality trade-offs, and
|
|
// key/golden/alt-ref frame generation intervals.
|
|
Clock::duration duration;
|
|
};
|
|
|
|
// Performance statistics for a single frame's encode.
|
|
//
|
|
// For full details on how to use these stats in an end-to-end system, see:
|
|
// https://www.chromium.org/developers/design-documents/
|
|
// auto-throttled-screen-capture-and-mirroring
|
|
// and https://source.chromium.org/chromium/chromium/src/+/master:
|
|
// media/cast/sender/performance_metrics_overlay.h
|
|
struct Stats {
|
|
// The Cast Streaming ID that was assigned to the frame.
|
|
FrameId frame_id;
|
|
|
|
// The RTP timestamp of the frame.
|
|
RtpTimeTicks rtp_timestamp;
|
|
|
|
// How long the frame took to encode. This is wall time, not CPU time or
|
|
// some other load metric.
|
|
Clock::duration encode_wall_time;
|
|
|
|
// The frame's predicted duration; or, the actual duration if it was
|
|
// provided in the VideoFrame.
|
|
Clock::duration frame_duration;
|
|
|
|
// The encoded frame's size in bytes.
|
|
int encoded_size;
|
|
|
|
// The average size of an encoded frame in bytes, having this
|
|
// |frame_duration| and current target bitrate.
|
|
double target_size;
|
|
|
|
// The actual quantizer the VP8 encoder used, in the range [0,63].
|
|
int quantizer;
|
|
|
|
// The "hindsight" quantizer value that would have produced the best quality
|
|
// encoding of the frame at the current target bitrate. The nominal range is
|
|
// [0.0,63.0]. If it is larger than 63.0, then it was impossible for VP8 to
|
|
// encode the frame within the current target bitrate (e.g., too much
|
|
// "entropy" in the image, or too low a target bitrate).
|
|
double perfect_quantizer;
|
|
|
|
// Utilization feedback metrics. The nominal range for each of these is
|
|
// [0.0,1.0] where 1.0 means "the entire budget available for the frame was
|
|
// exhausted." Going above 1.0 is okay for one or a few frames, since it's
|
|
// the average over many frames that matters before the system is considered
|
|
// "redlining."
|
|
//
|
|
// The max of these three provides an overall utilization control signal.
|
|
// The usual approach is for upstream control logic to increase/decrease the
|
|
// data volume (e.g., video resolution and/or frame rate) to maintain a good
|
|
// target point.
|
|
double time_utilization() const {
|
|
return static_cast<double>(encode_wall_time.count()) /
|
|
frame_duration.count();
|
|
}
|
|
double space_utilization() const { return encoded_size / target_size; }
|
|
double entropy_utilization() const {
|
|
return perfect_quantizer / kMaxQuantizer;
|
|
}
|
|
};
|
|
|
|
StreamingVp8Encoder(const Parameters& params,
|
|
TaskRunner* task_runner,
|
|
Sender* sender);
|
|
|
|
~StreamingVp8Encoder();
|
|
|
|
// Get/Set the target bitrate. This may be changed at any time, as frequently
|
|
// as desired, and it will take effect internally as soon as possible.
|
|
int GetTargetBitrate() const;
|
|
void SetTargetBitrate(int new_bitrate);
|
|
|
|
// Encode |frame| using the VP8 encoder, assemble an EncodedFrame, and enqueue
|
|
// into the Sender. The frame may be dropped if too many frames are in-flight.
|
|
// If provided, the |stats_callback| is run after the frame is enqueued in the
|
|
// Sender (via the main TaskRunner).
|
|
void EncodeAndSend(const VideoFrame& frame,
|
|
Clock::time_point reference_time,
|
|
std::function<void(Stats)> stats_callback);
|
|
|
|
static constexpr int kMinQuantizer = 0;
|
|
static constexpr int kMaxQuantizer = 63;
|
|
|
|
private:
|
|
// Syntactic convenience to wrap the vpx_image_t alloc/free API in a smart
|
|
// pointer.
|
|
struct VpxImageDeleter {
|
|
void operator()(vpx_image_t* ptr) const { vpx_img_free(ptr); }
|
|
};
|
|
using VpxImageUniquePtr = std::unique_ptr<vpx_image_t, VpxImageDeleter>;
|
|
|
|
// Represents the state of one frame encode. This is created in
|
|
// EncodeAndSend(), and passed to the encode thread via the |encode_queue_|.
|
|
struct WorkUnit {
|
|
VpxImageUniquePtr image;
|
|
Clock::duration duration;
|
|
Clock::time_point reference_time;
|
|
RtpTimeTicks rtp_timestamp;
|
|
std::function<void(Stats)> stats_callback;
|
|
};
|
|
|
|
// Same as WorkUnit, but with additional fields to carry the encode results.
|
|
struct WorkUnitWithResults : public WorkUnit {
|
|
std::vector<uint8_t> payload;
|
|
bool is_key_frame;
|
|
Stats stats;
|
|
};
|
|
|
|
bool is_encoder_initialized() const { return config_.g_threads != 0; }
|
|
|
|
// Destroys the VP8 encoder context if it has been initialized.
|
|
void DestroyEncoder();
|
|
|
|
// The procedure for the |encode_thread_| that loops, processing work units
|
|
// from the |encode_queue_| by calling Encode() until it's time to end the
|
|
// thread.
|
|
void ProcessWorkUnitsUntilTimeToQuit();
|
|
|
|
// If the |encoder_| is live, attempt reconfiguration to allow it to encode
|
|
// frames at a new frame size, target bitrate, or "CPU encoding speed." If
|
|
// reconfiguration is not possible, destroy the existing instance and
|
|
// re-create a new |encoder_| instance.
|
|
void PrepareEncoder(int width, int height, int target_bitrate);
|
|
|
|
// Wraps the complex libvpx vpx_codec_encode() call using inputs from
|
|
// |work_unit| and populating results there.
|
|
void EncodeFrame(bool force_key_frame, WorkUnitWithResults* work_unit);
|
|
|
|
// Computes and populates |work_unit.stats| after the last call to
|
|
// EncodeFrame().
|
|
void ComputeFrameEncodeStats(Clock::duration encode_wall_time,
|
|
int target_bitrate,
|
|
WorkUnitWithResults* work_unit);
|
|
|
|
// Updates the |ideal_speed_setting_|, to take effect with the next frame
|
|
// encode, based on the given performance |stats|.
|
|
void UpdateSpeedSettingForNextFrame(const Stats& stats);
|
|
|
|
// Assembles and enqueues an EncodedFrame with the Sender on the main thread.
|
|
void SendEncodedFrame(WorkUnitWithResults results);
|
|
|
|
// Allocates a vpx_image_t and copies the content from |frame| to it.
|
|
static VpxImageUniquePtr CloneAsVpxImage(const VideoFrame& frame);
|
|
|
|
const Parameters params_;
|
|
TaskRunner* const main_task_runner_;
|
|
Sender* const sender_;
|
|
|
|
// The reference time of the first frame passed to EncodeAndSend().
|
|
Clock::time_point start_time_ = Clock::time_point::min();
|
|
|
|
// The RTP timestamp of the last frame that was pushed into the
|
|
// |encode_queue_| by EncodeAndSend(). This is used to check whether
|
|
// timestamps are monotonically increasing.
|
|
RtpTimeTicks last_enqueued_rtp_timestamp_;
|
|
|
|
// Guards a few members shared by both the main and encode threads.
|
|
std::mutex mutex_;
|
|
|
|
// Used by the encode thread to sleep until more work is available.
|
|
std::condition_variable cv_ ABSL_GUARDED_BY(mutex_);
|
|
|
|
// These encode parameters not passed in the WorkUnit struct because it is
|
|
// desirable for them to be applied as soon as possible, with the very next
|
|
// WorkUnit popped from the |encode_queue_| on the encode thread, and not to
|
|
// wait until some later WorkUnit is processed.
|
|
bool needs_key_frame_ ABSL_GUARDED_BY(mutex_) = true;
|
|
int target_bitrate_ ABSL_GUARDED_BY(mutex_) = 2 << 20; // Default: 2 Mbps.
|
|
|
|
// The queue of frame encodes. The size of this queue is implicitly bounded by
|
|
// EncodeAndSend(), where it checks for the total in-flight media duration and
|
|
// maybe drops a frame.
|
|
std::queue<WorkUnit> encode_queue_ ABSL_GUARDED_BY(mutex_);
|
|
|
|
// Current VP8 encoder configuration. Most of the fields are unchanging, and
|
|
// are populated in the ctor; but thereafter, only the encode thread accesses
|
|
// this struct.
|
|
//
|
|
// The speed setting is controlled via a separate libvpx API (see members
|
|
// below).
|
|
vpx_codec_enc_cfg_t config_{};
|
|
|
|
// These represent the magnitude of the VP8 speed setting, where larger values
|
|
// (i.e., faster speed) request less CPU usage but will provide lower video
|
|
// quality. Only the encode thread accesses these.
|
|
double ideal_speed_setting_; // A time-weighted average, from measurements.
|
|
int current_speed_setting_; // Current |encoder_| speed setting.
|
|
|
|
// libvpx VP8 encoder instance. Only the encode thread accesses this.
|
|
vpx_codec_ctx_t encoder_;
|
|
|
|
// This member should be last in the class since the thread should not start
|
|
// until all above members have been initialized by the constructor.
|
|
std::thread encode_thread_;
|
|
};
|
|
|
|
} // namespace cast
|
|
} // namespace openscreen
|
|
|
|
#endif // CAST_STANDALONE_SENDER_STREAMING_VP8_ENCODER_H_
|