// Copyright 2020 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "cast/standalone_sender/streaming_vp8_encoder.h"
#include <stdint.h>
#include <string.h>
#include <vpx/vp8cx.h>
#include <algorithm>
#include <chrono>
#include <climits>
#include <cmath>
#include <utility>
#include "cast/streaming/encoded_frame.h"
#include "cast/streaming/environment.h"
#include "cast/streaming/sender.h"
#include "util/chrono_helpers.h"
#include "util/osp_logging.h"
#include "util/saturate_cast.h"
namespace openscreen {
namespace cast {
// TODO(https://crbug.com/openscreen/123): Fix the declarations and then remove
// this:
using openscreen::operator<<; // For std::chrono::duration pretty-printing.
namespace {
constexpr int kBytesPerKilobyte = 1024;
// Lower and upper bounds on the frame duration passed to vpx_codec_encode(),
// to ensure sanity. Note that the upper bound is especially important in cases
// where the video has been paused for some lengthy amount of time.
constexpr Clock::duration kMinFrameDuration = milliseconds(1);
constexpr Clock::duration kMaxFrameDuration = milliseconds(125);
// Highest/lowest encoding speed settings allowed for the encoder. The valid
// range is [4, 16], but experiments show that, at speeds above 12, the
// encoding-time savings are not worth the loss in quality; and, at speeds
// below 6, the quality gains are not worth the extra encoding time.
constexpr int kHighestEncodingSpeed = 12;
constexpr int kLowestEncodingSpeed = 6;
// This is the equivalent change in encoding speed per one quantizer step.
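// In other words, an increase of 20 quantizer steps is treated as roughly
// equivalent to an increase of one encoding speed step.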
constexpr double kEquivalentEncodingSpeedStepPerQuantizerStep = 1 / 20.0;
} // namespace
StreamingVp8Encoder::StreamingVp8Encoder(const Parameters& params,
TaskRunner* task_runner,
Sender* sender)
: params_(params),
main_task_runner_(task_runner),
sender_(sender),
ideal_speed_setting_(kHighestEncodingSpeed),
encode_thread_([this] { ProcessWorkUnitsUntilTimeToQuit(); }) {
OSP_DCHECK_LE(1, params_.num_encode_threads);
OSP_DCHECK_LE(kMinQuantizer, params_.min_quantizer);
OSP_DCHECK_LE(params_.min_quantizer, params_.max_cpu_saver_quantizer);
OSP_DCHECK_LE(params_.max_cpu_saver_quantizer, params_.max_quantizer);
OSP_DCHECK_LE(params_.max_quantizer, kMaxQuantizer);
OSP_DCHECK_LT(0.0, params_.max_time_utilization);
OSP_DCHECK_LE(params_.max_time_utilization, 1.0);
OSP_DCHECK(main_task_runner_);
OSP_DCHECK(sender_);
const auto result =
vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &config_, 0);
OSP_CHECK_EQ(result, VPX_CODEC_OK);
// This is set to non-zero in ConfigureForNewFrameSize() later, to flag that
// the encoder has been initialized.
config_.g_threads = 0;
// Set the timebase to match that of openscreen::Clock::duration.
config_.g_timebase.num = Clock::duration::period::num;
config_.g_timebase.den = Clock::duration::period::den;
// |g_pass| and |g_lag_in_frames| must be "one pass" and zero, respectively,
// because of the way the libvpx API is used.
config_.g_pass = VPX_RC_ONE_PASS;
config_.g_lag_in_frames = 0;
// Rate control settings.
config_.rc_dropframe_thresh = 0; // The encoder may not drop any frames.
config_.rc_resize_allowed = 0;
config_.rc_end_usage = VPX_CBR;
config_.rc_target_bitrate = target_bitrate_ / kBytesPerKilobyte;
config_.rc_min_quantizer = params_.min_quantizer;
config_.rc_max_quantizer = params_.max_quantizer;
  // The reasons for the values chosen here (rc_*shoot_pct and rc_buf_*_sz) are
  // lost in history. They were brought over from the legacy Chrome Cast
  // Streaming Sender implementation.
config_.rc_undershoot_pct = 100;
config_.rc_overshoot_pct = 15;
config_.rc_buf_initial_sz = 500;
config_.rc_buf_optimal_sz = 600;
config_.rc_buf_sz = 1000;
config_.kf_mode = VPX_KF_DISABLED;
}
StreamingVp8Encoder::~StreamingVp8Encoder() {
{
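    // Setting |target_bitrate_| to zero signals
    // ProcessWorkUnitsUntilTimeToQuit(), running on |encode_thread_|, that it
    // is time to exit.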
std::unique_lock<std::mutex> lock(mutex_);
target_bitrate_ = 0;
cv_.notify_one();
}
encode_thread_.join();
}
int StreamingVp8Encoder::GetTargetBitrate() const {
// Note: No need to lock the |mutex_| since this method should be called on
// the same thread as SetTargetBitrate().
return target_bitrate_;
}
void StreamingVp8Encoder::SetTargetBitrate(int new_bitrate) {
  // Ensure that, when bps is converted to kbps downstream, the encoder bitrate
  // will not be zero.
new_bitrate = std::max(new_bitrate, kBytesPerKilobyte);
std::unique_lock<std::mutex> lock(mutex_);
// Only assign the new target bitrate if |target_bitrate_| has not yet been
// used to signal the |encode_thread_| to end.
if (target_bitrate_ > 0) {
target_bitrate_ = new_bitrate;
}
}
void StreamingVp8Encoder::EncodeAndSend(
const VideoFrame& frame,
Clock::time_point reference_time,
std::function<void(Stats)> stats_callback) {
WorkUnit work_unit;
  // TODO(miu): The |VideoFrame| struct should provide the media timestamp,
  // instead of this code inferring it from the reference timestamps, since:
  // 1) the video capturer's clock may tick at a different rate than the system
  // clock; and 2) using it directly would reduce jitter.
if (start_time_ == Clock::time_point::min()) {
start_time_ = reference_time;
work_unit.rtp_timestamp = RtpTimeTicks();
} else {
work_unit.rtp_timestamp = RtpTimeTicks::FromTimeSinceOrigin(
reference_time - start_time_, sender_->rtp_timebase());
if (work_unit.rtp_timestamp <= last_enqueued_rtp_timestamp_) {
OSP_LOG_WARN << "VIDEO[" << sender_->ssrc()
<< "] Dropping: RTP timestamp is not monotonically "
"increasing from last frame.";
return;
}
}
if (sender_->GetInFlightMediaDuration(work_unit.rtp_timestamp) >
sender_->GetMaxInFlightMediaDuration()) {
OSP_LOG_WARN << "VIDEO[" << sender_->ssrc()
<< "] Dropping: In-flight media duration would be too high.";
return;
}
Clock::duration frame_duration = frame.duration;
if (frame_duration <= Clock::duration::zero()) {
// The caller did not provide the frame duration in |frame|.
if (reference_time == start_time_) {
// Use the max for the first frame so libvpx will spend extra effort on
// its quality.
frame_duration = kMaxFrameDuration;
} else {
// Use the actual amount of time between the current and previous frame as
// a prediction for the next frame's duration.
frame_duration =
(work_unit.rtp_timestamp - last_enqueued_rtp_timestamp_)
.ToDuration<Clock::duration>(sender_->rtp_timebase());
}
}
work_unit.duration =
std::max(std::min(frame_duration, kMaxFrameDuration), kMinFrameDuration);
last_enqueued_rtp_timestamp_ = work_unit.rtp_timestamp;
work_unit.image = CloneAsVpxImage(frame);
work_unit.reference_time = reference_time;
work_unit.stats_callback = std::move(stats_callback);
const bool force_key_frame = sender_->NeedsKeyFrame();
{
std::unique_lock<std::mutex> lock(mutex_);
needs_key_frame_ |= force_key_frame;
encode_queue_.push(std::move(work_unit));
cv_.notify_one();
}
}
void StreamingVp8Encoder::DestroyEncoder() {
OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
if (is_encoder_initialized()) {
vpx_codec_destroy(&encoder_);
// Flag that the encoder is not initialized. See header comments for
// is_encoder_initialized().
config_.g_threads = 0;
}
}
void StreamingVp8Encoder::ProcessWorkUnitsUntilTimeToQuit() {
OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
for (;;) {
WorkUnitWithResults work_unit{};
bool force_key_frame;
int target_bitrate;
{
std::unique_lock<std::mutex> lock(mutex_);
if (target_bitrate_ <= 0) {
break; // Time to end this thread.
}
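      // Wait until new work is enqueued or shutdown is signaled. The queue is
      // re-checked after waking because the wait can also end spuriously.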
if (encode_queue_.empty()) {
cv_.wait(lock);
if (encode_queue_.empty()) {
continue;
}
}
static_cast<WorkUnit&>(work_unit) = std::move(encode_queue_.front());
encode_queue_.pop();
force_key_frame = needs_key_frame_;
needs_key_frame_ = false;
target_bitrate = target_bitrate_;
}
// Clock::now() is being called directly, instead of using a
// dependency-injected "now function," since actual wall time is being
// measured.
const Clock::time_point encode_start_time = Clock::now();
PrepareEncoder(work_unit.image->d_w, work_unit.image->d_h, target_bitrate);
EncodeFrame(force_key_frame, &work_unit);
ComputeFrameEncodeStats(Clock::now() - encode_start_time, target_bitrate,
&work_unit);
UpdateSpeedSettingForNextFrame(work_unit.stats);
main_task_runner_->PostTask(
[this, results = std::move(work_unit)]() mutable {
SendEncodedFrame(std::move(results));
});
}
DestroyEncoder();
}
void StreamingVp8Encoder::PrepareEncoder(int width,
int height,
int target_bitrate) {
OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
const int target_kbps = target_bitrate / kBytesPerKilobyte;
// Translate the |ideal_speed_setting_| into the VP8E_SET_CPUUSED setting and
// the minimum quantizer to use.
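  // Worked example with hypothetical values: if |ideal_speed_setting_| is 13.5
  // and params_.min_quantizer is 4, then the speed is clamped to 12 and the
  // remaining 1.5 speed steps map to 1.5 / (1/20) = 30 extra quantizer steps,
  // yielding a minimum quantizer of 34 (subject to the
  // params_.max_cpu_saver_quantizer cap).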
int speed;
int min_quantizer;
if (ideal_speed_setting_ > kHighestEncodingSpeed) {
speed = kHighestEncodingSpeed;
const double remainder = ideal_speed_setting_ - speed;
min_quantizer = rounded_saturate_cast<int>(
remainder / kEquivalentEncodingSpeedStepPerQuantizerStep +
params_.min_quantizer);
min_quantizer = std::min(min_quantizer, params_.max_cpu_saver_quantizer);
} else {
speed = std::max(rounded_saturate_cast<int>(ideal_speed_setting_),
kLowestEncodingSpeed);
min_quantizer = params_.min_quantizer;
}
if (static_cast<int>(config_.g_w) != width ||
static_cast<int>(config_.g_h) != height) {
DestroyEncoder();
}
if (!is_encoder_initialized()) {
config_.g_threads = params_.num_encode_threads;
config_.g_w = width;
config_.g_h = height;
config_.rc_target_bitrate = target_kbps;
config_.rc_min_quantizer = min_quantizer;
encoder_ = {};
const vpx_codec_flags_t flags = 0;
const auto init_result =
vpx_codec_enc_init(&encoder_, vpx_codec_vp8_cx(), &config_, flags);
OSP_CHECK_EQ(init_result, VPX_CODEC_OK);
// Raise the threshold for considering macroblocks as static. The default is
// zero, so this setting makes the encoder less sensitive to motion. This
// lowers the probability of needing to utilize more CPU to search for
// motion vectors.
const auto ctl_result =
vpx_codec_control(&encoder_, VP8E_SET_STATIC_THRESHOLD, 1);
OSP_CHECK_EQ(ctl_result, VPX_CODEC_OK);
// Ensure the speed will be set (below).
current_speed_setting_ = ~speed;
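    // (Bitwise NOT guarantees |current_speed_setting_| != |speed|, so the
    // VP8E_SET_CPUUSED control below will be applied.)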
} else if (static_cast<int>(config_.rc_target_bitrate) != target_kbps ||
static_cast<int>(config_.rc_min_quantizer) != min_quantizer) {
config_.rc_target_bitrate = target_kbps;
config_.rc_min_quantizer = min_quantizer;
const auto update_config_result =
vpx_codec_enc_config_set(&encoder_, &config_);
OSP_CHECK_EQ(update_config_result, VPX_CODEC_OK);
}
if (current_speed_setting_ != speed) {
// Pass the |speed| as a negative value to turn off VP8's automatic speed
// selection logic and force the exact setting.
const auto ctl_result =
vpx_codec_control(&encoder_, VP8E_SET_CPUUSED, -speed);
OSP_CHECK_EQ(ctl_result, VPX_CODEC_OK);
current_speed_setting_ = speed;
}
}
void StreamingVp8Encoder::EncodeFrame(bool force_key_frame,
WorkUnitWithResults* work_unit) {
OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
// The presentation timestamp argument here is fixed to zero to force the
// encoder to base its single-frame bandwidth calculations entirely on
// |frame_duration| and the target bitrate setting.
const vpx_codec_pts_t pts = 0;
const vpx_enc_frame_flags_t flags = force_key_frame ? VPX_EFLAG_FORCE_KF : 0;
const auto encode_result =
vpx_codec_encode(&encoder_, work_unit->image.get(), pts,
work_unit->duration.count(), flags, VPX_DL_REALTIME);
OSP_CHECK_EQ(encode_result, VPX_CODEC_OK);
const vpx_codec_cx_pkt_t* pkt;
for (vpx_codec_iter_t iter = nullptr;;) {
pkt = vpx_codec_get_cx_data(&encoder_, &iter);
// vpx_codec_get_cx_data() returns null once the "iteration" is complete.
// However, that point should never be reached because a
// VPX_CODEC_CX_FRAME_PKT must be encountered before that.
OSP_CHECK(pkt);
if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
break;
}
}
// A copy of the payload data is being made here. That's okay since it has to
// be copied at some point anyway, to be passed back to the main thread.
auto* const begin = static_cast<const uint8_t*>(pkt->data.frame.buf);
auto* const end = begin + pkt->data.frame.sz;
work_unit->payload.assign(begin, end);
work_unit->is_key_frame = !!(pkt->data.frame.flags & VPX_FRAME_IS_KEY);
}
void StreamingVp8Encoder::ComputeFrameEncodeStats(
Clock::duration encode_wall_time,
int target_bitrate,
WorkUnitWithResults* work_unit) {
OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
Stats& stats = work_unit->stats;
// Note: stats.frame_id is set later, in SendEncodedFrame().
stats.rtp_timestamp = work_unit->rtp_timestamp;
stats.encode_wall_time = encode_wall_time;
stats.frame_duration = work_unit->duration;
stats.encoded_size = work_unit->payload.size();
constexpr double kBytesPerBit = 1.0 / CHAR_BIT;
constexpr double kSecondsPerClockTick =
1.0 / Clock::to_duration(seconds(1)).count();
const double target_bytes_per_clock_tick =
target_bitrate * (kBytesPerBit * kSecondsPerClockTick);
stats.target_size = target_bytes_per_clock_tick * work_unit->duration.count();
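  // For example (hypothetical numbers): with a 2 Mbps target bitrate and a
  // 30 ms frame duration, the target size is (2,000,000 / 8) * 0.030 = 7500
  // bytes.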
// The quantizer the encoder used. This is the result of the VP8 encoder
// taking a guess at what quantizer value would produce an encoded frame size
// as close to the target as possible.
const auto get_quantizer_result = vpx_codec_control(
&encoder_, VP8E_GET_LAST_QUANTIZER_64, &stats.quantizer);
OSP_CHECK_EQ(get_quantizer_result, VPX_CODEC_OK);
// Now that the frame has been encoded and the number of bytes is known, the
// perfect quantizer value (i.e., the one that should have been used) can be
// determined.
stats.perfect_quantizer = stats.quantizer * stats.space_utilization();
}
void StreamingVp8Encoder::UpdateSpeedSettingForNextFrame(const Stats& stats) {
OSP_DCHECK_EQ(std::this_thread::get_id(), encode_thread_.get_id());
// Combine the speed setting that was used to encode the last frame, and the
// quantizer the encoder chose into a single speed metric.
const double speed = current_speed_setting_ +
kEquivalentEncodingSpeedStepPerQuantizerStep *
std::max(0, stats.quantizer - params_.min_quantizer);
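  // For example (hypothetical values): a speed setting of 12 with a quantizer
  // 10 steps above the minimum yields a combined metric of 12 + 10/20 = 12.5.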
// Like |Stats::perfect_quantizer|, this computes a "hindsight" speed setting
// for the last frame, one that may have potentially allowed for a
// better-quality quantizer choice by the encoder, while also keeping CPU
// utilization within budget.
const double perfect_speed =
speed * stats.time_utilization() / params_.max_time_utilization;
  // Update the ideal speed setting, to be used for the next frame. An
  // exponentially-decaying weighted average is used here to smooth out noise.
  // The weight is based on the duration of the frame that was encoded.
constexpr Clock::duration kDecayHalfLife = milliseconds(120);
const double ticks = stats.frame_duration.count();
const double weight = ticks / (ticks + kDecayHalfLife.count());
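  // For example (hypothetical duration): a 30 ms frame yields a weight of
  // 30 / (30 + 120) = 0.2, i.e. 20% of |perfect_speed| and 80% of the previous
  // ideal setting.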
ideal_speed_setting_ =
weight * perfect_speed + (1.0 - weight) * ideal_speed_setting_;
OSP_DCHECK(std::isfinite(ideal_speed_setting_));
}
void StreamingVp8Encoder::SendEncodedFrame(WorkUnitWithResults results) {
OSP_DCHECK(main_task_runner_->IsRunningOnTaskRunner());
EncodedFrame frame;
frame.frame_id = sender_->GetNextFrameId();
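  // A key frame references itself; a delta frame references the immediately
  // preceding frame.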
if (results.is_key_frame) {
frame.dependency = EncodedFrame::KEY_FRAME;
frame.referenced_frame_id = frame.frame_id;
} else {
frame.dependency = EncodedFrame::DEPENDS_ON_ANOTHER;
frame.referenced_frame_id = frame.frame_id - 1;
}
frame.rtp_timestamp = results.rtp_timestamp;
frame.reference_time = results.reference_time;
frame.data = absl::Span<uint8_t>(results.payload);
if (sender_->EnqueueFrame(frame) != Sender::OK) {
// Since the frame will not be sent, the encoder's frame dependency chain
// has been broken. Force a key frame for the next frame.
std::unique_lock<std::mutex> lock(mutex_);
needs_key_frame_ = true;
}
if (results.stats_callback) {
results.stats.frame_id = frame.frame_id;
results.stats_callback(results.stats);
}
}
namespace {
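// Copies |num_rows| rows of plane data from |src| to |dst|, handling the case
// where the source and destination strides differ (only the smaller row width
// is copied).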
void CopyPlane(const uint8_t* src,
int src_stride,
int num_rows,
uint8_t* dst,
int dst_stride) {
if (src_stride == dst_stride) {
memcpy(dst, src, src_stride * num_rows);
return;
}
const int bytes_per_row = std::min(src_stride, dst_stride);
while (--num_rows >= 0) {
memcpy(dst, src, bytes_per_row);
dst += dst_stride;
src += src_stride;
}
}
} // namespace
// static
StreamingVp8Encoder::VpxImageUniquePtr StreamingVp8Encoder::CloneAsVpxImage(
const VideoFrame& frame) {
OSP_DCHECK_GE(frame.width, 0);
OSP_DCHECK_GE(frame.height, 0);
OSP_DCHECK_GE(frame.yuv_strides[0], 0);
OSP_DCHECK_GE(frame.yuv_strides[1], 0);
OSP_DCHECK_GE(frame.yuv_strides[2], 0);
constexpr int kAlignment = 32;
VpxImageUniquePtr image(vpx_img_alloc(nullptr, VPX_IMG_FMT_I420, frame.width,
frame.height, kAlignment));
OSP_CHECK(image);
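  // In the I420 format, the U and V planes are subsampled to half the height
  // (rounded up), hence the (frame.height + 1) / 2 row counts below.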
CopyPlane(frame.yuv_planes[0], frame.yuv_strides[0], frame.height,
image->planes[VPX_PLANE_Y], image->stride[VPX_PLANE_Y]);
CopyPlane(frame.yuv_planes[1], frame.yuv_strides[1], (frame.height + 1) / 2,
image->planes[VPX_PLANE_U], image->stride[VPX_PLANE_U]);
CopyPlane(frame.yuv_planes[2], frame.yuv_strides[2], (frame.height + 1) / 2,
image->planes[VPX_PLANE_V], image->stride[VPX_PLANE_V]);
return image;
}
} // namespace cast
} // namespace openscreen