// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
|
#include "host-common/MediaCudaVideoHelper.h"
|
|
#include "host-common/MediaCudaDriverHelper.h"
|
|
#include "host-common/MediaCudaUtils.h"
|
|
#include "host-common/YuvConverter.h"
|
|
#include "android/utils/debug.h"
|
|
|
|
extern "C" {
|
|
#define INIT_CUDA_GL 1
|
|
#include "host-common/dynlink_cuda.h"
|
|
#include "host-common/dynlink_cudaGL.h"
|
|
#include "host-common/dynlink_nvcuvid.h"
|
|
}
|
|
// Set to 1 to enable verbose debug logging from this helper.
#define MEDIA_CUDA_DEBUG 0

#if MEDIA_CUDA_DEBUG
// Debug printf: prefixes every message with the function name and line number.
#define CUDA_DPRINT(fmt, ...)                                                 \
    fprintf(stderr, "media-cuda-video-helper: %s:%d " fmt "\n", __func__,     \
            __LINE__, ##__VA_ARGS__);
#else
#define CUDA_DPRINT(fmt, ...)
#endif

// Invokes a CUDA/NVDEC driver API call and, in debug builds, logs a failure.
// NOTE(review): errors are only printed, never propagated — callers cannot
// observe a failure through this macro; confirm that is intended best-effort.
#define NVDEC_API_CALL(cuvidAPI)                                              \
    do {                                                                      \
        CUresult errorCode = cuvidAPI;                                        \
        if (errorCode != CUDA_SUCCESS) {                                      \
            CUDA_DPRINT("%s failed with error code %d\n", #cuvidAPI,          \
                        (int)errorCode);                                      \
        }                                                                     \
    } while (0)
|
namespace android {
namespace emulation {

// Process-wide latch: once any instance determines that CUDA decoding does
// not work on this host, this stays false so later init() calls fail fast.
bool MediaCudaVideoHelper::s_isCudaDecoderGood = true;

using TextureFrame = MediaTexturePool::TextureFrame;
using FrameInfo = MediaSnapshotState::FrameInfo;
using ColorAspects = MediaSnapshotState::ColorAspects;
|
// Constructs the helper; all CUDA work is deferred to init().
// oMode selects whether decoded output is kept or discarded, fMode selects
// GPU-texture vs host-memory frame storage, and cudaVideoCodecType is the
// codec (H264/VP8/VP9) handed to the NVDEC parser.
MediaCudaVideoHelper::MediaCudaVideoHelper(OutputTreatmentMode oMode,
                                           FrameStorageMode fMode,
                                           cudaVideoCodec cudaVideoCodecType)
    : mUseGpuTexture(fMode == FrameStorageMode::USE_GPU_TEXTURE),
      mCudaVideoCodecType(cudaVideoCodecType) {
    const bool discardOutput = (oMode == OutputTreatmentMode::IGNORE_RESULT);
    mIgnoreDecoderOutput = discardOutput;
}
|
// Destructor: releases parser, decoder, context lock and CUDA context via
// deInit(), which is safe to call even if init() never ran or failed.
MediaCudaVideoHelper::~MediaCudaVideoHelper() {
    deInit();
}
|
// Releases all CUDA resources in reverse order of creation.
// Idempotent: every handle is null-checked and nulled out, so repeated calls
// (e.g. destructor after an explicit deInit) are harmless.
void MediaCudaVideoHelper::deInit() {
    CUDA_DPRINT("deInit calling");

    mSavedDecodedFrames.clear();
    if (mCudaContext != nullptr) {
        // Parser/decoder destruction must run with our context current.
        NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
        if (mCudaParser != nullptr) {
            NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser));
            mCudaParser = nullptr;
        }

        if (mCudaDecoder != nullptr) {
            NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
            mCudaDecoder = nullptr;
        }
        NVDEC_API_CALL(cuCtxPopCurrent(NULL));
        // The ctx lock was created in init() right after the context, so a
        // non-null context implies the lock exists.
        NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock));
    }

    // Destroy the context itself last, after everything that uses it.
    if (mCudaContext != nullptr) {
        CUresult myres = cuCtxDestroy(mCudaContext);
        if (myres != CUDA_SUCCESS) {
            CUDA_DPRINT("Failed to destroy cuda context; error code %d",
                        (int)myres);
        }
        mCudaContext = nullptr;
    }
}
|
bool MediaCudaVideoHelper::init() {
|
|
if (!s_isCudaDecoderGood) {
|
|
CUDA_DPRINT(
|
|
"Already verified: cuda decoder does not work on this host");
|
|
return false;
|
|
}
|
|
if (!MediaCudaDriverHelper::initCudaDrivers()) {
|
|
CUDA_DPRINT("Failed to initCudaDrivers");
|
|
mIsGood = false;
|
|
mErrorCode = 1;
|
|
s_isCudaDecoderGood = false;
|
|
return false;
|
|
}
|
|
|
|
if (mCudaContext != nullptr) {
|
|
deInit();
|
|
}
|
|
|
|
// cudat stuff
|
|
const int gpuIndex = 0;
|
|
const int cudaFlags = 0;
|
|
CUdevice cudaDevice = 0;
|
|
CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex);
|
|
if (myres != CUDA_SUCCESS) {
|
|
mIsGood = false;
|
|
mErrorCode = 2;
|
|
s_isCudaDecoderGood = false;
|
|
CUDA_DPRINT("Failed to get cuda device, error code %d", (int)myres);
|
|
return false;
|
|
}
|
|
|
|
char buf[1024];
|
|
myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice);
|
|
if (myres != CUDA_SUCCESS) {
|
|
mIsGood = false;
|
|
mErrorCode = 3;
|
|
s_isCudaDecoderGood = false;
|
|
CUDA_DPRINT("Failed to get gpu device name, error code %d", (int)myres);
|
|
return false;
|
|
}
|
|
|
|
CUDA_DPRINT("using gpu device %s", buf);
|
|
|
|
myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice);
|
|
if (myres != CUDA_SUCCESS) {
|
|
mIsGood = false;
|
|
s_isCudaDecoderGood = false;
|
|
CUDA_DPRINT("Failed to create cuda context, error code %d", (int)myres);
|
|
return false;
|
|
}
|
|
|
|
NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext));
|
|
|
|
CUVIDPARSERPARAMS videoParserParameters = {};
|
|
// videoParserParameters.CodecType = (mType == MediaCodecType::VP8Codec) ?
|
|
// cudaVideoCodec_VP8 : cudaVideoCodec_VP9;
|
|
videoParserParameters.CodecType = mCudaVideoCodecType;
|
|
|
|
videoParserParameters.ulMaxNumDecodeSurfaces = 1;
|
|
videoParserParameters.ulMaxDisplayDelay = 1;
|
|
videoParserParameters.pUserData = this;
|
|
videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
|
|
videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
|
|
videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
|
|
NVDEC_API_CALL(
|
|
cuvidCreateVideoParser(&mCudaParser, &videoParserParameters));
|
|
|
|
CUDA_DPRINT("Successfully created cuda context %p", mCudaContext);
|
|
dprint("successfully created cuda video decoder for %s, with gpu texture "
|
|
"mode %s",
|
|
mCudaVideoCodecType == cudaVideoCodec_H264
|
|
? "H264"
|
|
: (mCudaVideoCodecType == cudaVideoCodec_VP8 ? "VP8"
|
|
: "VP9"),
|
|
mUseGpuTexture ? "on" : "off");
|
|
|
|
return true;
|
|
}
|
|
|
|
void MediaCudaVideoHelper::decode(const uint8_t* frame,
|
|
size_t szBytes,
|
|
uint64_t inputPts) {
|
|
CUDA_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes);
|
|
|
|
CUVIDSOURCEDATAPACKET packet = {0};
|
|
packet.payload = frame;
|
|
packet.payload_size = szBytes;
|
|
packet.flags = CUVID_PKT_TIMESTAMP;
|
|
packet.timestamp = inputPts;
|
|
if (!frame || szBytes == 0) {
|
|
packet.flags |= CUVID_PKT_ENDOFSTREAM;
|
|
} else {
|
|
++mNumInputFrame;
|
|
}
|
|
NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
|
|
}
|
|
|
|
void MediaCudaVideoHelper::flush() {
|
|
CUDA_DPRINT("started flushing");
|
|
CUVIDSOURCEDATAPACKET packet = {0};
|
|
packet.payload = NULL;
|
|
packet.payload_size = 0;
|
|
packet.flags |= CUVID_PKT_ENDOFSTREAM;
|
|
NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
|
|
CUDA_DPRINT("done one flushing");
|
|
}
|
|
|
|
// Parser callback: invoked when a (new) sequence header is parsed.
// Validates the stream against this GPU's decode capabilities, records the
// stream geometry and color description, and (re)creates the NVDEC decoder.
// The return value tells the parser how many decode surfaces to use.
int MediaCudaVideoHelper::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) {
    int nDecodeSurface = 8;  // need 8 for 4K video

    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));

    decodecaps.eCodecType = pVideoFormat->codec;
    decodecaps.eChromaFormat = pVideoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;

    // Capability query must run under our CUDA context.
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    if (!decodecaps.bIsSupported) {
        mIsGood = false;
        mErrorCode = 4;
        CUDA_DPRINT("Codec not supported on this GPU.");
        // NOTE(review): failure paths return nDecodeSurface rather than 0, so
        // the parser keeps running with only mIsGood=false set — confirm that
        // is intended.
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
        (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
        CUDA_DPRINT("Resolution not supported on this GPU");
        mIsGood = false;
        mErrorCode = 5;
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
        decodecaps.nMaxMBCount) {
        CUDA_DPRINT("MBCount not supported on this GPU");
        mIsGood = false;
        mErrorCode = 6;
        return nDecodeSurface;
    }

    // Visible (display) dimensions, as opposed to the coded dimensions used
    // for the decoder surfaces below.
    mLumaWidth =
            pVideoFormat->display_area.right - pVideoFormat->display_area.left;
    mLumaHeight =
            pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
    mChromaHeight = mLumaHeight * 0.5;  // NV12
    mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1;

    // H264 carries a VUI color description; capture it for downstream
    // consumers of the decoded frames.
    if (mCudaVideoCodecType == cudaVideoCodec_H264) {
        if (pVideoFormat->video_signal_description.video_full_range_flag)
            mColorRange = 2;
        else
            mColorRange = 0;

        mColorPrimaries =
                pVideoFormat->video_signal_description.color_primaries;
        mColorTransfer =
                pVideoFormat->video_signal_description.transfer_characteristics;
        mColorSpace =
                pVideoFormat->video_signal_description.matrix_coefficients;
    }

    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
    CUDA_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat);
    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    if (pVideoFormat->progressive_sequence)
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        videoDecodeCreateInfo.DeinterlaceMode =
                cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded by
    // NVDEC hardware
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
    videoDecodeCreateInfo.vidLock = mCtxLock;
    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
    // Track output-size changes (e.g. a mid-stream resolution switch) and
    // grow the host output buffer size if needed.
    if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) {
        CUDA_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight);
        mOutputWidth = mLumaWidth;
        mOutputHeight = mLumaHeight;
        CUDA_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight);
        // NV12: full-size luma plane plus half-size interleaved chroma plane.
        unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
        if (mOutBufferSize < newOutBufferSize) {
            mOutBufferSize = newOutBufferSize;
        }
    }

    videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;

    mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
    mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;

    // Recreate the decoder under our context, destroying any previous one.
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    if (mCudaDecoder != nullptr) {
        NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
        mCudaDecoder = nullptr;
    }
    {
        // Debug aid: report GPU memory before decoder allocation.
        size_t free, total;
        cuMemGetInfo(&free, &total);
        CUDA_DPRINT("free memory %g M, total %g M", free / 1048576.0,
                    total / 1048576.0);
    }
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    CUDA_DPRINT("successfully called. decoder %p", mCudaDecoder);
    return nDecodeSurface;
}
|
|
// Parser callback: submits one parsed picture to the NVDEC hardware decoder.
// Returning 1 tells the parser to keep going; errors from the decode call
// itself are only logged (see NVDEC_API_CALL).
int MediaCudaVideoHelper::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) {
    NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams));
    CUDA_DPRINT("successfully called.");
    return 1;
}
|
|
// Parser callback: a decoded picture is ready in display order. Maps the
// NV12 surface, copies it either into a pooled GPU texture (texture mode)
// or into host memory converted to planar YUV, and appends the result to
// mSavedDecodedFrames under mFrameLock. Returns 1 to continue consuming
// output, 0 to drop this (and signal trouble to the parser).
int MediaCudaVideoHelper::HandlePictureDisplay(CUVIDPARSERDISPINFO* pDispInfo) {
    if (mIgnoreDecoderOutput) {
        return 1;
    }
    constexpr int MAX_NUM_INPUT_WITHOUT_OUTPUT = 16;
    if (mNumOutputFrame == 0 && mNumInputFrame > MAX_NUM_INPUT_WITHOUT_OUTPUT) {
        // after more than 16 inputs, there is still no output,
        // probably corrupted stream, ignore everything from now on
        dprint("WARNING: %d frames decoded witout any output, possibly bad "
               "input stream. Ignore output frames (they might be corrupted) "
               "from now on.",
               MAX_NUM_INPUT_WITHOUT_OUTPUT);
        return 0;
    }

    // Post-processing parameters for mapping the decoded frame.
    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field =
            pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = 0;
    uint64_t myOutputPts = pDispInfo->timestamp;

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;
    // Checked manually (not via NVDEC_API_CALL) because we must bail out
    // before touching an unmapped frame.
    CUresult errorCode = cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index,
                                            &dpSrcFrame, &nSrcPitch,
                                            &videoProcessingParameters);
    if (errorCode != CUDA_SUCCESS) {
        CUDA_DPRINT("failed to call cuvidMapVideoFrame with error code %d\n", (int)errorCode);
        return 0;
    }

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    // NV12 size: full luma plane plus half-size interleaved chroma plane.
    unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
    std::vector<uint8_t> myFrame;
    TextureFrame texFrame;
    if (mUseGpuTexture && mTexturePool != nullptr) {
        // GPU path: copy the mapped NV12 surface directly into a pooled
        // texture pair (Y + interleaved UV) via the CUDA-GL updater.
        media_cuda_utils_copy_context my_copy_context{
                .src_frame = dpSrcFrame,
                .src_pitch = nSrcPitch,
                .src_surface_height = mSurfaceHeight,
                .dest_width = mOutputWidth,
                .dest_height = mOutputHeight,
        };
        texFrame = mTexturePool->getTextureFrame(mOutputWidth, mOutputHeight);
        mTexturePool->saveDecodedFrameToTexture(
                texFrame, &my_copy_context,
                (void*)media_cuda_utils_nv12_updater);
    } else {
        // Host path: 2D-copy the luma plane, then the chroma plane, into one
        // contiguous host buffer.
        myFrame.resize(newOutBufferSize);
        uint8_t* pDecodedFrame = &(myFrame[0]);

        CUDA_MEMCPY2D m = {0};
        m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        m.srcDevice = dpSrcFrame;
        m.srcPitch = nSrcPitch;
        m.dstMemoryType = CU_MEMORYTYPE_HOST;
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
        m.dstPitch = mOutputWidth * mBPP;
        m.WidthInBytes = mOutputWidth * mBPP;
        m.Height = mLumaHeight;
        CUDA_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d",
                    m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes,
                    (int)m.Height);

        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));

        // Chroma plane starts one full (coded) surface height below luma.
        m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame +
                                    m.srcPitch * mSurfaceHeight);
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
                                                m.dstPitch * mLumaHeight);
        m.Height = mChromaHeight;
        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
        // De-interleave NV12's UV plane into planar output in place.
        YuvConverter<uint8_t> convert8(mOutputWidth, mOutputHeight);
        convert8.UVInterleavedToPlanar(pDecodedFrame);
    }

    // Wait for the async copies before unmapping the source frame.
    NVDEC_API_CALL(cuStreamSynchronize(0));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame));
    {
        std::lock_guard<std::mutex> g(mFrameLock);

        // In texture mode myFrame is empty and the texture ids are set; in
        // host mode the reverse holds.
        mSavedDecodedFrames.push_back(MediaSnapshotState::FrameInfo{
                std::move(myFrame),
                std::vector<uint32_t>{texFrame.Ytex, texFrame.UVtex},
                (int)mOutputWidth, (int)mOutputHeight, myOutputPts,
                ColorAspects{mColorPrimaries, mColorRange, mColorTransfer,
                             mColorSpace}});
    }
    ++mNumOutputFrame;
    CUDA_DPRINT("successfully called.");
    return 1;
}
|
|
} // namespace emulation
|
|
} // namespace android
|