// Copyright (C) 2019 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "host-common/MediaH264DecoderCuvid.h"

#include "host-common/H264NaluParser.h"
#include "host-common/YuvConverter.h"
#include "android/main-emugl.h"

// MediaH264DecoderCuvid.h
#include <cstdint>
#include <string>
#include <vector>

#ifdef _WIN32
#define WIN32_LEAN_AND_MEAN 1
#include <windows.h>
#include <winioctl.h>
#endif

#include <stdio.h>
#include <string.h>

extern "C" {
#define INIT_CUDA_GL 1
#include "host-common/dynlink_cuda.h"
#include "host-common/dynlink_cudaGL.h"
#include "host-common/dynlink_nvcuvid.h"
}

#define MEDIA_H264_DEBUG 0

#if MEDIA_H264_DEBUG
#define H264_DPRINT(fmt, ...)                                              \
    fprintf(stderr, "h264-cuvid-dec: %s:%d " fmt "\n", __func__, __LINE__, \
            ##__VA_ARGS__);
#else
#define H264_DPRINT(fmt, ...)
#endif

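// NVDEC_API_CALL only logs a failing CUDA/CUVID call (and only when
// MEDIA_H264_DEBUG is enabled); it neither aborts nor propagates the error,
// so decoding continues on a best-effort basis after a failure.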
#define NVDEC_API_CALL(cuvidAPI)                                     \
    do {                                                             \
        CUresult errorCode = cuvidAPI;                               \
        if (errorCode != CUDA_SUCCESS) {                             \
            H264_DPRINT("%s failed with error code %d\n", #cuvidAPI, \
                        (int)errorCode);                             \
        }                                                            \
    } while (0)

namespace android {
namespace emulation {

using InitContextParam = H264PingInfoParser::InitContextParam;
using DecodeFrameParam = H264PingInfoParser::DecodeFrameParam;
using ResetParam = H264PingInfoParser::ResetParam;
using GetImageParam = H264PingInfoParser::GetImageParam;
using TextureFrame = MediaHostRenderer::TextureFrame;

MediaH264DecoderCuvid::MediaH264DecoderCuvid(uint64_t id,
                                             H264PingInfoParser parser)
    : mId(id), mParser(parser) {
    auto useGpuTextureEnv = android::base::System::getEnvironmentVariable(
            "ANDROID_EMU_CODEC_USE_GPU_TEXTURE");
    if (useGpuTextureEnv != "") {
        if (mParser.version() == 200) {
            if (emuglConfig_get_current_renderer() == SELECTED_RENDERER_HOST) {
                mUseGpuTexture = true;
            } else {
                H264_DPRINT(
                        "cannot use gpu texture to save decoded frame in "
                        "non-host gpu mode");
                if (emuglConfig_get_current_renderer() ==
                    SELECTED_RENDERER_SWIFTSHADER_INDIRECT) {
                    H264_DPRINT("your gpu mode is: swiftshader_indirect");
                }
            }
        }
    }
}

MediaH264DecoderPlugin* MediaH264DecoderCuvid::clone() {
    return new MediaH264DecoderCuvid(mId, mParser);
}

MediaH264DecoderCuvid::~MediaH264DecoderCuvid() {
    destroyH264Context();
}

void MediaH264DecoderCuvid::reset(void* ptr) {
    destroyH264Context();
    ResetParam param{};
    mParser.parseResetParams(ptr, param);
    initH264ContextInternal(param.width, param.height, param.outputWidth,
                            param.outputHeight, param.outputPixelFormat);
}

void MediaH264DecoderCuvid::initH264Context(void* ptr) {
    InitContextParam param{};
    mParser.parseInitContextParams(ptr, param);
    initH264ContextInternal(param.width, param.height, param.outputWidth,
                            param.outputHeight, param.outputPixelFormat);
}

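// Brings up the CUVID decode pipeline: load the CUDA/CUVID drivers, pick GPU
// 0, create a CUDA context plus ctx lock, and attach a CUVID parser wired to
// the three static callbacks below. The decoder itself is created lazily in
// HandleVideoSequence() once the parser has seen the stream's SPS. Output is
// NV12, so the staging buffer is sized w * h * 3 / 2 bytes (w*h luma plus
// w*h/2 interleaved chroma); e.g. 1920x1080 needs 3,110,400 bytes.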
void MediaH264DecoderCuvid::initH264ContextInternal(unsigned int width,
                                                    unsigned int height,
                                                    unsigned int outWidth,
                                                    unsigned int outHeight,
                                                    PixelFormat outPixFmt) {
    if (!initCudaDrivers()) {
        H264_DPRINT("Failed to initH264Context because driver is not working");
        return;
    }

    if (mCudaContext != nullptr) {
        destroyH264Context();
    }
    H264_DPRINT("%s(w=%u h=%u out_w=%u out_h=%u pixfmt=%u)", __func__, width,
                height, outWidth, outHeight, (uint8_t)outPixFmt);
    mWidth = width;
    mHeight = height;

    mOutputWidth = outWidth;
    mOutputHeight = outHeight;
    mOutPixFmt = outPixFmt;
    mOutBufferSize = outWidth * outHeight * 3 / 2;

    // CUDA setup
    const int gpuIndex = 0;
    const int cudaFlags = 0;
    CUdevice cudaDevice = 0;
    CUresult myres = cuDeviceGet(&cudaDevice, gpuIndex);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT("Failed to get cuda device, error code %d", (int)myres);
        return;
    }

    char buf[1024];
    myres = cuDeviceGetName(buf, sizeof(buf), cudaDevice);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT("Failed to get gpu device name, error code %d", (int)myres);
        return;
    }

    H264_DPRINT("using gpu device %s", buf);

    myres = cuCtxCreate(&mCudaContext, cudaFlags, cudaDevice);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT("Failed to create cuda context, error code %d", (int)myres);
    }

    NVDEC_API_CALL(cuvidCtxLockCreate(&mCtxLock, mCudaContext));

    CUVIDPARSERPARAMS videoParserParameters = {};
    videoParserParameters.CodecType = cudaVideoCodec_H264;
    videoParserParameters.ulMaxNumDecodeSurfaces = 1;
    videoParserParameters.ulMaxDisplayDelay = 1;
    videoParserParameters.pUserData = this;
    videoParserParameters.pfnSequenceCallback = HandleVideoSequenceProc;
    videoParserParameters.pfnDecodePicture = HandlePictureDecodeProc;
    videoParserParameters.pfnDisplayPicture = HandlePictureDisplayProc;
    NVDEC_API_CALL(
            cuvidCreateVideoParser(&mCudaParser, &videoParserParameters));

    H264_DPRINT("Successfully created cuda context %p", mCudaContext);
}

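// Tears down in reverse order of creation: recycle any queued texture frames
// back to the renderer, then destroy the parser and decoder with the CUDA
// context current, drop the ctx lock, and finally destroy the context itself.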
void MediaH264DecoderCuvid::destroyH264Context() {
    H264_DPRINT("destroyH264Context called");

    for (auto texFrame : mSavedTexFrames) {
        mRenderer.putTextureFrame(texFrame);
    }
    mRenderer.cleanUpTextures();
    mSavedTexFrames.clear();
    if (mCudaContext != nullptr) {
        NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
        if (mCudaParser != nullptr) {
            NVDEC_API_CALL(cuvidDestroyVideoParser(mCudaParser));
            mCudaParser = nullptr;
        }

        if (mCudaDecoder != nullptr) {
            NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
            mCudaDecoder = nullptr;
        }
        NVDEC_API_CALL(cuCtxPopCurrent(NULL));
        NVDEC_API_CALL(cuvidCtxLockDestroy(mCtxLock));
    }

    if (mCudaContext != nullptr) {
        CUresult myres = cuCtxDestroy(mCudaContext);
        if (myres != CUDA_SUCCESS) {
            H264_DPRINT("Failed to destroy cuda context; error code %d",
                        (int)myres);
        }
        mCudaContext = nullptr;
    }
}

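// decodeFrame() keeps a shadow copy of the stream for snapshotting: an SPS
// NALU starts a fresh SnapshotState, a PPS is recorded and clears previously
// saved packets, an I frame clears saved packets, and every other packet is
// appended with its pts so load() can replay SPS -> PPS -> packets to rebuild
// the hardware decoder state.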
void MediaH264DecoderCuvid::decodeFrame(void* ptr) {
    DecodeFrameParam param{};
    mParser.parseDecodeFrameParams(ptr, param);

    const uint8_t* frame = param.pData;
    size_t szBytes = param.size;
    uint64_t inputPts = param.pts;

    const bool enableSnapshot = true;
    if (enableSnapshot) {
        std::vector<uint8_t> v;
        v.assign(frame, frame + szBytes);
        bool hasSps = H264NaluParser::checkSpsFrame(frame, szBytes);
        if (hasSps) {
            mSnapshotState = SnapshotState{};
            mSnapshotState.saveSps(v);
        } else {
            bool hasPps = H264NaluParser::checkPpsFrame(frame, szBytes);
            if (hasPps) {
                mSnapshotState.savePps(v);
                mSnapshotState.savedPackets.clear();
                mSnapshotState.savedDecodedFrame.data.clear();
            } else {
                bool isIFrame = H264NaluParser::checkIFrame(frame, szBytes);
                if (isIFrame) {
                    mSnapshotState.savedPackets.clear();
                }
                mSnapshotState.savePacket(std::move(v), inputPts);
                H264_DPRINT("saving packet; total is %d",
                            (int)(mSnapshotState.savedPackets.size()));
            }
        }
    }

    decodeFrameInternal(param.pConsumedBytes, param.pDecoderErrorCode, frame,
                        szBytes, inputPts);
}

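// Feeds one access unit to the CUVID parser. A null/empty payload is turned
// into a CUVID_PKT_ENDOFSTREAM packet, which forces the parser to drain any
// frames it is still holding for reordering.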
void MediaH264DecoderCuvid::decodeFrameInternal(uint64_t* pRetSzBytes,
                                                int32_t* pRetErr,
                                                const uint8_t* frame,
                                                size_t szBytes,
                                                uint64_t inputPts) {
    mIsInFlush = false;
    H264_DPRINT("%s(frame=%p, sz=%zu)", __func__, frame, szBytes);
    Err h264Err = Err::NoErr;

    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = frame;
    packet.payload_size = szBytes;
    packet.flags = CUVID_PKT_TIMESTAMP;
    packet.timestamp = inputPts;
    if (!frame || szBytes == 0) {
        packet.flags |= CUVID_PKT_ENDOFSTREAM;
    }
    NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
    if (pRetSzBytes) {
        *pRetSzBytes = szBytes;
    }
    if (pRetErr) {
        *pRetErr = (int32_t)h264Err;
    }
}

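// Flushing is done by submitting an empty end-of-stream packet; mIsInFlush
// gates it so getImage() can re-issue the drain until new input arrives and
// decodeFrameInternal() clears the flag again.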
void MediaH264DecoderCuvid::doFlush() {
    if (!mIsInFlush) {
        return;
    }
    H264_DPRINT("started flushing");
    CUVIDSOURCEDATAPACKET packet = {0};
    packet.payload = NULL;
    packet.payload_size = 0;
    packet.flags |= CUVID_PKT_ENDOFSTREAM;
    NVDEC_API_CALL(cuvidParseVideoData(mCudaParser, &packet));
    H264_DPRINT("done flushing");
}

void MediaH264DecoderCuvid::flush(void* ptr) {
    mIsInFlush = true;
    doFlush();
}

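// Returns the oldest decoded frame to the guest. Under mFrameLock the frame,
// its texture, pts and dimensions are popped from the parallel queues. With
// GPU textures the frame is rendered straight into the host color buffer;
// otherwise the NV12 data is first converted to planar YUV and then either
// uploaded into the host color buffer (protocol v200) or memcpy'd back into
// the guest's buffer.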
void MediaH264DecoderCuvid::getImage(void* ptr) {
    H264_DPRINT("getImage %p", ptr);
    GetImageParam param{};
    mParser.parseGetImageParams(ptr, param);

    int* retErr = param.pDecoderErrorCode;
    uint32_t* retWidth = param.pRetWidth;
    uint32_t* retHeight = param.pRetHeight;
    uint64_t* retPts = param.pRetPts;
    uint32_t* retColorPrimaries = param.pRetColorPrimaries;
    uint32_t* retColorRange = param.pRetColorRange;
    uint32_t* retColorTransfer = param.pRetColorTransfer;
    uint32_t* retColorSpace = param.pRetColorSpace;

    static int numbers = 0;
    H264_DPRINT("calling getImage %d colorbuffer %d", numbers++,
                (int)param.hostColorBufferId);
    doFlush();
    uint8_t* dst = param.pDecodedFrame;
    int myOutputWidth = mOutputWidth;
    int myOutputHeight = mOutputHeight;
    std::vector<uint8_t> decodedFrame;
    TextureFrame decodedTexFrame;
    {
        std::lock_guard<std::mutex> g(mFrameLock);
        mImageReady = !mSavedFrames.empty();
        if (!mImageReady) {
            H264_DPRINT("%s: no new frame yet", __func__);
            *retErr = static_cast<int>(Err::NoDecodedFrame);
            return;
        }

        std::vector<uint8_t>& myFrame = mSavedFrames.front();
        std::swap(decodedFrame, myFrame);
        decodedTexFrame = mSavedTexFrames.front();
        mOutputPts = mSavedPts.front();

        myOutputWidth = mSavedW.front();
        myOutputHeight = mSavedH.front();
        *retWidth = myOutputWidth;
        *retHeight = myOutputHeight;

        mSavedFrames.pop_front();
        mSavedTexFrames.pop_front();
        mSavedPts.pop_front();
        mSavedW.pop_front();
        mSavedH.pop_front();
    }

    bool needToCopyToGuest = true;

    if (mUseGpuTexture) {
        needToCopyToGuest = false;
    } else {
        YuvConverter<uint8_t> convert8(myOutputWidth, myOutputHeight);
        convert8.UVInterleavedToPlanar(decodedFrame.data());
    }

    if (mParser.version() == 200) {
        if (param.hostColorBufferId >= 0) {
            needToCopyToGuest = false;
            if (mUseGpuTexture) {
                mRenderer.renderToHostColorBufferWithTextures(
                        param.hostColorBufferId, myOutputWidth, myOutputHeight,
                        decodedTexFrame);
            } else {
                mRenderer.renderToHostColorBuffer(param.hostColorBufferId,
                                                  myOutputWidth, myOutputHeight,
                                                  decodedFrame.data());
            }
        } else {
            if (mUseGpuTexture) {
                // no colorbuffer to send the textures to, just recycle
                // them back to Renderer
                mRenderer.putTextureFrame(decodedTexFrame);
            }
        }
    }

    if (needToCopyToGuest) {
        memcpy(dst, decodedFrame.data(),
               myOutputHeight * myOutputWidth * 3 / 2);
    }

    mImageReady = false;
    *retErr = myOutputHeight * myOutputWidth * 3 / 2;
    *retPts = mOutputPts;
    *retColorPrimaries = mColorPrimaries;
    *retColorRange = mColorRange;
    *retColorTransfer = mColorTransfer;
    *retColorSpace = mColorSpace;
    H264_DPRINT("Frame primary %d range %d transfer %d space %d",
                (int)mColorPrimaries, (int)mColorRange, (int)mColorTransfer,
                (int)mColorSpace);
    H264_DPRINT("Copying completed pts %lld", (long long)mOutputPts);
}

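// One-time, process-wide initialization of the dynamically loaded CUDA and
// CUVID drivers; returns false (and the decoder stays unusable) when the
// driver is missing or no NVIDIA GPU is present.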
bool MediaH264DecoderCuvid::initCudaDrivers() {
    if (s_isCudaInitialized) {
        return true;
    }
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    typedef HMODULE CUDADRIVER;
#else
    typedef void* CUDADRIVER;
#endif
    CUDADRIVER hHandleDriver = 0;
    if (CUDA_SUCCESS != cuInit(0, __CUDA_API_VERSION, hHandleDriver)) {
        fprintf(stderr,
                "Failed to call cuInit, cannot use nvidia cuvid decoder for "
                "h264 stream\n");
        return false;
    }
    if (CUDA_SUCCESS != cuvidInit(0)) {
        fprintf(stderr,
                "Failed to call cuvidInit, cannot use nvidia cuvid decoder for "
                "h264 stream\n");
        return false;
    }

    int numGpuCards = 0;
    CUresult myres = cuDeviceGetCount(&numGpuCards);
    if (myres != CUDA_SUCCESS) {
        H264_DPRINT(
                "Failed to get number of GPU cards installed on host; error "
                "code %d",
                (int)myres);
        return false;
    }

    if (numGpuCards <= 0) {
        H264_DPRINT("There are no nvidia GPU cards on this host.");
        return false;
    }

    // Luckily, CUDA initialized successfully.
    s_isCudaInitialized = true;

    return true;
}

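// Parser callback, invoked when a new sequence header (SPS) is seen. It
// checks the GPU's decode caps against the stream, records the display
// geometry and color description, and (re)creates the NVDEC decoder sized to
// the coded dimensions. The return value is the number of decode surfaces
// the parser may keep in flight (8 here, enough for 4K video).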
int MediaH264DecoderCuvid::HandleVideoSequence(CUVIDEOFORMAT* pVideoFormat) {
    int nDecodeSurface = 8;  // need 8 for 4K video

    CUVIDDECODECAPS decodecaps;
    memset(&decodecaps, 0, sizeof(decodecaps));

    decodecaps.eCodecType = pVideoFormat->codec;
    decodecaps.eChromaFormat = pVideoFormat->chroma_format;
    decodecaps.nBitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidGetDecoderCaps(&decodecaps));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    if (!decodecaps.bIsSupported) {
        H264_DPRINT("Codec not supported on this GPU.");
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width > decodecaps.nMaxWidth) ||
        (pVideoFormat->coded_height > decodecaps.nMaxHeight)) {
        H264_DPRINT("Resolution not supported on this GPU");
        return nDecodeSurface;
    }

    if ((pVideoFormat->coded_width >> 4) * (pVideoFormat->coded_height >> 4) >
        decodecaps.nMaxMBCount) {
        H264_DPRINT("MBCount not supported on this GPU");
        return nDecodeSurface;
    }

    mLumaWidth =
            pVideoFormat->display_area.right - pVideoFormat->display_area.left;
    mLumaHeight =
            pVideoFormat->display_area.bottom - pVideoFormat->display_area.top;
    mChromaHeight = mLumaHeight * 0.5;  // NV12
    mBPP = pVideoFormat->bit_depth_luma_minus8 > 0 ? 2 : 1;

    if (pVideoFormat->video_signal_description.video_full_range_flag)
        mColorRange = 2;
    else
        mColorRange = 0;

    mColorPrimaries = pVideoFormat->video_signal_description.color_primaries;
    mColorTransfer =
            pVideoFormat->video_signal_description.transfer_characteristics;
    mColorSpace = pVideoFormat->video_signal_description.matrix_coefficients;

    CUVIDDECODECREATEINFO videoDecodeCreateInfo = {0};
    videoDecodeCreateInfo.CodecType = pVideoFormat->codec;
    videoDecodeCreateInfo.ChromaFormat = pVideoFormat->chroma_format;
    videoDecodeCreateInfo.OutputFormat = cudaVideoSurfaceFormat_NV12;
    H264_DPRINT("output format is %d", videoDecodeCreateInfo.OutputFormat);
    videoDecodeCreateInfo.bitDepthMinus8 = pVideoFormat->bit_depth_luma_minus8;
    if (pVideoFormat->progressive_sequence)
        videoDecodeCreateInfo.DeinterlaceMode = cudaVideoDeinterlaceMode_Weave;
    else
        videoDecodeCreateInfo.DeinterlaceMode =
                cudaVideoDeinterlaceMode_Adaptive;
    videoDecodeCreateInfo.ulNumOutputSurfaces = 1;
    // With PreferCUVID, JPEG is still decoded by CUDA while video is decoded
    // by NVDEC hardware
    videoDecodeCreateInfo.ulCreationFlags = cudaVideoCreate_PreferCUVID;
    videoDecodeCreateInfo.ulNumDecodeSurfaces = nDecodeSurface;
    videoDecodeCreateInfo.vidLock = mCtxLock;
    videoDecodeCreateInfo.ulWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulHeight = pVideoFormat->coded_height;
    if (mOutputHeight != mLumaHeight || mOutputWidth != mLumaWidth) {
        H264_DPRINT("old width %d old height %d", mOutputWidth, mOutputHeight);
        mOutputWidth = mLumaWidth;
        mOutputHeight = mLumaHeight;
        H264_DPRINT("new width %d new height %d", mOutputWidth, mOutputHeight);
        unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
        if (mOutBufferSize < newOutBufferSize) {
            mOutBufferSize = newOutBufferSize;
        }
    }

    videoDecodeCreateInfo.ulTargetWidth = pVideoFormat->coded_width;
    videoDecodeCreateInfo.ulTargetHeight = pVideoFormat->coded_height;

    mSurfaceWidth = videoDecodeCreateInfo.ulTargetWidth;
    mSurfaceHeight = videoDecodeCreateInfo.ulTargetHeight;

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    if (mCudaDecoder != nullptr) {
        NVDEC_API_CALL(cuvidDestroyDecoder(mCudaDecoder));
        mCudaDecoder = nullptr;
    }
    {
        size_t free, total;
        cuMemGetInfo(&free, &total);
        H264_DPRINT("free memory %g M, total %g M", free / 1048576.0,
                    total / 1048576.0);
    }
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    NVDEC_API_CALL(cuvidCreateDecoder(&mCudaDecoder, &videoDecodeCreateInfo));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));
    H264_DPRINT("successfully called. decoder %p", mCudaDecoder);
    return nDecodeSurface;
}

int MediaH264DecoderCuvid::HandlePictureDecode(CUVIDPICPARAMS* pPicParams) {
    NVDEC_API_CALL(cuvidDecodePicture(mCudaDecoder, pPicParams));
    H264_DPRINT("successfully called.");
    return 1;
}

extern "C" {

#define MEDIA_H264_COPY_Y_TEXTURE 1
#define MEDIA_H264_COPY_UV_TEXTURE 2

struct h264_cuvid_copy_context {
    CUdeviceptr src_frame;
    unsigned int src_pitch;

    // this is usually >= dest_height due to padding, e.g.
    // src_surface_height: 1088, dest_height: 1080
    // so, when copying UV data, the src has to start at
    // offset = src_pitch * src_surface_height
    unsigned int src_surface_height;

    unsigned int dest_width;
    unsigned int dest_height;
};

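// Copies one plane of the decoded NV12 frame from CUDA device memory into a
// GL texture via CUDA-GL interop: register the texture, map it, fetch its
// backing CUarray, do a 2D device-to-array memcpy, then unmap and unregister.
// For the UV plane the source is offset by src_pitch * src_surface_height
// (the whole padded luma region) and the copy height is halved.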
void cuda_copy_decoded_frame(void* privData,
                             int mode,
                             uint32_t dest_texture_handle) {
    h264_cuvid_copy_context* copy_context =
            static_cast<h264_cuvid_copy_context*>(privData);

    const unsigned int GL_TEXTURE_2D = 0x0DE1;
    const unsigned int cudaGraphicsMapFlagsNone = 0x0;
    CUgraphicsResource CudaRes{0};
    H264_DPRINT("cuda copy decoded frame texture %d", (int)dest_texture_handle);
    NVDEC_API_CALL(cuGraphicsGLRegisterImage(&CudaRes, dest_texture_handle,
                                             GL_TEXTURE_2D, 0x0));
    CUarray texture_ptr;
    NVDEC_API_CALL(cuGraphicsMapResources(1, &CudaRes, 0));
    NVDEC_API_CALL(
            cuGraphicsSubResourceGetMappedArray(&texture_ptr, CudaRes, 0, 0));
    CUdeviceptr dpSrcFrame = copy_context->src_frame;
    CUDA_MEMCPY2D m = {0};
    m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
    m.srcDevice = dpSrcFrame;
    m.srcPitch = copy_context->src_pitch;
    m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
    m.dstArray = texture_ptr;
    m.dstPitch = copy_context->dest_width * 1;
    m.WidthInBytes = copy_context->dest_width * 1;
    m.Height = copy_context->dest_height;
    H264_DPRINT("dstPitch %d, WidthInBytes %d Height %d surface-height %d",
                (int)m.dstPitch, (int)m.WidthInBytes, (int)m.Height,
                (int)copy_context->src_surface_height);

    if (mode == MEDIA_H264_COPY_Y_TEXTURE) {  // copy Y data
        NVDEC_API_CALL(cuMemcpy2D(&m));
    } else if (mode == MEDIA_H264_COPY_UV_TEXTURE) {  // copy UV data
        m.srcDevice =
                (CUdeviceptr)((uint8_t*)dpSrcFrame +
                              m.srcPitch * copy_context->src_surface_height);
        m.Height = m.Height / 2;
        NVDEC_API_CALL(cuMemcpy2D(&m));
    }
    NVDEC_API_CALL(cuGraphicsUnmapResources(1, &CudaRes, 0));
    NVDEC_API_CALL(cuGraphicsUnregisterResource(CudaRes));
}

void cuda_nv12_updater(void* privData, uint32_t type, uint32_t* textures) {
    constexpr uint32_t kFRAMEWORK_FORMAT_NV12 = 3;
    if (type != kFRAMEWORK_FORMAT_NV12) {
        return;
    }
    H264_DPRINT("copying Ytex %d", textures[0]);
    H264_DPRINT("copying UVtex %d", textures[1]);
    cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_Y_TEXTURE, textures[0]);
    cuda_copy_decoded_frame(privData, MEDIA_H264_COPY_UV_TEXTURE, textures[1]);
}

}  // end extern "C"

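// Parser callback, invoked when a picture is ready for display (in display
// order). The frame is mapped out of the decoder, then either copied into GL
// textures via cuda_nv12_updater or copied plane-by-plane (Y, then the
// interleaved UV at offset srcPitch * mSurfaceHeight) into host memory, and
// queued for getImage().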
int MediaH264DecoderCuvid::HandlePictureDisplay(
        CUVIDPARSERDISPINFO* pDispInfo) {
    if (mIsLoadingFromSnapshot) {
        return 1;
    }

    CUVIDPROCPARAMS videoProcessingParameters = {};
    videoProcessingParameters.progressive_frame = pDispInfo->progressive_frame;
    videoProcessingParameters.second_field = pDispInfo->repeat_first_field + 1;
    videoProcessingParameters.top_field_first = pDispInfo->top_field_first;
    videoProcessingParameters.unpaired_field =
            pDispInfo->repeat_first_field < 0;
    videoProcessingParameters.output_stream = 0;
    uint64_t myOutputPts = pDispInfo->timestamp;

    CUdeviceptr dpSrcFrame = 0;
    unsigned int nSrcPitch = 0;
    NVDEC_API_CALL(cuvidMapVideoFrame(mCudaDecoder, pDispInfo->picture_index,
                                      &dpSrcFrame, &nSrcPitch,
                                      &videoProcessingParameters));

    NVDEC_API_CALL(cuCtxPushCurrent(mCudaContext));
    unsigned int newOutBufferSize = mOutputWidth * mOutputHeight * 3 / 2;
    std::vector<uint8_t> myFrame;
    TextureFrame texFrame;
    if (mUseGpuTexture) {
        h264_cuvid_copy_context my_copy_context{
                .src_frame = dpSrcFrame,
                .src_pitch = nSrcPitch,
                .src_surface_height = mSurfaceHeight,
                .dest_width = mOutputWidth,
                .dest_height = mOutputHeight,
        };
        texFrame = mRenderer.getTextureFrame(mOutputWidth, mOutputHeight);
        mRenderer.saveDecodedFrameToTexture(texFrame, &my_copy_context,
                                            (void*)cuda_nv12_updater);
    } else {
        myFrame.resize(newOutBufferSize);
        uint8_t* pDecodedFrame = &(myFrame[0]);

        CUDA_MEMCPY2D m = {0};
        m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
        m.srcDevice = dpSrcFrame;
        m.srcPitch = nSrcPitch;
        m.dstMemoryType = CU_MEMORYTYPE_HOST;
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame);
        m.dstPitch = mOutputWidth * mBPP;
        m.WidthInBytes = mOutputWidth * mBPP;
        m.Height = mLumaHeight;
        H264_DPRINT("dstDevice %p, dstPitch %d, WidthInBytes %d Height %d",
                    m.dstHost, (int)m.dstPitch, (int)m.WidthInBytes,
                    (int)m.Height);

        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));

        m.srcDevice = (CUdeviceptr)((uint8_t*)dpSrcFrame +
                                    m.srcPitch * mSurfaceHeight);
        m.dstDevice = (CUdeviceptr)(m.dstHost = pDecodedFrame +
                                                m.dstPitch * mLumaHeight);
        m.Height = mChromaHeight;
        NVDEC_API_CALL(cuMemcpy2DAsync(&m, 0));
    }

    NVDEC_API_CALL(cuStreamSynchronize(0));
    NVDEC_API_CALL(cuCtxPopCurrent(NULL));

    NVDEC_API_CALL(cuvidUnmapVideoFrame(mCudaDecoder, dpSrcFrame));
    if (!mIsLoadingFromSnapshot) {
        std::lock_guard<std::mutex> g(mFrameLock);
        mSavedFrames.push_back(myFrame);
        mSavedTexFrames.push_back(texFrame);
        mSavedPts.push_back(myOutputPts);
        mSavedW.push_back(mOutputWidth);
        mSavedH.push_back(mOutputHeight);
    }
    mImageReady = true;
    H264_DPRINT("successfully called.");
    return 1;
}

void MediaH264DecoderCuvid::oneShotDecode(std::vector<uint8_t>& data,
                                          uint64_t pts) {
    H264_DPRINT("decoding pts %lld", (long long)pts);
    decodeFrameInternal(nullptr, nullptr, data.data(), data.size(), pts);
}

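// Snapshot serialization. save() writes the protocol version, texture flag,
// geometry and pixel format, then drains the decoded-frame queues into
// mSnapshotState (frames plus their color aspects and pts) before persisting
// the saved SPS/PPS/packet history.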
void MediaH264DecoderCuvid::save(base::Stream* stream) const {
    stream->putBe32(mParser.version());
    const int useGpuTexture = mUseGpuTexture ? 1 : 0;
    stream->putBe32(useGpuTexture);

    stream->putBe32(mWidth);
    stream->putBe32(mHeight);
    stream->putBe32(mOutputWidth);
    stream->putBe32(mOutputHeight);
    stream->putBe32((int)mOutPixFmt);

    const int hasContext = mCudaContext == nullptr ? 0 : 1;
    stream->putBe32(hasContext);

    mSnapshotState.savedFrames.clear();
    mSnapshotState.savedDecodedFrame.data.clear();
    // drain the queues completely; the queues shrink as we pop, so an
    // index-based loop bounded by size() would stop halfway through
    while (!mSavedFrames.empty()) {
        const std::vector<uint8_t>& myFrame = mSavedFrames.front();
        int myOutputWidth = mSavedW.front();
        int myOutputHeight = mSavedH.front();
        uint64_t myOutputPts = mSavedPts.front();
        mSnapshotState.saveDecodedFrame(
                myFrame, myOutputWidth, myOutputHeight,
                ColorAspects{mColorPrimaries, mColorRange, mColorTransfer,
                             mColorSpace},
                myOutputPts);
        mSavedFrames.pop_front();
        mSavedTexFrames.pop_front();
        mSavedW.pop_front();
        mSavedH.pop_front();
        mSavedPts.pop_front();
    }
    H264_DPRINT("saving packets now %d",
                (int)(mSnapshotState.savedPackets.size()));
    mSnapshotState.save(stream);
}

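// load() mirrors save(): restore the scalar state, re-create the CUDA context
// if one existed, then replay SPS -> PPS -> saved packets through the parser
// (with mIsLoadingFromSnapshot set so replayed output is discarded) and
// finally re-queue any frames that had already been decoded but not yet
// fetched by the guest.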
bool MediaH264DecoderCuvid::load(base::Stream* stream) {
    mIsLoadingFromSnapshot = true;
    uint32_t version = stream->getBe32();
    mParser = H264PingInfoParser{version};
    const int useGpuTexture = stream->getBe32();
    mUseGpuTexture = useGpuTexture ? true : false;

    mWidth = stream->getBe32();
    mHeight = stream->getBe32();
    mOutputWidth = stream->getBe32();
    mOutputHeight = stream->getBe32();
    mOutPixFmt = (PixelFormat)stream->getBe32();

    const int hasContext = stream->getBe32();
    if (hasContext) {
        initH264ContextInternal(mWidth, mHeight, mWidth, mHeight, mOutPixFmt);
    }

    mSnapshotState.load(stream);

    H264_DPRINT("loaded packets %d, now restore decoder",
                (int)(mSnapshotState.savedPackets.size()));
    if (hasContext && mSnapshotState.sps.size() > 0) {
        oneShotDecode(mSnapshotState.sps, 0);
        if (mSnapshotState.pps.size() > 0) {
            oneShotDecode(mSnapshotState.pps, 0);
            if (mSnapshotState.savedPackets.size() > 0) {
                for (size_t i = 0; i < mSnapshotState.savedPackets.size();
                     ++i) {
                    PacketInfo& pkt = mSnapshotState.savedPackets[i];
                    oneShotDecode(pkt.data, pkt.pts);
                }
            }
        }
    }

    mImageReady = false;
    for (size_t i = 0; i < mSnapshotState.savedFrames.size(); ++i) {
        auto& frame = mSnapshotState.savedFrames[i];
        mOutBufferSize = frame.data.size();
        mOutputWidth = frame.width;
        mOutputHeight = frame.height;
        mColorPrimaries = frame.color.primaries;
        mColorRange = frame.color.range;
        mColorTransfer = frame.color.transfer;
        mColorSpace = frame.color.space;
        mOutputPts = frame.pts;
        mSavedFrames.push_back(frame.data);
        TextureFrame texFrame =
                mRenderer.getTextureFrame(mOutputWidth, mOutputHeight);
        mSavedTexFrames.push_back(texFrame);
        mSavedW.push_back(mOutputWidth);
        mSavedH.push_back(mOutputHeight);
        mSavedPts.push_back(mOutputPts);
        mImageReady = true;
    }
    mIsLoadingFromSnapshot = false;
    return true;
}

// static
bool MediaH264DecoderCuvid::s_isCudaInitialized = false;

}  // namespace emulation
}  // namespace android