This commit is contained in:
mofeng-git
2025-12-28 18:19:16 +08:00
commit d143d158e4
771 changed files with 220548 additions and 0 deletions

View File

@@ -0,0 +1,693 @@
#define FFNV_LOG_FUNC
#define FFNV_DEBUG_LOG_FUNC
#include <DirectXMath.h>
#include <Samples/NvCodec/NvDecoder/NvDecoder.h>
#include <Samples/Utils/NvCodecUtils.h>
#include <algorithm>
#include <array>
#include <d3dcompiler.h>
#include <directxcolors.h>
#include <iostream>
#include <libavutil/pixfmt.h>
#include <thread>
#include "callback.h"
#include "common.h"
#include "system.h"
#include "util.h"
#define LOG_MODULE "CUVID"
#include "log.h"
#define NUMVERTICES 6
using namespace DirectX;
namespace {
#define succ(call) ((call) == 0)
// RAII guard around a CUDA graphics resource mapping.
// The constructor maps the resource with cuGraphicsMapResources (throwing via
// NVDEC_THROW_ERROR on failure); the destructor unmaps it and only logs on
// failure, because a destructor must not throw.
class CUVIDAutoUnmapper {
  CudaFunctions *cudl_ = NULL;
  CUgraphicsResource *pCuResource_ = NULL;

public:
  CUVIDAutoUnmapper(CudaFunctions *cudl, CUgraphicsResource *pCuResource)
      : cudl_(cudl), pCuResource_(pCuResource) {
    if (!succ(cudl->cuGraphicsMapResources(1, pCuResource, 0))) {
      LOG_TRACE(std::string("cuGraphicsMapResources failed"));
      NVDEC_THROW_ERROR("cuGraphicsMapResources failed", CUDA_ERROR_UNKNOWN);
    }
  }
  // A copy would unmap the same resource twice, so the guard is non-copyable.
  CUVIDAutoUnmapper(const CUVIDAutoUnmapper &) = delete;
  CUVIDAutoUnmapper &operator=(const CUVIDAutoUnmapper &) = delete;
  ~CUVIDAutoUnmapper() {
    if (!succ(cudl_->cuGraphicsUnmapResources(1, pCuResource_, 0))) {
      LOG_TRACE(std::string("cuGraphicsUnmapResources failed"));
      // NVDEC_THROW_ERROR("cuGraphicsUnmapResources failed",
      // CUDA_ERROR_UNKNOWN);
    }
  }
};
// RAII guard that makes a CUDA context current for the enclosing scope.
// The constructor pushes `cuContext` with cuCtxPushCurrent (throwing via
// NVDEC_THROW_ERROR on failure); the destructor pops it and only logs on
// failure, because a destructor must not throw.
class CUVIDAutoCtxPopper {
  CudaFunctions *cudl_ = NULL;

public:
  CUVIDAutoCtxPopper(CudaFunctions *cudl, CUcontext cuContext) : cudl_(cudl) {
    if (!succ(cudl->cuCtxPushCurrent(cuContext))) {
      LOG_TRACE(std::string("cuCtxPushCurrent failed"));
      // Name the call that actually failed (previously mislabeled as a pop).
      NVDEC_THROW_ERROR("cuCtxPushCurrent failed", CUDA_ERROR_UNKNOWN);
    }
  }
  // A copy would pop the context twice, so the guard is non-copyable.
  CUVIDAutoCtxPopper(const CUVIDAutoCtxPopper &) = delete;
  CUVIDAutoCtxPopper &operator=(const CUVIDAutoCtxPopper &) = delete;
  ~CUVIDAutoCtxPopper() {
    if (!succ(cudl_->cuCtxPopCurrent(NULL))) {
      LOG_TRACE(std::string("cuCtxPopCurrent failed"));
      // NVDEC_THROW_ERROR("cuCtxPopCurrent failed", CUDA_ERROR_UNKNOWN);
    }
  }
};
// Loads the CUDA and CUVID function tables into *pp_cudl and *pp_cvdl.
// Throws (via NVDEC_THROW_ERROR) if either table cannot be loaded. On failure
// nothing stays loaded, so callers that never reach free_driver() do not leak.
void load_driver(CudaFunctions **pp_cudl, CuvidFunctions **pp_cvdl) {
  if (cuda_load_functions(pp_cudl, NULL) < 0) {
    LOG_TRACE(std::string("cuda_load_functions failed"));
    NVDEC_THROW_ERROR("cuda_load_functions failed", CUDA_ERROR_UNKNOWN);
  }
  if (cuvid_load_functions(pp_cvdl, NULL) < 0) {
    LOG_TRACE(std::string("cuvid_load_functions failed"));
    // Release the CUDA table loaded above before throwing; the exception
    // skips the caller's free_driver() call.
    if (*pp_cudl) {
      cuda_free_functions(pp_cudl);
      *pp_cudl = NULL;
    }
    NVDEC_THROW_ERROR("cuvid_load_functions failed", CUDA_ERROR_UNKNOWN);
  }
}
// Releases the function tables obtained by load_driver(): the CUVID table
// first, then the CUDA table. Tables that are already NULL are skipped, and
// each released pointer is reset to NULL so the call can be repeated safely.
void free_driver(CudaFunctions **pp_cudl, CuvidFunctions **pp_cvdl) {
  const bool haveCuvid = (*pp_cvdl != NULL);
  if (haveCuvid) {
    cuvid_free_functions(pp_cvdl);
    *pp_cvdl = NULL;
  }

  const bool haveCuda = (*pp_cudl != NULL);
  if (haveCuda) {
    cuda_free_functions(pp_cudl);
    *pp_cudl = NULL;
  }
}
// Vertex layout used for the full-screen quad: a 3-float position followed by
// a 2-float texture coordinate (matches the POSITION/TEXCOORD input layout
// declared in set_shader, where TEXCOORD starts at byte offset 12).
typedef struct _VERTEX {
DirectX::XMFLOAT3 Pos;
DirectX::XMFLOAT2 TexCoord;
} VERTEX;
class CuvidDecoder {
public:
// Dynamically loaded CUDA / CUVID function tables (filled by load_driver in
// the constructor, released in destroy()).
CudaFunctions *cudl_ = NULL;
CuvidFunctions *cvdl_ = NULL;
// Decoder instance, (re)created by create_nvdecoder().
NvDecoder *dec_ = NULL;
// CUDA context created in init() and destroyed in destroy().
CUcontext cuContext_ = NULL;
// CUDA registrations of textures_[0] / textures_[1] (see register_texture).
CUgraphicsResource cuResource_[2] = {NULL, NULL}; // r8, r8g8
// Intermediate D3D11 textures created in set_srv(): [0] is R8 at
// width x height, [1] is R8G8 at width/2 x chromaHeight.
ComPtr<ID3D11Texture2D> textures_[2] = {NULL, NULL};
// Render target view on the current output texture (recreated in set_rtv()).
ComPtr<ID3D11RenderTargetView> RTV_ = NULL;
// Shader resource views over textures_[0] / textures_[1].
ComPtr<ID3D11ShaderResourceView> SRV_[2] = {NULL, NULL};
ComPtr<ID3D11VertexShader> vertexShader_ = NULL;
ComPtr<ID3D11PixelShader> pixelShader_ = NULL;
ComPtr<ID3D11SamplerState> samplerLinear_ = NULL;
// D3D11 device/context wrapper created in init().
std::unique_ptr<NativeDevice> native_ = nullptr;
// Constructor arguments, stored verbatim. device_ is cast to ID3D11Device* in
// init().
void *device_;
int64_t luid_;
DataFormat dataFormat_;
// prepare() runs its setup once; these cache whether it ran and its result.
bool prepare_tried_ = false;
bool prepare_ok_ = false;
// Resolution remembered by decode() to detect resolution changes.
int width_ = 0;
int height_ = 0;
// Video format reported by the decoder after the last successful decode;
// decode_and_recreate() compares its display area against the new one.
CUVIDEOFORMAT last_video_format_ = {};
public:
CuvidDecoder(void *device, int64_t luid, DataFormat dataFormat) {
device_ = device;
luid_ = luid;
dataFormat_ = dataFormat;
ZeroMemory(&last_video_format_, sizeof(last_video_format_));
load_driver(&cudl_, &cvdl_);
}
// Safety net: release the decoder, CUDA context and driver tables if the
// owner forgot to call destroy(). destroy() nulls everything it frees, so
// running it again after an explicit destroy() does nothing.
~CuvidDecoder() {
  try {
    destroy();
  } catch (...) {
    // A destructor must not throw; nothing more can be done here.
  }
}
bool init() {
if (!succ(cudl_->cuInit(0))) {
LOG_ERROR(std::string("cuInit failed"));
return false;
}
CUdevice cuDevice = 0;
native_ = std::make_unique<NativeDevice>();
if (!native_->Init(luid_, (ID3D11Device *)device_, 4)) {
LOG_ERROR(std::string("Failed to init native device"));
return false;
}
if (!succ(cudl_->cuD3D11GetDevice(&cuDevice, native_->adapter_.Get()))) {
LOG_ERROR(std::string("Failed to get cuDevice"));
return false;
}
if (!succ(cudl_->cuCtxCreate(&cuContext_, 0, cuDevice))) {
LOG_ERROR(std::string("Failed to create cuContext"));
return false;
}
if (!create_nvdecoder()) {
LOG_ERROR(std::string("Failed to create nvdecoder"));
return false;
}
return true;
}
// ref: HandlePictureDisplay
// Decodes one packet and renders every returned frame into a BGRA texture.
//   data/len  - the encoded packet.
//   callback  - optional; invoked with the output texture and `obj` per frame.
// Returns 0 if at least one frame was produced, -1 otherwise.
int decode(uint8_t *data, int len, DecodeCallback callback, void *obj) {
  // dec_ can be null if a decoder re-creation threw on an earlier call.
  if (!dec_ || !native_) {
    LOG_ERROR(std::string("decoder is not initialized"));
    return -1;
  }
  int nFrameReturned = decode_and_recreate(data, len);
  if (nFrameReturned == -2) {
    // The decoder was recreated; feed the same packet to the new instance.
    nFrameReturned = dec_->Decode(data, len, CUVID_PKT_ENDOFPICTURE);
  }
  if (nFrameReturned <= 0) {
    return -1;
  }
  last_video_format_ = dec_->GetLatestVideoFormat();
  int width = dec_->GetWidth();
  int height = dec_->GetHeight();
  if (prepare_tried_ && (width != width_ || height != height_)) {
    LOG_INFO(std::string("resolution changed, (") + std::to_string(width_) + "," +
             std::to_string(height_) + ") -> (" + std::to_string(width) +
             "," + std::to_string(height) + ")");
    reset_prepare();
  }
  // Always remember the current resolution. Recording it only inside the
  // branch above left width_/height_ at 0 after the first frame, so the
  // second frame was always treated as a resolution change and the whole
  // pipeline was torn down and rebuilt once for no reason.
  width_ = width;
  height_ = height;
  if (!prepare()) {
    LOG_ERROR(std::string("prepare failed"));
    return -1;
  }
  bool decoded = false;
  for (int i = 0; i < nFrameReturned; i++) {
    uint8_t *pFrame = dec_->GetFrame();
    native_->BeginQuery();
    if (!copy_cuda_frame(pFrame)) {
      LOG_ERROR(std::string("copy_cuda_frame failed"));
      native_->EndQuery();
      return -1;
    }
    if (!native_->EnsureTexture(width, height)) {
      LOG_ERROR(std::string("EnsureTexture failed"));
      native_->EndQuery();
      return -1;
    }
    native_->next();
    if (!set_rtv(native_->GetCurrentTexture())) {
      LOG_ERROR(std::string("set_rtv failed"));
      native_->EndQuery();
      return -1;
    }
    if (!draw()) {
      LOG_ERROR(std::string("draw failed"));
      native_->EndQuery();
      return -1;
    }
    native_->EndQuery();
    // A failed query is logged but does not fail the frame.
    if (!native_->Query()) {
      LOG_ERROR(std::string("Query failed"));
    }
    if (callback)
      callback(native_->GetCurrentTexture(), obj);
    decoded = true;
  }
  return decoded ? 0 : -1;
}
void destroy() {
if (dec_) {
delete dec_;
dec_ = nullptr;
}
if (cudl_ && cuContext_) {
cudl_->cuCtxPushCurrent(cuContext_);
for (int i = 0; i < 2; i++) {
if (cuResource_[i]) {
cudl_->cuGraphicsUnregisterResource(cuResource_[i]);
cuResource_[i] = NULL;
}
}
cudl_->cuCtxPopCurrent(NULL);
cudl_->cuCtxDestroy(cuContext_);
cuContext_ = NULL;
}
free_driver(&cudl_, &cvdl_);
}
private:
// Undoes prepare(): unregisters the CUDA graphics resources and drops every
// D3D11 object it created, so the next prepare() rebuilds the pipeline.
void reset_prepare() {
  prepare_tried_ = false;
  prepare_ok_ = false;
  if (cudl_ && cuContext_) {
    cudl_->cuCtxPushCurrent(cuContext_);
    for (int i = 0; i < 2; i++) {
      if (cuResource_[i]) {
        cudl_->cuGraphicsUnregisterResource(cuResource_[i]);
        // Clear the handle: leaving it set would let destroy() (or a later
        // reset) unregister the same resource a second time if
        // re-registration fails or never happens.
        cuResource_[i] = NULL;
      }
    }
    cudl_->cuCtxPopCurrent(NULL);
  }
  for (int i = 0; i < 2; i++) {
    textures_[i].Reset();
    SRV_[i].Reset();
  }
  RTV_.Reset();
  vertexShader_.Reset();
  pixelShader_.Reset();
  samplerLinear_.Reset();
}
// Builds the render pipeline once. Later calls return the cached outcome of
// the first attempt until reset_prepare() clears it. The setup steps run in
// a fixed order and stop at the first failure.
bool prepare() {
  if (!prepare_tried_) {
    prepare_tried_ = true;
    // && short-circuits, so a failing step prevents the following ones.
    const bool allStepsOk = set_srv() && set_view_port() && set_sample() &&
                            set_shader() && set_vertex_buffer() &&
                            register_texture();
    if (allStepsOk) {
      prepare_ok_ = true;
    }
    return allStepsOk;
  }
  return prepare_ok_;
}
bool copy_cuda_frame(unsigned char *dpNv12) {
int width = dec_->GetWidth();
int height = dec_->GetHeight();
int chromaHeight = dec_->GetChromaHeight();
CUVIDAutoCtxPopper ctxPoper(cudl_, cuContext_);
for (int i = 0; i < 2; i++) {
CUarray dstArray;
CUVIDAutoUnmapper unmapper(cudl_, &cuResource_[i]);
if (!succ(cudl_->cuGraphicsSubResourceGetMappedArray(
&dstArray, cuResource_[i], 0, 0)))
return false;
CUDA_MEMCPY2D m = {0};
m.srcMemoryType = CU_MEMORYTYPE_DEVICE;
m.srcDevice = (CUdeviceptr)(dpNv12 + (width * height) * i);
m.srcPitch = width; // pitch
m.dstMemoryType = CU_MEMORYTYPE_ARRAY;
m.dstArray = dstArray;
m.WidthInBytes = width;
m.Height = i == 0 ? height : chromaHeight;
if (!succ(cudl_->cuMemcpy2D(&m)))
return false;
}
return true;
}
// Issues the draw call for the NUMVERTICES-vertex quad and flushes the
// device context. Always returns true.
bool draw() {
  auto &deviceContext = native_->context_;
  deviceContext->Draw(NUMVERTICES, 0);
  deviceContext->Flush();
  return true;
}
// Decodes one packet and recreates the NvDecoder when the stream changed in
// a way the existing instance cannot handle (display area changed, or the
// coded size exceeds the decoder's maximum).
// return:
// >=0: nFrameReturned
// -1: failed
// -2: recreated, please decode again
int decode_and_recreate(uint8_t *data, int len) {
  // dec_ is null if an earlier create_nvdecoder() threw after deleting the
  // previous decoder.
  if (!dec_) {
    LOG_ERROR(std::string("decode_and_recreate: decoder is null"));
    return -1;
  }
  try {
    int nFrameReturned = dec_->Decode(data, len, CUVID_PKT_ENDOFPICTURE);
    if (nFrameReturned <= 0)
      return -1;
    CUVIDEOFORMAT video_format = dec_->GetLatestVideoFormat();
    auto d1 = last_video_format_.display_area;
    auto d2 = video_format.display_area;
    // reconfigure may cause wrong display area
    if (last_video_format_.coded_width != 0 &&
        (d1.left != d2.left || d1.right != d2.right || d1.top != d2.top ||
         d1.bottom != d2.bottom)) {
      LOG_INFO(
          std::string("recreate, display area changed from (") + std::to_string(d1.left) +
          ", " + std::to_string(d1.top) + ", " + std::to_string(d1.right) +
          ", " + std::to_string(d1.bottom) + ") to (" +
          std::to_string(d2.left) + ", " + std::to_string(d2.top) + ", " +
          std::to_string(d2.right) + ", " + std::to_string(d2.bottom) + ")");
      if (create_nvdecoder()) {
        return -2;
      } else {
        LOG_ERROR(std::string("create_nvdecoder failed"));
      }
      return -1;
    } else {
      return nFrameReturned;
    }
  } catch (const std::exception &e) {
    if (!dec_) {
      // The exception came from create_nvdecoder(): the old decoder is
      // already deleted and the new one was never constructed, so there is
      // nothing to query below.
      LOG_ERROR(std::string("Exception decode_and_recreate: ") + e.what());
      return -1;
    }
    unsigned int maxWidth = dec_->GetMaxWidth();
    unsigned int maxHeight = dec_->GetMaxHeight();
    CUVIDEOFORMAT video_format = dec_->GetLatestVideoFormat();
    // https://github.com/NVIDIA/DALI/blob/4f5ee72b287cfbbe0d400734416ff37bd8027099/dali/operators/reader/loader/video/frames_decoder_gpu.cc#L212
    if (maxWidth > 0 && (video_format.coded_width > maxWidth ||
                         video_format.coded_height > maxHeight)) {
      // The closing parenthesis after the coded size was missing in the log.
      LOG_INFO(std::string("recreate, exceed maxWidth/maxHeight: (") +
               std::to_string(video_format.coded_width) + ", " +
               std::to_string(video_format.coded_height) + ") > (" +
               std::to_string(maxWidth) + ", " + std::to_string(maxHeight) +
               ")");
      if (create_nvdecoder()) {
        return -2;
      } else {
        LOG_ERROR(std::string("create_nvdecoder failed"));
      }
    } else {
      LOG_ERROR(std::string("Exception decode_and_recreate: ") + e.what());
    }
  }
  return -1;
}
bool set_srv() {
int width = dec_->GetWidth();
int height = dec_->GetHeight();
int chromaHeight = dec_->GetChromaHeight();
LOG_TRACE(std::string("width:") + std::to_string(width) +
", height:" + std::to_string(height) +
", chromaHeight:" + std::to_string(chromaHeight));
D3D11_TEXTURE2D_DESC desc;
ZeroMemory(&desc, sizeof(desc));
desc.Width = width;
desc.Height = height;
desc.MipLevels = 1;
desc.ArraySize = 1;
desc.Format = DXGI_FORMAT_R8_UNORM;
desc.SampleDesc.Count = 1;
desc.SampleDesc.Quality = 0;
desc.MiscFlags = 0;
desc.Usage = D3D11_USAGE_DEFAULT;
desc.BindFlags = D3D11_BIND_SHADER_RESOURCE;
desc.CPUAccessFlags = 0;
HRB(native_->device_->CreateTexture2D(
&desc, nullptr, textures_[0].ReleaseAndGetAddressOf()));
desc.Format = DXGI_FORMAT_R8G8_UNORM;
desc.Width = width / 2;
desc.Height = chromaHeight;
HRB(native_->device_->CreateTexture2D(
&desc, nullptr, textures_[1].ReleaseAndGetAddressOf()));
D3D11_SHADER_RESOURCE_VIEW_DESC srvDesc;
srvDesc = CD3D11_SHADER_RESOURCE_VIEW_DESC(textures_[0].Get(),
D3D11_SRV_DIMENSION_TEXTURE2D,
DXGI_FORMAT_R8_UNORM);
HRB(native_->device_->CreateShaderResourceView(
textures_[0].Get(), &srvDesc, SRV_[0].ReleaseAndGetAddressOf()));
srvDesc = CD3D11_SHADER_RESOURCE_VIEW_DESC(textures_[1].Get(),
D3D11_SRV_DIMENSION_TEXTURE2D,
DXGI_FORMAT_R8G8_UNORM);
HRB(native_->device_->CreateShaderResourceView(
textures_[1].Get(), &srvDesc, SRV_[1].ReleaseAndGetAddressOf()));
// set SRV
std::array<ID3D11ShaderResourceView *, 2> const textureViews = {
SRV_[0].Get(), SRV_[1].Get()};
native_->context_->PSSetShaderResources(0, textureViews.size(),
textureViews.data());
return true;
}
// Creates a B8G8R8A8 render target view on `texture`, clears it to all
// zeros, and makes it the sole render target (no depth-stencil view).
bool set_rtv(ID3D11Texture2D *texture) {
  D3D11_RENDER_TARGET_VIEW_DESC viewDesc;
  viewDesc.ViewDimension = D3D11_RTV_DIMENSION_TEXTURE2D;
  viewDesc.Format = DXGI_FORMAT_B8G8R8A8_UNORM;
  viewDesc.Texture2D.MipSlice = 0;
  HRB(native_->device_->CreateRenderTargetView(
      texture, &viewDesc, RTV_.ReleaseAndGetAddressOf()));

  auto &deviceContext = native_->context_;
  const float black[4] = {0.0f, 0.0f, 0.0f, 0.0f}; // clear as black
  deviceContext->ClearRenderTargetView(RTV_.Get(), black);
  deviceContext->OMSetRenderTargets(1, RTV_.GetAddressOf(), NULL);
  return true;
}
bool set_view_port() {
int width = dec_->GetWidth();
int height = dec_->GetHeight();
D3D11_VIEWPORT vp;
vp.Width = (FLOAT)(width);
vp.Height = (FLOAT)(height);
vp.MinDepth = 0.0f;
vp.MaxDepth = 1.0f;
vp.TopLeftX = 0;
vp.TopLeftY = 0;
native_->context_->RSSetViewports(1, &vp);
return true;
}
bool set_sample() {
D3D11_SAMPLER_DESC sampleDesc = CD3D11_SAMPLER_DESC(CD3D11_DEFAULT());
HRB(native_->device_->CreateSamplerState(
&sampleDesc, samplerLinear_.ReleaseAndGetAddressOf()));
native_->context_->PSSetSamplers(0, 1, samplerLinear_.GetAddressOf());
return true;
}
// Creates the vertex and pixel shaders from the precompiled bytecode headers,
// builds the matching input layout (POSITION float3 at offset 0, TEXCOORD
// float2 at offset 12), and binds all three to the device context.
bool set_shader() {
// https://gist.github.com/RomiTT/9c05d36fe339b899793a3252297a5624
#include "pixel_shader_601.h"
#include "vertex_shader.h"
  // Shader creation is now checked through HRB like every other D3D11 call in
  // this class; previously a failure was ignored and a null shader was bound.
  HRB(native_->device_->CreateVertexShader(
      g_VS, ARRAYSIZE(g_VS), nullptr, vertexShader_.ReleaseAndGetAddressOf()));
  HRB(native_->device_->CreatePixelShader(
      g_PS, ARRAYSIZE(g_PS), nullptr, pixelShader_.ReleaseAndGetAddressOf()));
  // set InputLayout
  constexpr std::array<D3D11_INPUT_ELEMENT_DESC, 2> Layout = {{
      {"POSITION", 0, DXGI_FORMAT_R32G32B32_FLOAT, 0, 0,
       D3D11_INPUT_PER_VERTEX_DATA, 0},
      {"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 12,
       D3D11_INPUT_PER_VERTEX_DATA, 0},
  }};
  ComPtr<ID3D11InputLayout> inputLayout = NULL;
  HRB(native_->device_->CreateInputLayout(Layout.data(), Layout.size(), g_VS,
                                          ARRAYSIZE(g_VS),
                                          inputLayout.GetAddressOf()));
  native_->context_->IASetInputLayout(inputLayout.Get());
  native_->context_->VSSetShader(vertexShader_.Get(), NULL, 0);
  native_->context_->PSSetShader(pixelShader_.Get(), NULL, 0);
  return true;
}
bool set_vertex_buffer() {
UINT Stride = sizeof(VERTEX);
UINT Offset = 0;
FLOAT blendFactor[4] = {0.f, 0.f, 0.f, 0.f};
native_->context_->OMSetBlendState(nullptr, blendFactor, 0xffffffff);
native_->context_->IASetPrimitiveTopology(
D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
// set VertexBuffers
VERTEX Vertices[NUMVERTICES] = {
{XMFLOAT3(-1.0f, -1.0f, 0), XMFLOAT2(0.0f, 1.0f)},
{XMFLOAT3(-1.0f, 1.0f, 0), XMFLOAT2(0.0f, 0.0f)},
{XMFLOAT3(1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f)},
{XMFLOAT3(1.0f, -1.0f, 0), XMFLOAT2(1.0f, 1.0f)},
{XMFLOAT3(-1.0f, 1.0f, 0), XMFLOAT2(0.0f, 0.0f)},
{XMFLOAT3(1.0f, 1.0f, 0), XMFLOAT2(1.0f, 0.0f)},
};
D3D11_BUFFER_DESC BufferDesc;
RtlZeroMemory(&BufferDesc, sizeof(BufferDesc));
BufferDesc.Usage = D3D11_USAGE_DEFAULT;
BufferDesc.ByteWidth = sizeof(VERTEX) * NUMVERTICES;
BufferDesc.BindFlags = D3D11_BIND_VERTEX_BUFFER;
BufferDesc.CPUAccessFlags = 0;
D3D11_SUBRESOURCE_DATA InitData;
RtlZeroMemory(&InitData, sizeof(InitData));
InitData.pSysMem = Vertices;
ComPtr<ID3D11Buffer> VertexBuffer = nullptr;
// Create vertex buffer
HRB(native_->device_->CreateBuffer(&BufferDesc, &InitData, &VertexBuffer));
native_->context_->IASetVertexBuffers(0, 1, VertexBuffer.GetAddressOf(),
&Stride, &Offset);
return true;
}
// Registers both intermediate D3D11 textures with CUDA and sets their map
// flags, with the decoder's CUDA context current. Stops and returns false at
// the first failing call.
bool register_texture() {
  CUVIDAutoCtxPopper contextGuard(cudl_, cuContext_);
  for (int idx = 0; idx < 2; ++idx) {
    const bool registered = succ(cudl_->cuGraphicsD3D11RegisterResource(
        &cuResource_[idx], textures_[idx].Get(),
        CU_GRAPHICS_REGISTER_FLAGS_NONE));
    if (!registered) {
      return false;
    }
    const bool flagsSet = succ(cudl_->cuGraphicsResourceSetMapFlags(
        cuResource_[idx], CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
    if (!flagsSet) {
      return false;
    }
  }
  return true;
}
// Maps a DataFormat to the matching CUVID codec id. Returns false, leaving
// `cuda` untouched, for anything other than H264 or H265.
bool dataFormat_to_cuCodecID(DataFormat dataFormat, cudaVideoCodec &cuda) {
  if (dataFormat == H264) {
    cuda = cudaVideoCodec_H264;
    return true;
  }
  if (dataFormat == H265) {
    cuda = cudaVideoCodec_HEVC;
    return true;
  }
  return false;
}
bool create_nvdecoder() {
LOG_TRACE(std::string("create nvdecoder"));
bool bUseDeviceFrame = true;
bool bLowLatency = true;
bool bDeviceFramePitched = false; // width=pitch
cudaVideoCodec cudaCodecID;
if (!dataFormat_to_cuCodecID(dataFormat_, cudaCodecID)) {
return false;
}
if (dec_) {
delete dec_;
dec_ = nullptr;
}
dec_ = new NvDecoder(cudl_, cvdl_, cuContext_, bUseDeviceFrame, cudaCodecID,
bLowLatency, bDeviceFramePitched);
return true;
}
};
} // namespace
extern "C" {
int nv_decode_driver_support() {
try {
CudaFunctions *cudl = NULL;
CuvidFunctions *cvdl = NULL;
load_driver(&cudl, &cvdl);
free_driver(&cudl, &cvdl);
return 0;
} catch (const std::exception &e) {
}
return -1;
}
int nv_destroy_decoder(void *decoder) {
try {
CuvidDecoder *p = (CuvidDecoder *)decoder;
if (p) {
p->destroy();
delete p;
p = NULL;
}
return 0;
} catch (const std::exception &e) {
LOG_ERROR(std::string("destroy failed: ") + e.what());
}
return -1;
}
// Creates and initializes a decoder for the adapter identified by `luid`.
//   device     - D3D11 device pointer handed to the decoder (may be null; see
//                nv_test_decode).
//   dataFormat - codec to decode.
// Returns an opaque handle for nv_decode / nv_destroy_decoder, or NULL on any
// failure. Nothing is leaked on failure.
void *nv_new_decoder(void *device, int64_t luid,
                     DataFormat dataFormat) {
  CuvidDecoder *p = NULL;
  try {
    // `new` throws on allocation failure, so no null check is needed.
    p = new CuvidDecoder(device, luid, dataFormat);
    if (p->init())
      return p;
  } catch (const std::exception &ex) {
    // This path handles construction/initialization, not destruction.
    LOG_ERROR(std::string("new failed: ") + ex.what());
  }
  if (p) {
    // Keep cleanup failures from escaping through the C interface.
    try {
      p->destroy();
      delete p;
    } catch (const std::exception &ex) {
      LOG_ERROR(std::string("destroy failed: ") + ex.what());
    }
    p = NULL;
  }
  return NULL;
}
// Decodes one packet with a decoder created by nv_new_decoder.
// `callback` (optional) receives each output texture together with `obj`.
// Returns HWCODEC_SUCCESS if at least one frame was produced, otherwise
// HWCODEC_ERR_COMMON (including for a NULL decoder handle).
int nv_decode(void *decoder, uint8_t *data, int len, DecodeCallback callback,
              void *obj) {
  try {
    CuvidDecoder *p = (CuvidDecoder *)decoder;
    // Reject a null handle instead of dereferencing it, matching the
    // tolerance nv_destroy_decoder already has.
    if (!p) {
      LOG_ERROR(std::string("decode failed: decoder is null"));
      return HWCODEC_ERR_COMMON;
    }
    if (p->decode(data, len, callback, obj) == 0 ) {
      return HWCODEC_SUCCESS;
    }
  } catch (const std::exception &e) {
    LOG_ERROR(std::string("decode failed: ") + e.what());
  }
  return HWCODEC_ERR_COMMON;
}
// Tries to decode `data` on every NVIDIA adapter that is not excluded, and
// reports the adapters that succeed within TEST_TIMEOUT_MS.
//   outLuids/outVendors - output arrays with room for maxDescNum entries.
//   outDescNum          - receives the number of entries written.
// Returns 0 when the probe ran (even with zero matches), -1 on error.
int nv_test_decode(int64_t *outLuids, int32_t *outVendors, int32_t maxDescNum,
                   int32_t *outDescNum, DataFormat dataFormat,
                   uint8_t *data, int32_t length, const int64_t *excludedLuids, const int32_t *excludeFormats, int32_t excludeCount) {
  try {
    if (!outLuids || !outVendors || !outDescNum)
      return -1;
    Adapters adapters;
    if (!adapters.Init(ADAPTER_VENDOR_NVIDIA))
      return -1;
    int count = 0;
    for (auto &adapter : adapters.adapters_) {
      // Check capacity BEFORE testing or writing: the old post-write check
      // overflowed the output arrays when maxDescNum was 0 or negative.
      if (count >= maxDescNum)
        break;
      int64_t currentLuid = LUID(adapter.get()->desc1_);
      if (util::skip_test(excludedLuids, excludeFormats, excludeCount, currentLuid, dataFormat)) {
        continue;
      }
      CuvidDecoder *p = (CuvidDecoder *)nv_new_decoder(
          nullptr, currentLuid, dataFormat);
      if (!p)
        continue;
      auto start = util::now();
      bool ok = nv_decode(p, data, length, nullptr, nullptr) == 0;
      int64_t elapsed = util::elapsed_ms(start);
      if (ok && elapsed < TEST_TIMEOUT_MS) {
        outLuids[count] = currentLuid;
        outVendors[count] = VENDOR_NV;
        count += 1;
      }
      p->destroy();
      delete p;
      p = nullptr;
    }
    *outDescNum = count;
    return 0;
  } catch (const std::exception &e) {
    LOG_ERROR(std::string("test failed: ") + e.what());
  }
  return -1;
}
} // extern "C"

View File

@@ -0,0 +1,464 @@
#define FFNV_LOG_FUNC
#define FFNV_DEBUG_LOG_FUNC
#include <Samples/NvCodec/NvEncoder/NvEncoderD3D11.h>
#include <Samples/Utils/Logger.h>
#include <Samples/Utils/NvCodecUtils.h>
#include <Samples/Utils/NvEncoderCLIOptions.h>
#include <dynlink_cuda.h>
#include <dynlink_loader.h>
#include <fstream>
#include <iostream>
#include <libavutil/pixfmt.h>
#include <memory>
#include <d3d11.h>
#include <d3d9.h>
#include <wrl/client.h>
using Microsoft::WRL::ComPtr;
#include "callback.h"
#include "common.h"
#include "system.h"
#include "util.h"
#define LOG_MODULE "NVENC"
#include "log.h"
// Global console logger, created during static initialization.
// NOTE(review): presumably this satisfies a global `logger` symbol expected
// by the NVIDIA sample utility headers included above - confirm before
// renaming or removing it.
simplelogger::Logger *logger =
simplelogger::LoggerFactory::CreateConsoleLogger();
namespace {
// #define CONFIG_NV_OPTIMUS_FOR_DEV
#define succ(call) ((call) == 0)
// Loads the CUDA and NVENC function tables into *pp_cuda_dl and *pp_nvenc_dl.
// Throws (via NVENC_THROW_ERROR) if either table cannot be loaded. On failure
// nothing stays loaded, so callers that never reach free_driver() do not leak.
void load_driver(CudaFunctions **pp_cuda_dl, NvencFunctions **pp_nvenc_dl) {
  if (cuda_load_functions(pp_cuda_dl, NULL) < 0) {
    LOG_TRACE(std::string("cuda_load_functions failed"));
    NVENC_THROW_ERROR("cuda_load_functions failed", NV_ENC_ERR_GENERIC);
  }
  if (nvenc_load_functions(pp_nvenc_dl, NULL) < 0) {
    LOG_TRACE(std::string("nvenc_load_functions failed"));
    // Release the CUDA table loaded above before throwing; the exception
    // skips the caller's free_driver() call.
    if (*pp_cuda_dl) {
      cuda_free_functions(pp_cuda_dl);
      *pp_cuda_dl = NULL;
    }
    NVENC_THROW_ERROR("nvenc_load_functions failed", NV_ENC_ERR_GENERIC);
  }
}
// Releases the function tables obtained by load_driver(): the NVENC table
// first, then the CUDA table. Tables that are already NULL are skipped, and
// each released pointer is reset to NULL so the call can be repeated safely.
void free_driver(CudaFunctions **pp_cuda_dl, NvencFunctions **pp_nvenc_dl) {
  const bool haveNvenc = (*pp_nvenc_dl != NULL);
  if (haveNvenc) {
    nvenc_free_functions(pp_nvenc_dl);
    *pp_nvenc_dl = NULL;
  }

  const bool haveCuda = (*pp_cuda_dl != NULL);
  if (haveCuda) {
    cuda_free_functions(pp_cuda_dl);
    *pp_cuda_dl = NULL;
  }
}
class NvencEncoder {
public:
// D3D11 device/context wrapper created in init().
std::unique_ptr<NativeDevice> native_ = nullptr;
// Encoder instance created in init() and released in destroy().
NvEncoderD3D11 *pEnc_ = nullptr;
// Dynamically loaded CUDA / NVENC function tables (filled by load_driver in
// the constructor, released in destroy()).
CudaFunctions *cuda_dl_ = nullptr;
NvencFunctions *nvenc_dl_ = nullptr;
// Caller-supplied handle; cast to ID3D11Device* in init().
void *handle_ = nullptr;
// Constructor arguments, stored verbatim.
int64_t luid_;
DataFormat dataFormat_;
int32_t width_;
int32_t height_;
// Target bitrate; init() multiplies it by 1000 for averageBitRate.
int32_t kbs_;
// Used as frameRateNum with a denominator of 1.
int32_t framerate_;
// GOP length; values outside (0, MAX_GOP) select NVENC_INFINITE_GOPLENGTH.
int32_t gop_;
// Colour signalling written into the VUI by setup_h264 / setup_hevc.
bool full_range_ = false;
bool bt709_ = false;
// Encode configuration referenced by the initialize params in init().
NV_ENC_CONFIG encodeConfig_ = {0};
NvencEncoder(void *handle, int64_t luid, DataFormat dataFormat,
int32_t width, int32_t height, int32_t kbs, int32_t framerate,
int32_t gop) {
handle_ = handle;
luid_ = luid;
dataFormat_ = dataFormat;
width_ = width;
height_ = height;
kbs_ = kbs;
framerate_ = framerate;
gop_ = gop;
load_driver(&cuda_dl_, &nvenc_dl_);
}
// Safety net: release the encoder and driver tables if the owner forgot to
// call destroy(). destroy() nulls everything it frees, so running it again
// after an explicit destroy() does nothing.
~NvencEncoder() {
  try {
    destroy();
  } catch (...) {
    // A destructor must not throw; nothing more can be done here.
  }
}
bool init() {
GUID guidCodec;
switch (dataFormat_) {
case H264:
guidCodec = NV_ENC_CODEC_H264_GUID;
break;
case H265:
guidCodec = NV_ENC_CODEC_HEVC_GUID;
break;
default:
LOG_ERROR(std::string("dataFormat not support, dataFormat: ") +
std::to_string(dataFormat_));
return false;
}
if (!succ(cuda_dl_->cuInit(0))) {
LOG_TRACE(std::string("cuInit failed"));
return false;
}
native_ = std::make_unique<NativeDevice>();
#ifdef CONFIG_NV_OPTIMUS_FOR_DEV
if (!native_->Init(luid_, nullptr))
return false;
#else
if (!native_->Init(luid_, (ID3D11Device *)handle_)) {
LOG_ERROR(std::string("d3d device init failed"));
return false;
}
#endif
CUdevice cuDevice = 0;
if (!succ(cuda_dl_->cuD3D11GetDevice(&cuDevice, native_->adapter_.Get()))) {
LOG_ERROR(std::string("Failed to get cuDevice"));
return false;
}
int nExtraOutputDelay = 0;
pEnc_ = new NvEncoderD3D11(cuda_dl_, nvenc_dl_, native_->device_.Get(),
width_, height_, NV_ENC_BUFFER_FORMAT_ARGB,
nExtraOutputDelay, false, false); // no delay
NV_ENC_INITIALIZE_PARAMS initializeParams = {0};
ZeroMemory(&initializeParams, sizeof(initializeParams));
ZeroMemory(&encodeConfig_, sizeof(encodeConfig_));
initializeParams.encodeConfig = &encodeConfig_;
pEnc_->CreateDefaultEncoderParams(
&initializeParams, guidCodec,
NV_ENC_PRESET_P3_GUID /*NV_ENC_PRESET_LOW_LATENCY_HP_GUID*/,
NV_ENC_TUNING_INFO_LOW_LATENCY);
// no delay
initializeParams.encodeConfig->frameIntervalP = 1;
initializeParams.encodeConfig->rcParams.lookaheadDepth = 0;
// bitrate
initializeParams.encodeConfig->rcParams.averageBitRate = kbs_ * 1000;
// framerate
initializeParams.frameRateNum = framerate_;
initializeParams.frameRateDen = 1;
// gop
initializeParams.encodeConfig->gopLength =
(gop_ > 0 && gop_ < MAX_GOP) ? gop_ : NVENC_INFINITE_GOPLENGTH;
// rc method
initializeParams.encodeConfig->rcParams.rateControlMode =
NV_ENC_PARAMS_RC_CBR;
// color
if (dataFormat_ == H264) {
setup_h264(initializeParams.encodeConfig);
} else {
setup_hevc(initializeParams.encodeConfig);
}
pEnc_->CreateEncoder(&initializeParams);
return true;
}
int encode(void *texture, EncodeCallback callback, void *obj, int64_t ms) {
bool encoded = false;
std::vector<NvPacket> vPacket;
const NvEncInputFrame *pEncInput = pEnc_->GetNextInputFrame();
// TODO: sdk can ensure the inputPtr's width, height same as width_,
// height_, does capture's frame can ensure width height same with width_,
// height_ ?
ID3D11Texture2D *pBgraTextyure =
reinterpret_cast<ID3D11Texture2D *>(pEncInput->inputPtr);
#ifdef CONFIG_NV_OPTIMUS_FOR_DEV
copy_texture(texture, pBgraTextyure);
#else
native_->context_->CopyResource(
pBgraTextyure, reinterpret_cast<ID3D11Texture2D *>(texture));
#endif
NV_ENC_PIC_PARAMS picParams = {0};
picParams.inputTimeStamp = ms;
pEnc_->EncodeFrame(vPacket);
for (NvPacket &packet : vPacket) {
int32_t key = (packet.pictureType == NV_ENC_PIC_TYPE_IDR ||
packet.pictureType == NV_ENC_PIC_TYPE_I)
? 1
: 0;
if (packet.data.size() > 0) {
if (callback)
callback(packet.data.data(), packet.data.size(), key, obj, ms);
encoded = true;
}
}
return encoded ? 0 : -1;
}
void destroy() {
if (pEnc_) {
pEnc_->DestroyEncoder();
delete pEnc_;
pEnc_ = nullptr;
}
free_driver(&cuda_dl_, &nvenc_dl_);
}
// Fills the H.264-specific part of `encodeConfig`: VUI colour signalling
// (range from full_range_, BT.709 vs SMPTE 170M from bt709_), slice mode,
// repeated SPS/PPS, 4:2:0 chroma, auto level and the Main profile.
void setup_h264(NV_ENC_CONFIG *encodeConfig) {
  NV_ENC_CONFIG_H264 &codec = encodeConfig->encodeCodecConfig.h264Config;
  NV_ENC_CONFIG_H264_VUI_PARAMETERS &vui = codec.h264VUIParameters;

  vui.videoSignalTypePresentFlag = 1;
  vui.colourDescriptionPresentFlag = 1;
  vui.videoFullRangeFlag = !!full_range_;
  if (bt709_) {
    vui.colourMatrix = NV_ENC_VUI_MATRIX_COEFFS_BT709;
    vui.colourPrimaries = NV_ENC_VUI_COLOR_PRIMARIES_BT709;
    vui.transferCharacteristics = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709;
  } else {
    vui.colourMatrix = NV_ENC_VUI_MATRIX_COEFFS_SMPTE170M;
    vui.colourPrimaries = NV_ENC_VUI_COLOR_PRIMARIES_SMPTE170M;
    vui.transferCharacteristics =
        NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE170M;
  }

  codec.sliceMode = 3;
  codec.sliceModeData = 1;
  codec.repeatSPSPPS = 1;
  // Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for
  // yuv444 input
  codec.chromaFormatIDC = 1;
  codec.level = NV_ENC_LEVEL_AUTOSELECT;

  encodeConfig->profileGUID = NV_ENC_H264_PROFILE_MAIN_GUID;
}
// Fills the HEVC-specific part of `encodeConfig`: VUI colour signalling
// (range from full_range_, BT.709 vs SMPTE 170M from bt709_), slice mode,
// repeated SPS/PPS, 4:2:0 chroma, auto level, picture-timing SEI, Main tier
// and the Main profile.
void setup_hevc(NV_ENC_CONFIG *encodeConfig) {
  NV_ENC_CONFIG_HEVC &codec = encodeConfig->encodeCodecConfig.hevcConfig;
  NV_ENC_CONFIG_HEVC_VUI_PARAMETERS &vui = codec.hevcVUIParameters;

  vui.videoSignalTypePresentFlag = 1;
  vui.colourDescriptionPresentFlag = 1;
  vui.videoFullRangeFlag = !!full_range_;
  if (bt709_) {
    vui.colourMatrix = NV_ENC_VUI_MATRIX_COEFFS_BT709;
    vui.colourPrimaries = NV_ENC_VUI_COLOR_PRIMARIES_BT709;
    vui.transferCharacteristics = NV_ENC_VUI_TRANSFER_CHARACTERISTIC_BT709;
  } else {
    vui.colourMatrix = NV_ENC_VUI_MATRIX_COEFFS_SMPTE170M;
    vui.colourPrimaries = NV_ENC_VUI_COLOR_PRIMARIES_SMPTE170M;
    vui.transferCharacteristics =
        NV_ENC_VUI_TRANSFER_CHARACTERISTIC_SMPTE170M;
  }

  codec.sliceMode = 3;
  codec.sliceModeData = 1;
  codec.repeatSPSPPS = 1;
  // Specifies the chroma format. Should be set to 1 for yuv420 input, 3 for
  // yuv444 input
  codec.chromaFormatIDC = 1;
  codec.level = NV_ENC_LEVEL_AUTOSELECT;
  codec.outputPictureTimingSEI = 1;
  codec.tier = NV_ENC_TIER_HEVC_MAIN;

  encodeConfig->profileGUID = NV_ENC_HEVC_PROFILE_MAIN_GUID;
}
private:
#ifdef CONFIG_NV_OPTIMUS_FOR_DEV
int copy_texture(void *src, void *dst) {
ComPtr<ID3D11Device> src_device = (ID3D11Device *)handle_;
ComPtr<ID3D11DeviceContext> src_deviceContext;
src_device->GetImmediateContext(src_deviceContext.ReleaseAndGetAddressOf());
ComPtr<ID3D11Texture2D> src_tex = (ID3D11Texture2D *)src;
ComPtr<ID3D11Texture2D> dst_tex = (ID3D11Texture2D *)dst;
HRESULT hr;
D3D11_TEXTURE2D_DESC desc;
ZeroMemory(&desc, sizeof(desc));
src_tex->GetDesc(&desc);
desc.Usage = D3D11_USAGE_STAGING;
desc.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
desc.BindFlags = 0;
desc.MiscFlags = 0;
ComPtr<ID3D11Texture2D> staging_tex;
src_device->CreateTexture2D(&desc, NULL,
staging_tex.ReleaseAndGetAddressOf());
src_deviceContext->CopyResource(staging_tex.Get(), src_tex.Get());
D3D11_MAPPED_SUBRESOURCE map;
src_deviceContext->Map(staging_tex.Get(), 0, D3D11_MAP_READ, 0, &map);
std::unique_ptr<uint8_t[]> buffer(
new uint8_t[desc.Width * desc.Height * 4]);
memcpy(buffer.get(), map.pData, desc.Width * desc.Height * 4);
src_deviceContext->Unmap(staging_tex.Get(), 0);
D3D11_BOX Box;
Box.left = 0;
Box.right = desc.Width;
Box.top = 0;
Box.bottom = desc.Height;
Box.front = 0;
Box.back = 1;
native_->context_->UpdateSubresource(dst_tex.Get(), 0, &Box, buffer.get(),
desc.Width * 4,
desc.Width * desc.Height * 4);
return 0;
}
#endif
};
} // namespace
extern "C" {
int nv_encode_driver_support() {
try {
CudaFunctions *cuda_dl = NULL;
NvencFunctions *nvenc_dl = NULL;
load_driver(&cuda_dl, &nvenc_dl);
free_driver(&cuda_dl, &nvenc_dl);
return 0;
} catch (const std::exception &e) {
LOG_TRACE(std::string("driver not support, ") + e.what());
}
return -1;
}
int nv_destroy_encoder(void *encoder) {
try {
NvencEncoder *e = (NvencEncoder *)encoder;
if (e) {
e->destroy();
delete e;
e = NULL;
}
return 0;
} catch (const std::exception &e) {
LOG_ERROR(std::string("destroy failed: ") + e.what());
}
return -1;
}
void *nv_new_encoder(void *handle, int64_t luid, DataFormat dataFormat,
int32_t width, int32_t height, int32_t kbs,
int32_t framerate, int32_t gop) {
NvencEncoder *e = NULL;
try {
e = new NvencEncoder(handle, luid, dataFormat, width, height, kbs,
framerate, gop);
if (!e->init()) {
goto _exit;
}
return e;
} catch (const std::exception &ex) {
LOG_ERROR(std::string("new failed: ") + ex.what());
goto _exit;
}
_exit:
if (e) {
e->destroy();
delete e;
e = NULL;
}
return NULL;
}
// Encodes one frame with an encoder created by nv_new_encoder.
// `texture` is the D3D11 texture to encode; `callback` (optional) receives
// each output packet together with `obj` and `ms`.
// Returns 0 if a packet was produced, -1 otherwise (including for a NULL
// encoder handle).
int nv_encode(void *encoder, void *texture, EncodeCallback callback, void *obj,
              int64_t ms) {
  try {
    NvencEncoder *e = (NvencEncoder *)encoder;
    // Reject a null handle instead of dereferencing it, matching the
    // tolerance nv_destroy_encoder already has.
    if (!e) {
      LOG_ERROR(std::string("encode failed: encoder is null"));
      return -1;
    }
    return e->encode(texture, callback, obj, ms);
  } catch (const std::exception &e) {
    LOG_ERROR(std::string("encode failed: ") + e.what());
  }
  return -1;
}
// ref: Reconfigure API
// RECONFIGURE_HEAD expects a `void *e` encoder handle in scope. It declares
// `enc`, reads the encoder's current initialize params (with a local
// NV_ENC_CONFIG attached), and declares `params`, a versioned
// NV_ENC_RECONFIGURE_PARAMS pre-filled with them. The caller then edits
// `params.reInitEncodeParams` before RECONFIGURE_TAIL.
// Neither `e` nor `enc->pEnc_` is checked for null here.
#define RECONFIGURE_HEAD \
NvencEncoder *enc = (NvencEncoder *)e; \
NV_ENC_CONFIG sEncodeConfig = {0}; \
NV_ENC_INITIALIZE_PARAMS sInitializeParams = {0}; \
sInitializeParams.encodeConfig = &sEncodeConfig; \
enc->pEnc_->GetInitializeParams(&sInitializeParams); \
NV_ENC_RECONFIGURE_PARAMS params = {0}; \
params.version = NV_ENC_RECONFIGURE_PARAMS_VER; \
params.reInitEncodeParams = sInitializeParams;
// RECONFIGURE_TAIL applies `params` and returns 0 from the enclosing function
// when Reconfigure reports success; otherwise execution falls through.
#define RECONFIGURE_TAIL \
if (enc->pEnc_->Reconfigure(&params)) { \
return 0; \
}
// Tries to encode one frame on every NVIDIA adapter that is not excluded,
// and reports the adapters that produce a key frame within TEST_TIMEOUT_MS.
//   outLuids/outVendors - output arrays with room for maxDescNum entries.
//   outDescNum          - receives the number of entries written.
// Returns 0 when the probe ran (even with zero matches), -1 on error.
int nv_test_encode(int64_t *outLuids, int32_t *outVendors, int32_t maxDescNum, int32_t *outDescNum,
                   DataFormat dataFormat, int32_t width,
                   int32_t height, int32_t kbs, int32_t framerate,
                   int32_t gop, const int64_t *excludedLuids, const int32_t *excludeFormats, int32_t excludeCount) {
  try {
    if (!outLuids || !outVendors || !outDescNum)
      return -1;
    Adapters adapters;
    if (!adapters.Init(ADAPTER_VENDOR_NVIDIA))
      return -1;
    int count = 0;
    for (auto &adapter : adapters.adapters_) {
      // Check capacity BEFORE testing or writing: the old post-write check
      // overflowed the output arrays when maxDescNum was 0 or negative.
      if (count >= maxDescNum)
        break;
      int64_t currentLuid = LUID(adapter.get()->desc1_);
      if (util::skip_test(excludedLuids, excludeFormats, excludeCount, currentLuid, dataFormat)) {
        continue;
      }
      NvencEncoder *e = (NvencEncoder *)nv_new_encoder(
          (void *)adapter.get()->device_.Get(), currentLuid,
          dataFormat, width, height, kbs, framerate, gop);
      if (!e)
        continue;
      if (e->native_->EnsureTexture(e->width_, e->height_)) {
        e->native_->next();
        // Set to 1 by the test callback when the packet is a key frame.
        int32_t key_obj = 0;
        auto start = util::now();
        bool ok = nv_encode(e, e->native_->GetCurrentTexture(), util_encode::vram_encode_test_callback, &key_obj,
                            0) == 0 && key_obj == 1;
        int64_t elapsed = util::elapsed_ms(start);
        if (ok && elapsed < TEST_TIMEOUT_MS) {
          outLuids[count] = currentLuid;
          outVendors[count] = VENDOR_NV;
          count += 1;
        }
      }
      e->destroy();
      delete e;
      e = nullptr;
    }
    *outDescNum = count;
    return 0;
  } catch (const std::exception &e) {
    LOG_ERROR(std::string("test failed: ") + e.what());
  }
  return -1;
}
int nv_set_bitrate(void *e, int32_t kbs) {
try {
RECONFIGURE_HEAD
params.reInitEncodeParams.encodeConfig->rcParams.averageBitRate =
kbs * 1000;
RECONFIGURE_TAIL
} catch (const std::exception &e) {
LOG_ERROR(std::string("set bitrate to ") + std::to_string(kbs) +
"k failed: " + e.what());
}
return -1;
}
int nv_set_framerate(void *e, int32_t framerate) {
try {
RECONFIGURE_HEAD
params.reInitEncodeParams.frameRateNum = framerate;
params.reInitEncodeParams.frameRateDen = 1;
RECONFIGURE_TAIL
} catch (const std::exception &e) {
LOG_ERROR(std::string("set framerate failed: ") + e.what());
}
return -1;
}
} // extern "C"

View File

@@ -0,0 +1,40 @@
#ifndef NV_FFI_H
#define NV_FFI_H
#include "../common/callback.h"
#include <stdbool.h>
/* Driver probes: return 0 when the required driver libraries can be loaded,
 * -1 otherwise. */
int nv_encode_driver_support();
int nv_decode_driver_support();
/* Creates an encoder. `handle` is the D3D11 device pointer, `luid` selects
 * the adapter, `bitrate` is in kbit/s. Returns an opaque encoder handle, or
 * NULL on failure. */
void *nv_new_encoder(void *handle, int64_t luid,
int32_t dataFormat, int32_t width, int32_t height,
int32_t bitrate, int32_t framerate, int32_t gop);
/* Encodes the D3D11 texture `tex`; `callback` receives each output packet
 * together with `obj` and `ms`. Returns 0 if a packet was produced, -1
 * otherwise. */
int nv_encode(void *encoder, void *tex, EncodeCallback callback, void *obj,
int64_t ms);
/* Destroys an encoder handle (NULL is accepted). Returns 0 on success. */
int nv_destroy_encoder(void *encoder);
/* Creates a decoder for the adapter `luid`. Returns an opaque decoder handle,
 * or NULL on failure. */
void *nv_new_decoder(void *device, int64_t luid, int32_t codecID);
/* Decodes one packet; `callback` receives each output texture together with
 * `obj`. */
int nv_decode(void *decoder, uint8_t *data, int len, DecodeCallback callback,
void *obj);
/* Destroys a decoder handle (NULL is accepted). Returns 0 on success. */
int nv_destroy_decoder(void *decoder);
/* Capability probes: test each non-excluded NVIDIA adapter and write the
 * LUIDs/vendors of the working ones into outLuids/outVendors (up to
 * maxDescNum entries); the count is stored in *outDescNum. Return 0 when the
 * probe ran, -1 on error. */
int nv_test_encode(int64_t *outLuids, int32_t *outVendors, int32_t maxDescNum, int32_t *outDescNum,
int32_t dataFormat, int32_t width,
int32_t height, int32_t kbs, int32_t framerate, int32_t gop,
const int64_t *excludedLuids, const int32_t *excludeFormats, int32_t excludeCount);
int nv_test_decode(int64_t *outLuids, int32_t *outVendors, int32_t maxDescNum, int32_t *outDescNum,
int32_t dataFormat, uint8_t *data,
int32_t length, const int64_t *excludedLuids, const int32_t *excludeFormats, int32_t excludeCount);
/* Runtime reconfiguration of a live encoder. Return 0 on success, -1 on
 * failure. */
int nv_set_bitrate(void *encoder, int32_t kbs);
int nv_set_framerate(void *encoder, int32_t framerate);
#endif // NV_FFI_H