diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp index 07ed92126..b0caa9705 100644 --- a/src/torchcodec/_core/BetaCudaDeviceInterface.cpp +++ b/src/torchcodec/_core/BetaCudaDeviceInterface.cpp @@ -15,7 +15,7 @@ #include "src/torchcodec/_core/FFMPEGCommon.h" #include "src/torchcodec/_core/NVDECCache.h" -// #include // For cudaStreamSynchronize +#include "src/torchcodec/_core/NVCUVIDRuntimeLoader.h" #include "src/torchcodec/_core/nvcuvid_include/cuviddec.h" #include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h" @@ -155,6 +155,7 @@ std::optional validateCodecSupport(AVCodecID codecId) { bool nativeNVDECSupport(const SharedAVCodecContext& codecContext) { // Return true iff the input video stream is supported by our NVDEC // implementation. + auto codecType = validateCodecSupport(codecContext->codec_id); if (!codecType.has_value()) { return false; @@ -222,6 +223,8 @@ BetaCudaDeviceInterface::BetaCudaDeviceInterface(const torch::Device& device) initializeCudaContextWithPytorch(device_); nppCtx_ = getNppStreamContext(device_); + + nvcuvidAvailable_ = loadNVCUVIDLibrary(); } BetaCudaDeviceInterface::~BetaCudaDeviceInterface() { @@ -249,7 +252,7 @@ void BetaCudaDeviceInterface::initialize( const AVStream* avStream, const UniqueDecodingAVFormatContext& avFormatCtx, [[maybe_unused]] const SharedAVCodecContext& codecContext) { - if (!nativeNVDECSupport(codecContext)) { + if (!nvcuvidAvailable_ || !nativeNVDECSupport(codecContext)) { cpuFallback_ = createDeviceInterface(torch::kCPU); TORCH_CHECK( cpuFallback_ != nullptr, "Failed to create CPU device interface"); @@ -700,8 +703,16 @@ void BetaCudaDeviceInterface::convertAVFrameToFrameOutput( } std::string BetaCudaDeviceInterface::getDetails() { - return std::string("Beta CUDA Device Interface. Using ") + - (cpuFallback_ ? "CPU fallback." 
: "NVDEC."); + std::string details = "Beta CUDA Device Interface."; + if (cpuFallback_) { + details += " Using CPU fallback."; + if (!nvcuvidAvailable_) { + details += " NVCUVID not available!"; + } + } else { + details += " Using NVDEC."; + } + return details; } } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/BetaCudaDeviceInterface.h b/src/torchcodec/_core/BetaCudaDeviceInterface.h index 3a9520867..29511df50 100644 --- a/src/torchcodec/_core/BetaCudaDeviceInterface.h +++ b/src/torchcodec/_core/BetaCudaDeviceInterface.h @@ -98,6 +98,7 @@ class BetaCudaDeviceInterface : public DeviceInterface { UniqueNppContext nppCtx_; std::unique_ptr cpuFallback_; + bool nvcuvidAvailable_ = false; }; } // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/CMakeLists.txt b/src/torchcodec/_core/CMakeLists.txt index 75d1b036c..6b4ccb5d4 100644 --- a/src/torchcodec/_core/CMakeLists.txt +++ b/src/torchcodec/_core/CMakeLists.txt @@ -99,7 +99,7 @@ function(make_torchcodec_libraries ) if(ENABLE_CUDA) - list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp) + list(APPEND core_sources CudaDeviceInterface.cpp BetaCudaDeviceInterface.cpp NVDECCache.cpp CUDACommon.cpp NVCUVIDRuntimeLoader.cpp) endif() set(core_library_dependencies @@ -108,27 +108,9 @@ function(make_torchcodec_libraries ) if(ENABLE_CUDA) - # Try to find NVCUVID. Try the normal way first. This should work locally. - find_library(NVCUVID_LIBRARY NAMES nvcuvid) - # If not found, try with version suffix, or hardcoded path. Appears - # to be necessary on the CI. 
- if(NOT NVCUVID_LIBRARY) - find_library(NVCUVID_LIBRARY NAMES nvcuvid.1 PATHS /usr/lib64 /usr/lib) - endif() - if(NOT NVCUVID_LIBRARY) - set(NVCUVID_LIBRARY "/usr/lib64/libnvcuvid.so.1") - endif() - - if(NVCUVID_LIBRARY) - message(STATUS "Found NVCUVID: ${NVCUVID_LIBRARY}") - else() - message(FATAL_ERROR "Could not find NVCUVID library") - endif() - list(APPEND core_library_dependencies ${CUDA_nppi_LIBRARY} ${CUDA_nppicc_LIBRARY} - ${NVCUVID_LIBRARY} ) endif() diff --git a/src/torchcodec/_core/NVCUVIDRuntimeLoader.cpp b/src/torchcodec/_core/NVCUVIDRuntimeLoader.cpp new file mode 100644 index 000000000..2bb501fc2 --- /dev/null +++ b/src/torchcodec/_core/NVCUVIDRuntimeLoader.cpp @@ -0,0 +1,320 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#ifdef FBCODE_CAFFE2 +// No need to do anything on fbcode. NVCUVID is available there, we can take a +// hard dependency on it. +// The FBCODE_CAFFE2 macro is defined in the upstream fbcode build of torch, so +// we can rely on it, that's what torch does too. + +namespace facebook::torchcodec { +bool loadNVCUVIDLibrary() { + return true; +} +} // namespace facebook::torchcodec +#else + +#include "src/torchcodec/_core/NVCUVIDRuntimeLoader.h" + +#include "src/torchcodec/_core/nvcuvid_include/cuviddec.h" +#include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h" + +#include +#include +#include + +#if defined(WIN64) || defined(_WIN64) +#include +typedef HMODULE tHandle; +#else +#include +typedef void* tHandle; +#endif + +namespace facebook::torchcodec { + +/* clang-format off */ +// This file defines the logic to load the NVCUVID library **at runtime**, +// along with the corresponding NVCUVID functions that we'll need. 
+// +// We do this because we *do not want* to link (statically or dynamically) +// against libnvcuvid.so: it is not always available on the user's machine! If we +// were to link against libnvcuvid.so, that would mean that our +// libtorchcodec_coreN.so would try to look for it when loaded at import time. +// And if it's not on the user's machine, that causes `import torchcodec` to +// fail. Source: that's what we did, and we got user reports. +// +// So, we don't link against libnvcuvid.so. But we still want to call its +// functions. So here's how it's done; we'll use cuvidCreateVideoParser as an +// example, but it works the same for all. We are largely following the +// instructions from the NVCUVID docs: +// https://docs.nvidia.com/video-technologies/video-codec-sdk/13.0/nvdec-video-decoder-api-prog-guide/index.html#dynamic-loading-nvidia-components +// +// This: +// typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser*, CUVIDPARSERPARAMS*); +// defines tcuvidCreateVideoParser, which is the *type* of a *function*. +// We define a pointer to a function of that type just below with: +// static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr; +// "dl" is for "dynamically loaded". For now dl_cuvidCreateVideoParser is +// nullptr, but later it will be a proper function [pointer] that can be called +// with dl_cuvidCreateVideoParser(...); +// +// For that to happen we need to call loadNVCUVIDLibrary(): in there, we first +// dlopen(libnvcuvid.so) which loads the .so somewhere in memory. Then we call +// dlsym(...), which binds dl_cuvidCreateVideoParser to its actual address: it +// literally sets the value of the dl_cuvidCreateVideoParser pointer to the +// address of the actual code section. If all went well, by now, we can safely +// call dl_cuvidCreateVideoParser(...); +// All of that happens at runtime *after* import time, when the first instance +// of the Beta CUDA interface is created, i.e. only when the user explicitly +// requests it. 
+// +// At the bottom of this file we have an `extern "C"` section with function +// definitions like: +// +// CUresult CUDAAPI cuvidCreateVideoParser( +// CUvideoparser* videoParser, +// CUVIDPARSERPARAMS* parserParams) {...} +// +// These are the actual functions that are compiled against and called by the +// Beta CUDA interface code. Crucially, these functions' signatures exactly match +// the NVCUVID functions (as defined in cuviddec.h). Inside of +// cuvidCreateVideoParser(...) we simply call the dl_cuvidCreateVideoParser +// function [pointer] that we dynamically loaded earlier. +// +// At runtime, within the Beta CUDA interface code we have a fallback mechanism +// to switch back to the CPU backend if any of the NVCUVID functions are not +// available, or if libnvcuvid.so itself couldn't be found. This is what FFmpeg +// does too. + + +// Function types for the dynamically loaded NVCUVID entry points +typedef CUresult CUDAAPI tcuvidCreateVideoParser(CUvideoparser*, CUVIDPARSERPARAMS*); +typedef CUresult CUDAAPI tcuvidParseVideoData(CUvideoparser, CUVIDSOURCEDATAPACKET*); +typedef CUresult CUDAAPI tcuvidDestroyVideoParser(CUvideoparser); +typedef CUresult CUDAAPI tcuvidGetDecoderCaps(CUVIDDECODECAPS*); +typedef CUresult CUDAAPI tcuvidCreateDecoder(CUvideodecoder*, CUVIDDECODECREATEINFO*); +typedef CUresult CUDAAPI tcuvidDestroyDecoder(CUvideodecoder); +typedef CUresult CUDAAPI tcuvidDecodePicture(CUvideodecoder, CUVIDPICPARAMS*); +typedef CUresult CUDAAPI tcuvidMapVideoFrame(CUvideodecoder, int, unsigned int*, unsigned int*, CUVIDPROCPARAMS*); +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame(CUvideodecoder, unsigned int); +typedef CUresult CUDAAPI tcuvidMapVideoFrame64(CUvideodecoder, int, unsigned long long*, unsigned int*, CUVIDPROCPARAMS*); +typedef CUresult CUDAAPI tcuvidUnmapVideoFrame64(CUvideodecoder, unsigned long long); +/* clang-format on */ + +// Global function pointers - will be dynamically loaded +static tcuvidCreateVideoParser* dl_cuvidCreateVideoParser = nullptr; +static 
tcuvidParseVideoData* dl_cuvidParseVideoData = nullptr; +static tcuvidDestroyVideoParser* dl_cuvidDestroyVideoParser = nullptr; +static tcuvidGetDecoderCaps* dl_cuvidGetDecoderCaps = nullptr; +static tcuvidCreateDecoder* dl_cuvidCreateDecoder = nullptr; +static tcuvidDestroyDecoder* dl_cuvidDestroyDecoder = nullptr; +static tcuvidDecodePicture* dl_cuvidDecodePicture = nullptr; +static tcuvidMapVideoFrame* dl_cuvidMapVideoFrame = nullptr; +static tcuvidUnmapVideoFrame* dl_cuvidUnmapVideoFrame = nullptr; +static tcuvidMapVideoFrame64* dl_cuvidMapVideoFrame64 = nullptr; +static tcuvidUnmapVideoFrame64* dl_cuvidUnmapVideoFrame64 = nullptr; + +static tHandle g_nvcuvid_handle = nullptr; +static std::mutex g_nvcuvid_mutex; + +bool isLoaded() { + return ( + g_nvcuvid_handle && dl_cuvidCreateVideoParser && dl_cuvidParseVideoData && + dl_cuvidDestroyVideoParser && dl_cuvidGetDecoderCaps && + dl_cuvidCreateDecoder && dl_cuvidDestroyDecoder && + dl_cuvidDecodePicture && dl_cuvidMapVideoFrame && + dl_cuvidUnmapVideoFrame && dl_cuvidMapVideoFrame64 && + dl_cuvidUnmapVideoFrame64); +} + +template +T* bindFunction(const char* functionName) { +#if defined(WIN64) || defined(_WIN64) + return reinterpret_cast(GetProcAddress(g_nvcuvid_handle, functionName)); +#else + return reinterpret_cast(dlsym(g_nvcuvid_handle, functionName)); +#endif +} + +bool _loadLibrary() { + // Helper that just calls dlopen or equivalent on Windows. In a separate + // function because of the #ifdef uglyness. 
+#if defined(WIN64) || defined(_WIN64) +#ifdef UNICODE + static LPCWSTR nvcuvidDll = L"nvcuvid.dll"; +#else + static LPCSTR nvcuvidDll = "nvcuvid.dll"; +#endif + g_nvcuvid_handle = LoadLibrary(nvcuvidDll); + if (g_nvcuvid_handle == nullptr) { + return false; + } +#else + g_nvcuvid_handle = dlopen("libnvcuvid.so", RTLD_NOW); + if (g_nvcuvid_handle == nullptr) { + g_nvcuvid_handle = dlopen("libnvcuvid.so.1", RTLD_NOW); + } + if (g_nvcuvid_handle == nullptr) { + return false; + } +#endif + + return true; +} + +bool loadNVCUVIDLibrary() { + // Loads NVCUVID library and all required function pointers. + // Returns true on success, false on failure. + std::lock_guard lock(g_nvcuvid_mutex); + + if (isLoaded()) { + return true; + } + + if (!_loadLibrary()) { + return false; + } + + // Load all function pointers. They'll be set to nullptr if not found. + dl_cuvidCreateVideoParser = + bindFunction("cuvidCreateVideoParser"); + dl_cuvidParseVideoData = + bindFunction("cuvidParseVideoData"); + dl_cuvidDestroyVideoParser = + bindFunction("cuvidDestroyVideoParser"); + dl_cuvidGetDecoderCaps = + bindFunction("cuvidGetDecoderCaps"); + dl_cuvidCreateDecoder = + bindFunction("cuvidCreateDecoder"); + dl_cuvidDestroyDecoder = + bindFunction("cuvidDestroyDecoder"); + dl_cuvidDecodePicture = + bindFunction("cuvidDecodePicture"); + dl_cuvidMapVideoFrame = + bindFunction("cuvidMapVideoFrame"); + dl_cuvidUnmapVideoFrame = + bindFunction("cuvidUnmapVideoFrame"); + dl_cuvidMapVideoFrame64 = + bindFunction("cuvidMapVideoFrame64"); + dl_cuvidUnmapVideoFrame64 = + bindFunction("cuvidUnmapVideoFrame64"); + + return isLoaded(); +} + +} // namespace facebook::torchcodec + +extern "C" { + +CUresult CUDAAPI cuvidCreateVideoParser( + CUvideoparser* videoParser, + CUVIDPARSERPARAMS* parserParams) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidCreateVideoParser, + "cuvidCreateVideoParser called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidCreateVideoParser( + videoParser, 
parserParams); +} + +CUresult CUDAAPI cuvidParseVideoData( + CUvideoparser videoParser, + CUVIDSOURCEDATAPACKET* cuvidPacket) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidParseVideoData, + "cuvidParseVideoData called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidParseVideoData(videoParser, cuvidPacket); +} + +CUresult CUDAAPI cuvidDestroyVideoParser(CUvideoparser videoParser) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidDestroyVideoParser, + "cuvidDestroyVideoParser called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidDestroyVideoParser(videoParser); +} + +CUresult CUDAAPI cuvidGetDecoderCaps(CUVIDDECODECAPS* caps) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidGetDecoderCaps, + "cuvidGetDecoderCaps called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidGetDecoderCaps(caps); +} + +CUresult CUDAAPI cuvidCreateDecoder( + CUvideodecoder* decoder, + CUVIDDECODECREATEINFO* decoderParams) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidCreateDecoder, + "cuvidCreateDecoder called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidCreateDecoder(decoder, decoderParams); +} + +CUresult CUDAAPI cuvidDestroyDecoder(CUvideodecoder decoder) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidDestroyDecoder, + "cuvidDestroyDecoder called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidDestroyDecoder(decoder); +} + +CUresult CUDAAPI +cuvidDecodePicture(CUvideodecoder decoder, CUVIDPICPARAMS* picParams) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidDecodePicture, + "cuvidDecodePicture called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidDecodePicture(decoder, picParams); +} + +#if !defined(__CUVID_DEVPTR64) || defined(__CUVID_INTERNAL) +// We need to protect the definition of the 32bit versions under the above +// conditions (see cuviddec.h). 
Defining them unconditionally would cause +// conflict compilation errors when cuviddec.h redefines those to the 64bit +// versions. +CUresult CUDAAPI cuvidMapVideoFrame( + CUvideodecoder decoder, + int pixIndex, + unsigned int* framePtr, + unsigned int* pitch, + CUVIDPROCPARAMS* procParams) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidMapVideoFrame, + "cuvidMapVideoFrame called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidMapVideoFrame( + decoder, pixIndex, framePtr, pitch, procParams); +} + +CUresult CUDAAPI +cuvidUnmapVideoFrame(CUvideodecoder decoder, unsigned int framePtr) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidUnmapVideoFrame, + "cuvidUnmapVideoFrame called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidUnmapVideoFrame(decoder, framePtr); +} +#endif + +CUresult CUDAAPI cuvidMapVideoFrame64( + CUvideodecoder decoder, + int pixIndex, + unsigned long long* framePtr, + unsigned int* pitch, + CUVIDPROCPARAMS* procParams) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidMapVideoFrame64, + "cuvidMapVideoFrame64 called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidMapVideoFrame64( + decoder, pixIndex, framePtr, pitch, procParams); +} + +CUresult CUDAAPI +cuvidUnmapVideoFrame64(CUvideodecoder decoder, unsigned long long framePtr) { + TORCH_CHECK( + facebook::torchcodec::dl_cuvidUnmapVideoFrame64, + "cuvidUnmapVideoFrame64 called but NVCUVID not loaded!"); + return facebook::torchcodec::dl_cuvidUnmapVideoFrame64(decoder, framePtr); +} + +} // extern "C" + +#endif // FBCODE_CAFFE2 diff --git a/src/torchcodec/_core/NVCUVIDRuntimeLoader.h b/src/torchcodec/_core/NVCUVIDRuntimeLoader.h new file mode 100644 index 000000000..e6ee40a05 --- /dev/null +++ b/src/torchcodec/_core/NVCUVIDRuntimeLoader.h @@ -0,0 +1,14 @@ +// Copyright (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. 
+// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +namespace facebook::torchcodec { + +// See note in corresponding cpp file +bool loadNVCUVIDLibrary(); + +} // namespace facebook::torchcodec diff --git a/src/torchcodec/_core/NVDECCache.h b/src/torchcodec/_core/NVDECCache.h index b248ebc68..a0f2fb862 100644 --- a/src/torchcodec/_core/NVDECCache.h +++ b/src/torchcodec/_core/NVDECCache.h @@ -12,6 +12,8 @@ #include #include + +#include "src/torchcodec/_core/NVCUVIDRuntimeLoader.h" #include "src/torchcodec/_core/nvcuvid_include/cuviddec.h" #include "src/torchcodec/_core/nvcuvid_include/nvcuvid.h"