Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Video decoder in plugin #5477

Merged
merged 53 commits into from
Jun 17, 2024
Merged
Show file tree
Hide file tree
Changes from 49 commits
Commits
Show all changes
53 commits
Select commit Hold shift + click to select a range
e2ecee6
Add video plugin
jantonguirao Dec 18, 2023
582eaa6
Use VideoCodecSDK from NGC link
jantonguirao Feb 8, 2024
adf1843
Rearrange video plugin operators to use VideoCodecSDK dlopened cuvid …
jantonguirao Feb 19, 2024
739ce7b
Add dynlink_nvcuvid
jantonguirao Mar 4, 2024
7683718
New decoder draft
jantonguirao Mar 1, 2024
3d643d8
Remove legacy ops
jantonguirao Mar 4, 2024
7249161
Fix
jantonguirao Mar 4, 2024
b8bfd24
Remove ffmpeg find code
jantonguirao Mar 4, 2024
e350ece
Remove CUDA from the name of the plugin
jantonguirao Mar 5, 2024
2eae25a
Fix plugin installation
jantonguirao Mar 6, 2024
76575b8
Rearrange plugin
jantonguirao Mar 7, 2024
604553d
Remove placeholder __init__ file
jantonguirao Mar 7, 2024
0997110
Deliver python stubs
jantonguirao Mar 7, 2024
cb4476d
Plugin autodiscovery
jantonguirao Mar 8, 2024
6c50533
Fix ffmpeg build
jantonguirao Mar 11, 2024
258afe7
Plugin preload
jantonguirao Mar 12, 2024
14fd16a
BUILD_FFMPEG option
jantonguirao Mar 13, 2024
4b198e2
Use ENV variables for source tarballs
jantonguirao Mar 13, 2024
6447dd1
Fix compile flags
jantonguirao Mar 15, 2024
67487ff
Code review fixes
jantonguirao Mar 15, 2024
ef82efe
more review fixes
jantonguirao Mar 15, 2024
01da4ba
Update docker
jantonguirao Mar 25, 2024
3975798
Lint
jantonguirao Mar 25, 2024
f2d96da
Cleanup
jantonguirao Apr 29, 2024
8950adf
add qa test about video plugin installation
jantonguirao Apr 29, 2024
472660a
Lint fix
jantonguirao Apr 29, 2024
4f481d9
copy all sdist tar.gz artifacts to runner docker
jantonguirao Apr 30, 2024
cd8375b
Add pip dependencies to video_plugin test
jantonguirao Apr 30, 2024
a3ed3b0
Fix ffmpeg dependency
jantonguirao Apr 30, 2024
c9194c9
Use PyNvVideoCodec 1.0.2
jantonguirao May 2, 2024
249f13a
Undo unnecessary deps
jantonguirao May 2, 2024
ed8cef4
Document plugin, rename to plugin.video.decoder
jantonguirao May 3, 2024
018b747
Remove prints
jantonguirao May 6, 2024
9892cf6
Remove Python 3.13 from setup.py
jantonguirao May 7, 2024
da2e777
Preload libcuda stub
jantonguirao May 7, 2024
4aa9a90
Undo doc change from this PR
jantonguirao May 10, 2024
502f11b
Add linting
jantonguirao May 20, 2024
dea2133
Basic test working
awolant May 20, 2024
f89c04b
Merge remote-tracking branch 'nvidia/main' into video_decoder_plugin
awolant May 21, 2024
8bbff16
Invoke tests
awolant May 21, 2024
c69c5a6
Add end_frame
awolant May 21, 2024
44aba05
Fix linter
awolant May 21, 2024
61f75e4
Fix linter
awolant May 21, 2024
63027b3
Remove unused stuff
awolant May 22, 2024
fcb0eda
Refactor tests
awolant May 22, 2024
73e86a9
Update dates
awolant Jun 6, 2024
ea63a70
Address review comments
awolant Jun 6, 2024
f6e04a6
Merge remote-tracking branch 'nvidia/main' into video_decoder_plugin
awolant Jun 6, 2024
99a6865
Remove unused code
awolant Jun 6, 2024
af7c207
Fix tests
awolant Jun 6, 2024
dffd5b7
Fix error
awolant Jun 6, 2024
2264892
Sync
awolant Jun 10, 2024
ccb0ee6
Remove broken tests
awolant Jun 17, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 106 additions & 0 deletions dali/test/python/test_dali_video_plugin_decoder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
# Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


from nvidia.dali import pipeline_def
import nvidia.dali.types as types
import nvidia.dali.fn as fn

import cv2
import numpy as np

import nvidia.dali.fn.plugin.video as video
stiepan marked this conversation as resolved.
Show resolved Hide resolved

from test_utils import get_dali_extra_path


@pipeline_def(device_id=0, num_threads=4, batch_size=1)
def video_decoder_pipeline(data_path):
data = fn.external_source(
source=lambda: [
np.fromfile(data_path, np.uint8),
],
dtype=types.UINT8,
)
return video.decoder(data, device="mixed", end_frame=50)


def run_video_decoding_test(test_file_path, frame_list_file_path, frames_directory_path):
pipeline = video_decoder_pipeline(test_file_path)
pipeline.build()

(output,) = pipeline.run()
frames = output.as_cpu().as_array()

with open(frame_list_file_path) as f:
frame_list = f.read().splitlines()

for i, frame in enumerate(frames[0]):
# Check if the frame is equal to the ground truth frame.
# Due to differences in how the decoding is implemented in
# different video codecs, we can't guarantee that the frames
# will be exactly the same. Main purpose of this test is to
# check if the decoding is working and we hit the correct frames.
ground_truth = cv2.imread(f"{frames_directory_path}/{frame_list[i]}")
frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
if not np.average(frame - ground_truth) < 15:
assert False, f"Frame {i} is not equal"


def test_cfr_h264_mp4_decoding():
run_video_decoding_test(
get_dali_extra_path() + "/db/video/cfr/test_1.mp4",
f"{get_dali_extra_path()}/db/video/cfr/frames_1/frames_list.txt",
f"{get_dali_extra_path()}/db/video/cfr/frames_1",
)


def test_cfr_h264_raw_decoding():
run_video_decoding_test(
get_dali_extra_path() + "/db/video/cfr/test_1.h264",
f"{get_dali_extra_path()}/db/video/cfr/frames_1/frames_list.txt",
f"{get_dali_extra_path()}/db/video/cfr/frames_1",
)


def test_cfr_h265_mp4_decoding():
run_video_decoding_test(
get_dali_extra_path() + "/db/video/cfr/test_1_hevc.mp4",
f"{get_dali_extra_path()}/db/video/cfr/frames_1/frames_list.txt",
f"{get_dali_extra_path()}/db/video/cfr/frames_1",
)


def test_cfr_h265_raw_decoding():
run_video_decoding_test(
get_dali_extra_path() + "/db/video/cfr/test_1.h265",
f"{get_dali_extra_path()}/db/video/cfr/frames_1/frames_list.txt",
f"{get_dali_extra_path()}/db/video/cfr/frames_1",
)


def test_vfr_h264_mp4_decoding():
run_video_decoding_test(
get_dali_extra_path() + "/db/video/vfr/test_1.mp4",
f"{get_dali_extra_path()}/db/video/vfr/frames_1/frames_list.txt",
f"{get_dali_extra_path()}/db/video/vfr/frames_1",
)


def test_vfr_hevc_mp4_decoding():
run_video_decoding_test(
get_dali_extra_path() + "/db/video/vfr/test_1_hevc.mp4",
f"{get_dali_extra_path()}/db/video/vfr/frames_1_hevc/frames_list.txt",
f"{get_dali_extra_path()}/db/video/vfr/frames_1_hevc",
)
1 change: 1 addition & 0 deletions plugins/video/pkg_src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ add_subdirectory("${pynvvideocodec_SOURCE_DIR}/src/VideoCodecSDKUtils/")

set(VIDEO_PLUGIN_SOURCES
src/decoder/video_decoder_mixed.cc
src/decoder/color_space.cu
)

add_library(dali_${PLUGIN_NAME} SHARED ${VIDEO_PLUGIN_SOURCES})
Expand Down
86 changes: 86 additions & 0 deletions plugins/video/pkg_src/src/decoder/color_space.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "decoder/color_space.h"

#include <cuda_runtime.h>
#include "dali/kernels/imgproc/sampler.h"
#include "dali/kernels/imgproc/color_manipulation/color_space_conversion_impl.h"

namespace dali {

template <bool full_range>
__global__ static void yuv_to_rgb_kernel(
const uint8_t *yuv, int yuv_pitch, uint8_t *rgb, int rgb_pitch, int width, int height) {
int halfx = (threadIdx.x + blockIdx.x * blockDim.x);
int halfy = (threadIdx.y + blockIdx.y * blockDim.y);
int x = 2 * halfx;
int y = 2 * halfy;
if (x >= width || y >= height) {
return;
}

kernels::Surface2D<const uint8_t> Y_surf, UV_surf;
kernels::Surface2D<uint8_t> RGB;
const uint8_t *chroma = yuv + height * yuv_pitch;

Y_surf = { yuv, width, height, 1, 1, yuv_pitch, 1 };
UV_surf = { chroma, width / 2, height / 2, 2, 2, yuv_pitch, 1 };

RGB = { rgb, width, height, 3, 3, rgb_pitch, 1 };

auto Y = kernels::make_sampler<DALI_INTERP_NN>(Y_surf);
auto UV = kernels::make_sampler<DALI_INTERP_LINEAR>(UV_surf);

#pragma unroll
for (int i = 0; i < 2; i++) {
float cy = halfy + i * 0.5f + 0.25f;
#pragma unroll
for (int j = 0; j < 2; j++) {
float cx = halfx + j * 0.5f + 0.25f;
u8vec3 yuv_val;
yuv_val[0] = Y.at(ivec2{x + j, y + i}, 0, kernels::BorderClamp());

UV(&yuv_val[1], vec2(cx, cy), kernels::BorderClamp());

u8vec3 rgb_val;
if (full_range)
rgb_val = dali::kernels::color::jpeg::ycbcr_to_rgb<uint8_t>(yuv_val);
else
rgb_val = dali::kernels::color::itu_r_bt_601::ycbcr_to_rgb<uint8_t>(yuv_val);

RGB({x + j, y + i, 0}) = rgb_val.x;
RGB({x + j, y + i, 1}) = rgb_val.y;
RGB({x + j, y + i, 2}) = rgb_val.z;
}
}
}

} // namespace dali

void yuv_to_rgb(uint8_t *yuv, int yuv_pitch, uint8_t *rgb, int rgb_pitch, int width, int height,
bool full_range, cudaStream_t stream) {
auto grid_layout = dim3((width + 63) / 32 / 2, (height + 3));
auto block_layout = dim3(32, 2);

if (full_range) {
dali::yuv_to_rgb_kernel<true>
<<<grid_layout, block_layout, 0, stream>>>
(yuv, yuv_pitch, rgb, rgb_pitch, width, height);
} else {
dali::yuv_to_rgb_kernel<false>
<<<grid_layout, block_layout, 0, stream>>>
(yuv, yuv_pitch, rgb, rgb_pitch, width, height);
}
}
30 changes: 30 additions & 0 deletions plugins/video/pkg_src/src/decoder/color_space.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef PLUGINS_VIDEO_PKG_SRC_SRC_DECODER_COLOR_SPACE_H_
#define PLUGINS_VIDEO_PKG_SRC_SRC_DECODER_COLOR_SPACE_H_

#include <stdint.h>

void yuv_to_rgb(
uint8_t *yuv,
int yuv_pitch,
uint8_t *rgb,
int rgb_pitch,
int width,
int height,
bool full_range,
cudaStream_t stream);

#endif // PLUGINS_VIDEO_PKG_SRC_SRC_DECODER_COLOR_SPACE_H_
88 changes: 51 additions & 37 deletions plugins/video/pkg_src/src/decoder/video_decoder_mixed.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand All @@ -15,12 +15,14 @@
#include "decoder/video_decoder_mixed.h"
#include "dali/core/tensor_shape.h"

#include "decoder/color_space.h"

namespace dali_video {

class MemoryVideoFile : public FFmpegDemuxer::DataProvider {
public:
MemoryVideoFile(const void *data, int64_t size)
: data_(static_cast<const uint8_t*>(data)), size_(size), position_(0) {}
: data_(static_cast<const uint8_t *>(data)), size_(size), position_(0) {}


int GetData(uint8_t *buffer, int buffer_size) override {
Expand All @@ -41,8 +43,8 @@ class MemoryVideoFile : public FFmpegDemuxer::DataProvider {
int64_t position_;
};

bool VideoDecoderMixed::SetupImpl(
std::vector<dali::OutputDesc> &output_desc, const dali::Workspace &ws) {
bool VideoDecoderMixed::SetupImpl(std::vector<dali::OutputDesc> &output_desc,
const dali::Workspace &ws) {
ValidateInput(ws);
const auto &input = ws.Input<dali::CPUBackend>(0);
int batch_size = input.num_samples();
Expand All @@ -51,13 +53,13 @@ bool VideoDecoderMixed::SetupImpl(
samples_.resize(batch_size);
dali::TensorListShape<> sh(batch_size, 4);
for (int i = 0; i < batch_size; i++) {
auto& sample = samples_[i];
auto &sample = samples_[i];
sample.data_provider_ =
std::make_unique<MemoryVideoFile>(input.raw_tensor(i), input[i].shape().num_elements());
sample.demuxer_ = std::make_unique<FFmpegDemuxer>(sample.data_provider_.get());
sample.current_packet_ = std::make_unique<PacketData>();
sh.set_tensor_shape(
i, dali::TensorShape<>(10, sample.demuxer_->GetHeight(), sample.demuxer_->GetWidth(), 3));
sh.set_tensor_shape(i, dali::TensorShape<>(end_frame_, sample.demuxer_->GetHeight(),
sample.demuxer_->GetWidth(), 3));
}
output_desc.resize(1);
output_desc[0].shape = sh;
Expand All @@ -69,47 +71,58 @@ void VideoDecoderMixed::Run(dali::Workspace &ws) {
auto &output = ws.Output<dali::GPUBackend>(0);
const auto &input = ws.Input<dali::CPUBackend>(0);
int batch_size = input.num_samples();
int s = 0;

cuInit(0);
int nGpu = 0;
cuDeviceGetCount(&nGpu);

bool m_bDestroyContext = false;
CUcontext cuContext = nullptr;
CUstream cuStream = nullptr;

for (int i = 0; i < batch_size; i++) {
cuCtxGetCurrent(&cuContext);
if (!cuContext) {
createCudaContext(&cuContext, device_id_, 0);
m_bDestroyContext = true;
}
cuCtxPopCurrent(&cuContext);
}
CUstream cuStream = ws.stream();
cuCtxGetCurrent(&cuContext);

if (!cuContext)
if (!cuContext) {
throw std::runtime_error("Failed to create a cuda context");

cuCtxPushCurrent(cuContext);
cuStreamCreate(&cuStream, 0);
}

for (int i = 0; i < batch_size; i++) {
auto output_sample = output[i];
uint8_t *output_data = output_sample.template mutable_data<uint8_t>();

auto &sample = samples_[i];
sample.decoder_ = std::make_unique<NvDecoder>(
cuStream, cuContext, true, FFmpeg2NvCodecId(sample.demuxer_->GetVideoCodec()), false,
false /*_enableasyncallocations*/, false);


uint8_t *pVideo = NULL;
int nVideoBytes = 0;
while (sample.demuxer_->Demux(&pVideo, &nVideoBytes)) {
if (nVideoBytes) {
auto vecTupFrame = sample.decoder_->Decode(pVideo, nVideoBytes);
uint8_t *pVideo = NULL, *pFrame = nullptr;
int nVideoBytes = 0, nFrameReturned = 0, nFrame = 0;

int num_frames = 0;

do {
sample.demuxer_->Demux(&pVideo, &nVideoBytes);
nFrameReturned = sample.decoder_->Decode(pVideo, nVideoBytes);

for (int i = 0; i < nFrameReturned; i++) {
pFrame = sample.decoder_->GetFrame();

uint8_t *dpFrame = output_data + num_frames * sample.demuxer_->GetHeight() *
sample.demuxer_->GetWidth() * 3;
int nWidth = sample.decoder_->GetWidth();
int nPitch = sample.decoder_->GetWidth();
int iMatrix =
sample.decoder_->GetVideoFormatInfo().video_signal_description.matrix_coefficients;
bool full_range =
sample.decoder_->GetVideoFormatInfo().video_signal_description.video_full_range_flag;


yuv_to_rgb(pFrame, nPitch, reinterpret_cast<uint8_t *>(dpFrame),
sample.decoder_->GetWidth() * 3, sample.decoder_->GetWidth(),
sample.decoder_->GetHeight(), full_range, cuStream);

++num_frames;
if (end_frame_ > 0 && num_frames >= end_frame_) {
break;
}
}
}
} while (nVideoBytes);
}
cuCtxPopCurrent(&cuContext);
}


Expand All @@ -123,12 +136,13 @@ The video streams can be in most of the container file formats. FFmpeg is used t
.NumInput(1)
.NumOutput(1)
.InputDox(0, "buffer", "TensorList", "Data buffer with a loaded video file.")
.AddOptionalArg("end_frame", R"code(Index of the end frame to be decoded.)code", 0)
.AddOptionalArg("affine",
R"code(Applies only to the mixed backend type.
R"code(Applies only to the mixed backend type.

If set to True, each thread in the internal thread pool will be tied to a specific CPU core.
Otherwise, the threads can be reassigned to any CPU core by the operating system.)code", true);

Otherwise, the threads can be reassigned to any CPU core by the operating system.)code",
true);


DALI_REGISTER_OPERATOR(plugin__video__Decoder, VideoDecoderMixed, dali::Mixed);
Expand Down
Loading
Loading