Skip to content

Commit

Permalink
Support NV12 format in video decoding (#2330)
Browse files Browse the repository at this point in the history
Summary:
Support NV12 format in Streamer API.

NV12 is a biplanar format with a full-sized Y plane followed by a single chroma plane with interleaved U and V values.
https://chromium.googlesource.com/libyuv/libyuv/+/HEAD/docs/formats.md#nv12-and-nv21

The original UV plane is smaller than the Y plane, so in this implementation,
the UV plane is upsampled to match the size of the Y plane.

Pull Request resolved: #2330

Reviewed By: hwangjeff

Differential Revision: D35632351

Pulled By: mthrok

fbshipit-source-id: aab4fbc0ce2bb7a1fb67264c27208b610fb56e27
  • Loading branch information
mthrok authored and facebook-github-bot committed Apr 14, 2022
1 parent 2f70e2f commit 7972be9
Showing 1 changed file with 119 additions and 1 deletion.
120 changes: 119 additions & 1 deletion torchaudio/csrc/ffmpeg/buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@
#include <stdexcept>
#include <vector>

#ifdef USE_CUDA
#include <c10/cuda/CUDAStream.h>
#endif

namespace torchaudio {
namespace ffmpeg {

Expand Down Expand Up @@ -213,6 +217,94 @@ torch::Tensor convert_yuv420p(AVFrame* pFrame) {
return t.permute({0, 3, 1, 2}); // NCHW
}

/// Convert an NV12 frame into a uint8 tensor with Y, U and V channels.
///
/// The Y plane is read from `pFrame->data[0]` and the interleaved UV plane
/// from `pFrame->data[1]`, both with `memcpy`. The chroma plane is upsampled
/// by a factor of two in each spatial direction (nearest neighbour, via
/// `repeat_interleave`) so that it matches the luma plane.
///
/// @param pFrame Frame to convert; its `width`, `height`, `data` and
///               `linesize` fields are read.
/// @return A CPU tensor of shape `{1, 3, height, width}` (NCHW view of an
///         NHWC buffer), dtype uint8.
torch::Tensor convert_nv12_cpu(AVFrame* pFrame) {
  const int width = pFrame->width;
  const int height = pFrame->height;
  // Chroma is subsampled by two in each direction. Round up so that frames
  // with an odd width/height still get their last column/row of chroma, and
  // so that the per-row copy size always matches the tensor row size.
  const int chroma_width = (width + 1) / 2;
  const int chroma_height = (height + 1) / 2;
  const int chroma_row_bytes = chroma_width * 2; // one U and one V byte each

  auto options = torch::TensorOptions()
                     .dtype(torch::kUInt8)
                     .layout(torch::kStrided)
                     .device(torch::kCPU);

  torch::Tensor y = torch::empty({1, height, width, 1}, options);
  {
    uint8_t* tgt = y.data_ptr<uint8_t>();
    const uint8_t* src = pFrame->data[0];
    const int linesize = pFrame->linesize[0];
    // Copy row by row: source rows are `linesize` bytes apart, while the
    // tensor rows are tightly packed.
    for (int h = 0; h < height; ++h) {
      memcpy(tgt, src, width);
      tgt += width;
      src += linesize;
    }
  }
  torch::Tensor uv =
      torch::empty({1, chroma_height, chroma_width, 2}, options);
  {
    uint8_t* tgt = uv.data_ptr<uint8_t>();
    const uint8_t* src = pFrame->data[1];
    const int linesize = pFrame->linesize[1];
    for (int h = 0; h < chroma_height; ++h) {
      memcpy(tgt, src, chroma_row_bytes);
      tgt += chroma_row_bytes;
      src += linesize;
    }
  }
  // Upsample width and height, then drop the extra column/row that the
  // rounding-up introduces for odd frame sizes (a no-op for even sizes).
  uv = uv.repeat_interleave(2, -2)
           .repeat_interleave(2, -3)
           .narrow(1, 0, height)
           .narrow(2, 0, width);
  torch::Tensor t = torch::cat({y, uv}, -1);
  return t.permute({0, 3, 1, 2}); // NCHW
}

#ifdef USE_CUDA
/// Convert an NV12 frame whose planes are addressed by device pointers into
/// a uint8 CUDA tensor with Y, U and V channels.
///
/// The Y plane (`pFrame->data[0]`) and the interleaved UV plane
/// (`pFrame->data[1]`) are copied with device-to-device `cudaMemcpy2D`. The
/// chroma plane is then upsampled by a factor of two in each spatial
/// direction (nearest neighbour, via `repeat_interleave`) so that it matches
/// the luma plane.
///
/// @param pFrame Frame to convert; its `width`, `height`, `data` and
///               `linesize` fields are read.
/// @return A CUDA tensor of shape `{1, 3, height, width}` (NCHW view of an
///         NHWC buffer), dtype uint8.
/// @throws std::runtime_error if either plane copy does not return
///         `cudaSuccess`.
torch::Tensor convert_nv12_cuda(AVFrame* pFrame) {
  const int width = pFrame->width;
  const int height = pFrame->height;
  // Chroma is subsampled by two in each direction. Round up so that frames
  // with an odd width/height still get their last column/row of chroma, and
  // so that the copied row width always matches the tensor row size.
  const int chroma_width = (width + 1) / 2;
  const int chroma_height = (height + 1) / 2;
  const int chroma_row_bytes = chroma_width * 2; // one U and one V byte each

  auto options = torch::TensorOptions()
                     .dtype(torch::kUInt8)
                     .layout(torch::kStrided)
                     .device(torch::kCUDA);

  torch::Tensor y = torch::empty({1, height, width, 1}, options);
  {
    uint8_t* tgt = y.data_ptr<uint8_t>();
    // Treated as a device address: the copy below is device-to-device.
    const uint8_t* src = pFrame->data[0];
    const int linesize = pFrame->linesize[0];
    // Destination rows are tightly packed (pitch == width); source rows are
    // `linesize` bytes apart.
    if (cudaSuccess !=
        cudaMemcpy2D(
            tgt,
            width,
            src,
            linesize,
            width,
            height,
            cudaMemcpyDeviceToDevice)) {
      throw std::runtime_error("Failed to copy Y plane to Cuda tensor.");
    }
  }
  torch::Tensor uv =
      torch::empty({1, chroma_height, chroma_width, 2}, options);
  {
    uint8_t* tgt = uv.data_ptr<uint8_t>();
    const uint8_t* src = pFrame->data[1];
    const int linesize = pFrame->linesize[1];
    if (cudaSuccess !=
        cudaMemcpy2D(
            tgt,
            chroma_row_bytes,
            src,
            linesize,
            chroma_row_bytes,
            chroma_height,
            cudaMemcpyDeviceToDevice)) {
      throw std::runtime_error("Failed to copy UV plane to Cuda tensor.");
    }
  }
  // Upsample width and height, then drop the extra column/row that the
  // rounding-up introduces for odd frame sizes (a no-op for even sizes).
  uv = uv.repeat_interleave(2, -2)
           .repeat_interleave(2, -3)
           .narrow(1, 0, height)
           .narrow(2, 0, width);
  torch::Tensor t = torch::cat({y, uv}, -1);
  return t.permute({0, 3, 1, 2}); // NCHW
}
#endif

torch::Tensor convert_image_tensor(AVFrame* pFrame) {
// ref:
// https://ffmpeg.org/doxygen/4.1/filtering__video_8c_source.html#l00179
Expand Down Expand Up @@ -240,9 +332,35 @@ torch::Tensor convert_image_tensor(AVFrame* pFrame) {
break;
case AV_PIX_FMT_YUV420P:
return convert_yuv420p(pFrame);
case AV_PIX_FMT_NV12:
return convert_nv12_cpu(pFrame);
#ifdef USE_CUDA
case AV_PIX_FMT_CUDA: {
AVHWFramesContext* hwctx =
(AVHWFramesContext*)pFrame->hw_frames_ctx->data;
AVPixelFormat sw_format = hwctx->sw_format;
// cuvid decoder (nvdec frontend of ffmpeg) only supports the following
// output formats
// https://github.com/FFmpeg/FFmpeg/blob/072101bd52f7f092ee976f4e6e41c19812ad32fd/libavcodec/cuviddec.c#L1121-L1124
switch (sw_format) {
case AV_PIX_FMT_NV12:
return convert_nv12_cuda(pFrame);
case AV_PIX_FMT_P010:
case AV_PIX_FMT_P016:
throw std::runtime_error(
"Unsupported video format found in CUDA HW: " +
std::string(av_get_pix_fmt_name(sw_format)));
default:
throw std::runtime_error(
"Unexpected video format found in CUDA HW: " +
std::string(av_get_pix_fmt_name(sw_format)));
}
}
#endif
default:
throw std::runtime_error(
"Unexpected format: " + std::string(av_get_pix_fmt_name(format)));
"Unexpected video format: " +
std::string(av_get_pix_fmt_name(format)));
}

torch::Tensor t;
Expand Down

0 comments on commit 7972be9

Please sign in to comment.