Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for memory video file in FramesDecoder #4184

Merged
merged 6 commits into from
Aug 24, 2022
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions dali/operators/reader/loader/video/frames_decoder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -19,15 +19,64 @@


namespace dali {
/**
 * @brief Copies data from the current position of the memory file into `buffer`
 * and advances the position by the number of bytes copied.
 *
 * @param buffer Destination buffer.
 * @param buffer_size Maximum number of bytes to copy into `buffer`.
 * @return int Number of bytes copied, or AVERROR_EOF when there is nothing left to read.
 */
int MemoryVideoFile::Read(unsigned char *buffer, int buffer_size) {
  // Keep the arithmetic in 64 bits: size_ and position_ are int64_t and the difference
  // does not have to fit in int.
  const int64_t left_in_file = size_ - position_;

  // `<= 0` also covers a position that was moved past the end of the data, and a negative
  // position must never be used to offset data_.
  if (position_ < 0 || left_in_file <= 0) {
    return AVERROR_EOF;
  }

  // The result is bounded by buffer_size, so it always fits in int.
  const int to_read = static_cast<int>(std::min<int64_t>(left_in_file, buffer_size));
  std::copy(data_ + position_, data_ + position_ + to_read, buffer);
  position_ += to_read;
  return to_read;
}

/**
* @brief Method for seeking the memory video. It sets position according to provided arguments.
*
* @param new_position Requested new_position.
* @param mode Chosen method of seeking. This argument changes how new_position is interpreted and how seeking is performed.
* @return int64_t actual new position in the file.
*/
int64_t MemoryVideoFile::Seek(int64_t new_position, int mode) {
switch (mode) {
case SEEK_SET:
position_ = new_position;
break;
case AVSEEK_SIZE:
return size_;

default:
DALI_FAIL(
make_string(
"Unsupported seeking method in FramesDecoder from memory file. Seeking method: ",
mode));
}

return position_;
}

namespace detail {
/**
 * @brief Converts an FFmpeg error code to its textual description.
 *
 * @param ret Error code to describe.
 * @return std::string Message produced by av_make_error_string for `ret`.
 */
std::string av_error_string(int ret) {
  // A local, zero-initialized buffer instead of a function-level static one, so that
  // concurrent calls do not write to the same memory.
  char msg[AV_ERROR_MAX_STRING_SIZE] = {0};
  return std::string(av_make_error_string(msg, sizeof(msg), ret));
}

int read_memory_video_file(void *data_ptr, uint8_t *av_io_buffer, int av_io_buffer_size) {
MemoryVideoFile *memory_video_file = static_cast<MemoryVideoFile *>(data_ptr);

return memory_video_file->Read(av_io_buffer, av_io_buffer_size);
}

int64_t seek_memory_video_file(void *data_ptr, int64_t new_position, int origin) {
MemoryVideoFile *memory_video_file = static_cast<MemoryVideoFile *>(data_ptr);

return memory_video_file->Seek(new_position, origin);
}

} // namespace detail

// unique_ptr over AVPacket whose deleter type is a pointer to a function with the signature
// of av_packet_unref. The deleter value itself is supplied where the pointer is constructed
// (not visible in this part of the file).
// NOTE(review): presumably used to unref a packet when leaving a scope - confirm at use sites.
using AVPacketScope = std::unique_ptr<AVPacket, decltype(&av_packet_unref)>;

const std::vector<AVCodecID> FramesDecoder::SupportedCodecs = {
Expand Down Expand Up @@ -106,6 +155,45 @@ FramesDecoder::FramesDecoder(const std::string &filename)
DetectVfr();
}



/**
 * @brief Constructs a decoder for a video file that is kept in memory.
 *
 * The data is not copied: it is read through a MemoryVideoFile that stores `memory_file`
 * and is handed to FFmpeg as the source of a custom AVIOContext.
 *
 * @param memory_file Pointer to memory with video file data.
 * @param memory_file_size Size of memory_file in bytes.
 */
FramesDecoder::FramesDecoder(const char *memory_file, int memory_file_size)
  : av_state_(std::make_unique<AvState>()),
    memory_video_file_(MemoryVideoFile(memory_file, memory_file_size)) {
  av_log_set_level(AV_LOG_ERROR);

  av_state_->ctx_ = avformat_alloc_context();
  DALI_ENFORCE(av_state_->ctx_, "Could not alloc avformat context");

  uint8_t *av_io_buffer = static_cast<uint8_t *>(av_malloc(default_av_buffer_size));
  DALI_ENFORCE(av_io_buffer != nullptr,
               "Could not alloc buffer for reading video file from memory");

  // Custom IO: FFmpeg pulls the data through the read/seek callbacks, which operate on
  // memory_video_file_. The write callback is not needed (write_flag is 0).
  AVIOContext *av_io_context = avio_alloc_context(
    av_io_buffer,
    default_av_buffer_size,
    0,
    &memory_video_file_.value(),
    detail::read_memory_video_file,
    nullptr,
    detail::seek_memory_video_file);
  if (av_io_context == nullptr) {
    // Nothing took over the buffer, so release it before reporting the error.
    av_free(av_io_buffer);
    DALI_FAIL("Could not alloc avio context for reading video file from memory");
  }

  // NOTE(review): the release of av_io_context and its buffer is not visible in this part
  // of the file - confirm that it is handled when the decoder is destroyed.
  av_state_->ctx_->pb = av_io_context;

  int ret = avformat_open_input(&av_state_->ctx_, "", nullptr, nullptr);
  DALI_ENFORCE(ret == 0, make_string("Failed to open video file from memory due to ",
                                     detail::av_error_string(ret)));

  FindVideoStream();
  DALI_ENFORCE(
    CheckCodecSupport(),
    make_string(
      "Unsupported video codec: ",
      av_state_->codec_->name,
      ". Supported codecs: h264, HEVC."));
  InitAvState();
  BuildIndex();
  DetectVfr();
}

void FramesDecoder::BuildIndex() {
// TODO(awolant): Optimize this function for:
// - CFR
Expand Down
38 changes: 37 additions & 1 deletion dali/operators/reader/loader/video/frames_decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ extern "C" {
#include <vector>
#include <string>
#include <memory>
#include <optional>

#include "dali/core/common.h"

Expand Down Expand Up @@ -70,6 +71,23 @@ struct AvState {
}
};

/**
* @brief Helper representing video file kept in memory. Allows reading and seeking.
*
*/
struct MemoryVideoFile {
MemoryVideoFile(const char *data, int64_t size)
: data_(data), size_(size), position_(0) {}

int Read(unsigned char *buffer, int buffer_size);

int64_t Seek(int64_t new_position, int origin);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a short doc comment explaining what the second argument is?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done.


const char *data_;
const int64_t size_;
int64_t position_;
};

/**
* @brief Object representing a video file. Allows access to frames and seeking.
*
Expand All @@ -85,6 +103,18 @@ class DLL_PUBLIC FramesDecoder {
*/
explicit FramesDecoder(const std::string &filename);


/**
 * @brief Construct a new FramesDecoder object that decodes a video file kept in memory.
 *
 * @param memory_file Pointer to memory with video file data.
 * @param memory_file_size Size of memory_file in bytes.
 *
 * @note This constructor assumes that the `memory_file` and
 * `memory_file_size` arguments cover the entire video file, including the header.
 *
 * @note The data is not copied: only the pointer is stored and the data is read through it,
 * so the memory has to stay valid while the decoder reads from it.
 */
FramesDecoder(const char *memory_file, int memory_file_size);

/**
* @brief Number of frames in the video
*
Expand Down Expand Up @@ -215,9 +245,15 @@ class DLL_PUBLIC FramesDecoder {

int channels_ = 3;
bool flush_state_ = false;
std::string filename_;
bool is_vfr_ = false;

std::string filename_ = "";
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about making this string optional as well? The rationale is that a video loaded from memory does not have a filename. Unless, of course, the existing implementation assumes that the filename always needs to be there; in that case I wouldn't bother with such an "enhancement".

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. After the constructor, we use it only to report errors, so that the user knows which file caused the error. I refactored this code a bit, so now we have a function that returns the name of the file, or "memory file" instead.

std::optional<MemoryVideoFile> memory_video_file_ = {};

// Default size of the buffer used to load video files from memory to FFMPEG
const int default_av_buffer_size = (1 << 15);
};

} // namespace dali

#endif // DALI_OPERATORS_READER_LOADER_VIDEO_FRAMES_DECODER_H_
103 changes: 57 additions & 46 deletions dali/operators/reader/loader/video/frames_decoder_gpu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,56 +77,67 @@ cudaVideoCodec FramesDecoderGpu::GetCodecType() {
return cudaVideoCodec_H264;
}

void FramesDecoderGpu::InitGpuDecoder() {
nvdecode_state_ = std::make_unique<NvDecodeState>();

InitBitStreamFilter();

filtered_packet_ = av_packet_alloc();
DALI_ENFORCE(filtered_packet_, "Could not allocate av packet");

auto codec_type = GetCodecType();

// Create nv decoder
CUVIDDECODECREATEINFO decoder_info;
memset(&decoder_info, 0, sizeof(CUVIDDECODECREATEINFO));

decoder_info.bitDepthMinus8 = 0;
decoder_info.ChromaFormat = cudaVideoChromaFormat_420;
decoder_info.CodecType = codec_type;
decoder_info.ulHeight = Height();
decoder_info.ulWidth = Width();
decoder_info.ulMaxHeight = Height();
decoder_info.ulMaxWidth = Width();
decoder_info.ulTargetHeight = Height();
decoder_info.ulTargetWidth = Width();
decoder_info.ulNumDecodeSurfaces = num_decode_surfaces_;
decoder_info.ulNumOutputSurfaces = 2;

CUDA_CALL(cuvidCreateDecoder(&nvdecode_state_->decoder, &decoder_info));

// Create nv parser
CUVIDPARSERPARAMS parser_info;
memset(&parser_info, 0, sizeof(CUVIDPARSERPARAMS));
parser_info.CodecType = codec_type;
parser_info.ulMaxNumDecodeSurfaces = num_decode_surfaces_;
parser_info.ulMaxDisplayDelay = 0;
parser_info.pUserData = this;
parser_info.pfnSequenceCallback = detail::process_video_sequence;
parser_info.pfnDecodePicture = detail::process_picture_decode;
parser_info.pfnDisplayPicture = nullptr;

CUDA_CALL(cuvidCreateVideoParser(&nvdecode_state_->parser, &parser_info));

// Init internal frame buffer
// TODO(awolant): Check, if continuous buffer would be faster
for (size_t i = 0; i < frame_buffer_.size(); ++i) {
frame_buffer_[i].frame_.resize(FrameSize());
frame_buffer_[i].pts_ = -1;
}
}

Comment on lines +80 to +128
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added new constructor in this PR. This is a part that is common for both constructors, so I extracted it to a separate method.

/**
 * @brief Constructs a GPU decoder for the video file under `filename`.
 *
 * The base FramesDecoder opens the file. All GPU-specific setup is done by InitGpuDecoder,
 * which is the single place that creates the decoder state, so nothing is initialized twice.
 *
 * @param filename Path to the video file.
 * @param stream CUDA stream stored in the decoder.
 */
FramesDecoderGpu::FramesDecoderGpu(const std::string &filename, cudaStream_t stream) :
    FramesDecoder(filename),
    frame_buffer_(num_decode_surfaces_),
    stream_(stream) {
  InitGpuDecoder();
}

/**
 * @brief Constructs a GPU decoder for a video file kept in memory.
 *
 * @param memory_file Pointer to memory with video file data, forwarded to FramesDecoder.
 * @param memory_file_size Size of memory_file in bytes, forwarded to FramesDecoder.
 * @param stream CUDA stream stored in the decoder.
 */
FramesDecoderGpu::FramesDecoderGpu(const char *memory_file,
                                   int memory_file_size,
                                   cudaStream_t stream)
    : FramesDecoder(memory_file, memory_file_size),
      frame_buffer_(num_decode_surfaces_),
      stream_(stream) {
  InitGpuDecoder();
}

int FramesDecoderGpu::ProcessPictureDecode(void *user_data, CUVIDPICPARAMS *picture_params) {
Expand Down
13 changes: 13 additions & 0 deletions dali/operators/reader/loader/video/frames_decoder_gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {
*/
explicit FramesDecoderGpu(const std::string &filename, cudaStream_t stream = 0);

/**
 * @brief Construct a new FramesDecoderGpu object that decodes a video file kept in memory.
 *
 * @param memory_file Pointer to memory with video file data.
 * @param memory_file_size Size of memory_file in bytes.
 * @param stream CUDA stream stored by the decoder. Defaults to 0.
 *
 * @note This constructor assumes that the `memory_file` and
 * `memory_file_size` arguments cover the entire video file, including the header.
 */
FramesDecoderGpu(const char *memory_file, int memory_file_size, cudaStream_t stream = 0);

bool ReadNextFrame(uint8_t *data, bool copy_to_output = true) override;

void SeekFrame(int frame_id) override;
Expand Down Expand Up @@ -100,6 +111,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {
void InitBitStreamFilter();

cudaVideoCodec GetCodecType();

// Initialization shared by the constructors: creates the nvdecode state, the bitstream
// filter, the packet for filtered data, the decoder, the parser and the frame buffer.
void InitGpuDecoder();
};

} // namespace dali
Expand Down
42 changes: 42 additions & 0 deletions dali/operators/reader/loader/video/frames_decoder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -236,4 +236,46 @@ TEST_F(FramesDecoderGpuTest, VariableFrameRateHevc) {
RunTest(decoder, vfr_hevc_videos_[1]);
}

// CPU decoder fed with the second CFR test video loaded into memory.
TEST_F(FramesDecoderTest_CpuOnlyTests, InMemoryCfrVideo) {
  const std::vector<char> video_bytes = MemoryVideo(cfr_videos_paths_[1]);

  FramesDecoder decoder(video_bytes.data(), static_cast<int>(video_bytes.size()));
  RunTest(decoder, cfr_videos_[1]);
}

// GPU decoder fed with the first CFR test video loaded into memory.
TEST_F(FramesDecoderGpuTest, InMemoryCfrVideo) {
  const std::vector<char> video_bytes = MemoryVideo(cfr_videos_paths_[0]);

  FramesDecoderGpu decoder(video_bytes.data(), static_cast<int>(video_bytes.size()));
  RunTest(decoder, cfr_videos_[0]);
}

// CPU decoder fed with the second VFR test video loaded into memory.
TEST_F(FramesDecoderTest_CpuOnlyTests, InMemoryVfrVideo) {
  const std::vector<char> video_bytes = MemoryVideo(vfr_videos_paths_[1]);

  FramesDecoder decoder(video_bytes.data(), static_cast<int>(video_bytes.size()));
  RunTest(decoder, vfr_videos_[1]);
}

// GPU decoder fed with the first VFR test video loaded into memory.
TEST_F(FramesDecoderGpuTest, InMemoryVfrVideo) {
  const std::vector<char> video_bytes = MemoryVideo(vfr_videos_paths_[0]);

  FramesDecoderGpu decoder(video_bytes.data(), static_cast<int>(video_bytes.size()));
  RunTest(decoder, vfr_videos_[0]);
}

// CPU decoder fed with the first VFR HEVC test video loaded into memory.
// Uses the HEVC video set, so that the test covers what its name says instead of repeating
// the plain VFR case.
TEST_F(FramesDecoderTest_CpuOnlyTests, InMemoryVfrHevcVideo) {
  auto memory_video = MemoryVideo(vfr_hevc_videos_paths_[0]);

  FramesDecoder decoder(memory_video.data(), memory_video.size());
  RunTest(decoder, vfr_hevc_videos_[0]);
}

// GPU decoder fed with the second VFR HEVC test video loaded into memory.
TEST_F(FramesDecoderGpuTest, InMemoryVfrVfrHevcVideo) {
  const std::vector<char> video_bytes = MemoryVideo(vfr_hevc_videos_paths_[1]);

  FramesDecoderGpu decoder(video_bytes.data(), static_cast<int>(video_bytes.size()));
  RunTest(decoder, vfr_hevc_videos_[1]);
}

} // namespace dali
14 changes: 14 additions & 0 deletions dali/operators/reader/loader/video/video_test_base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,4 +155,18 @@ void VideoTestBase::RunFailureTest(std::function<void()> body, std::string expec
}
}

/**
 * @brief Loads the whole file under `path` into memory.
 *
 * @param path Path to the file to load.
 * @return std::vector<char> Content of the file.
 * @throws ::testing::AssertionResult (failure) when the file cannot be opened, its size
 * cannot be determined or its content cannot be read.
 */
std::vector<char> VideoTestBase::MemoryVideo(const std::string &path) const {
  // Opened at the end, so that tellg() gives the size of the file
  std::ifstream video_file(path, std::ios::binary | std::ios::ate);
  if (!video_file.is_open()) {
    // We can't use FAIL() because this function returns value
    throw ::testing::AssertionFailure() << "Could not open video file: " << path;
  }

  // tellg() reports failure as -1, which must not be used as the size of the vector
  const std::streamoff size = video_file.tellg();
  if (size < 0) {
    throw ::testing::AssertionFailure() << "Could not determine size of video file: " << path;
  }
  video_file.seekg(0, std::ios::beg);

  std::vector<char> memory_video(static_cast<size_t>(size));
  if (!video_file.read(memory_video.data(), size)) {
    // We can't use FAIL() because this function returns value
    throw ::testing::AssertionFailure() << "Could not load video file to memory.";
  }

  return memory_video;
}

} // namespace dali
2 changes: 2 additions & 0 deletions dali/operators/reader/loader/video/video_test_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ class VideoTestBase : public ::testing::Test {
return std::max(cfr_videos_[0].FrameSize(), cfr_videos_[1].FrameSize());
}

/**
 * @brief Reads the file under `path` in binary mode and returns its content.
 * Throws a gtest assertion failure result when the file cannot be read.
 */
std::vector<char> MemoryVideo(const std::string &path) const;

/**
* @brief Utility to save decoded frame as a PNG file.
* Frame is saved to the folder given as an argument.
Expand Down