Skip to content

Commit 29ba8a2

Browse files
authored
Add support for memory video file in FramesDecoder (#4184)
* Add support for decoding video file from memory in FramesDecoder and FramesDecoderGpu. Signed-off-by: Albert Wolant <awolant@nvidia.com>
1 parent 3417c90 commit 29ba8a2

7 files changed

+265
-54
lines changed

dali/operators/reader/loader/video/frames_decoder.cc

+94-6
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,64 @@
1919

2020

2121
namespace dali {
22+
int MemoryVideoFile::Read(unsigned char *buffer, int buffer_size) {
23+
int left_in_file = size_ - position_;
24+
if (left_in_file == 0) {
25+
return AVERROR_EOF;
26+
}
27+
28+
int to_read = std::min(left_in_file, buffer_size);
29+
std::copy(data_ + position_, data_ + position_ + to_read, buffer);
30+
position_ += to_read;
31+
return to_read;
32+
}
33+
34+
/**
35+
* @brief Method for seeking the memory video. It sets position according to provided arguments.
36+
*
37+
* @param new_position Requested new_position.
38+
* @param mode Chosen method of seeking. This argument changes how new_position is interpreted and how seeking is performed.
39+
* @return int64_t actual new position in the file.
40+
*/
41+
int64_t MemoryVideoFile::Seek(int64_t new_position, int mode) {
42+
switch (mode) {
43+
case SEEK_SET:
44+
position_ = new_position;
45+
break;
46+
case AVSEEK_SIZE:
47+
return size_;
48+
49+
default:
50+
DALI_FAIL(
51+
make_string(
52+
"Unsupported seeking method in FramesDecoder from memory file. Seeking method: ",
53+
mode));
54+
}
55+
56+
return position_;
57+
}
2258

2359
namespace detail {
2460
std::string av_error_string(int ret) {
2561
static char msg[AV_ERROR_MAX_STRING_SIZE];
2662
memset(msg, 0, sizeof(msg));
2763
return std::string(av_make_error_string(msg, AV_ERROR_MAX_STRING_SIZE, ret));
2864
}
65+
66+
// Read callback passed to avio_alloc_context. `data_ptr` is the opaque pointer given
// to avio_alloc_context and is interpreted as a MemoryVideoFile; the call is forwarded
// to MemoryVideoFile::Read and its result is returned unchanged.
int read_memory_video_file(void *data_ptr, uint8_t *av_io_buffer, int av_io_buffer_size) {
  auto &source = *static_cast<MemoryVideoFile *>(data_ptr);
  return source.Read(av_io_buffer, av_io_buffer_size);
}
3071

72+
// Seek callback passed to avio_alloc_context. `data_ptr` is the opaque pointer given
// to avio_alloc_context and is interpreted as a MemoryVideoFile; the call is forwarded
// to MemoryVideoFile::Seek and its result is returned unchanged.
int64_t seek_memory_video_file(void *data_ptr, int64_t new_position, int origin) {
  auto &source = *static_cast<MemoryVideoFile *>(data_ptr);
  return source.Seek(new_position, origin);
}
77+
78+
} // namespace detail
79+
3180
using AVPacketScope = std::unique_ptr<AVPacket, decltype(&av_packet_unref)>;
3281

3382
const std::vector<AVCodecID> FramesDecoder::SupportedCodecs = {
@@ -78,7 +127,7 @@ void FramesDecoder::FindVideoStream() {
78127
}
79128
}
80129

81-
DALI_FAIL(make_string("Could not find a valid video stream in a file ", filename_));
130+
DALI_FAIL(make_string("Could not find a valid video stream in a file ", Filename()));
82131
}
83132

84133
FramesDecoder::FramesDecoder(const std::string &filename)
@@ -89,8 +138,8 @@ FramesDecoder::FramesDecoder(const std::string &filename)
89138
av_state_->ctx_ = avformat_alloc_context();
90139
DALI_ENFORCE(av_state_->ctx_, "Could not alloc avformat context");
91140

92-
int ret = avformat_open_input(&av_state_->ctx_, filename.c_str(), nullptr, nullptr);
93-
DALI_ENFORCE(ret == 0, make_string("Failed to open video file at path ", filename, "due to ",
141+
int ret = avformat_open_input(&av_state_->ctx_, Filename().c_str(), nullptr, nullptr);
142+
DALI_ENFORCE(ret == 0, make_string("Failed to open video file ", Filename(), "due to ",
94143
detail::av_error_string(ret)));
95144

96145
FindVideoStream();
@@ -99,13 +148,52 @@ FramesDecoder::FramesDecoder(const std::string &filename)
99148
make_string(
100149
"Unsupported video codec: ",
101150
av_state_->codec_->name,
102-
" in file: ", filename,
151+
" in file: ", Filename(),
103152
" Supported codecs: h264, HEVC."));
104153
InitAvState();
105154
BuildIndex();
106155
DetectVfr();
107156
}
108157

158+
159+
160+
/**
 * @brief Constructs a decoder for a video file that is kept in memory.
 *
 * FFmpeg is given a custom AVIOContext whose read and seek callbacks operate on
 * `memory_video_file_`, so no file on disk is opened.
 *
 * @param memory_file Pointer to memory with video file data.
 * @param memory_file_size Size of memory_file in bytes.
 */
FramesDecoder::FramesDecoder(const char *memory_file, int memory_file_size)
  : av_state_(std::make_unique<AvState>()),
    memory_video_file_(MemoryVideoFile(memory_file, memory_file_size)) {
  av_log_set_level(AV_LOG_ERROR);

  av_state_->ctx_ = avformat_alloc_context();
  DALI_ENFORCE(av_state_->ctx_, "Could not alloc avformat context");

  uint8_t *av_io_buffer = static_cast<uint8_t *>(av_malloc(default_av_buffer_size));
  DALI_ENFORCE(av_io_buffer, "Could not alloc avio buffer");

  AVIOContext *av_io_context = avio_alloc_context(
    av_io_buffer,
    default_av_buffer_size,
    0,  // read-only context
    &memory_video_file_.value(),
    detail::read_memory_video_file,
    nullptr,  // no write callback
    detail::seek_memory_video_file);
  if (!av_io_context) {
    // The buffer is not owned by any context yet, release it before reporting the error.
    av_free(av_io_buffer);
    DALI_FAIL("Could not alloc avio context");
  }

  av_state_->ctx_->pb = av_io_context;

  int ret = avformat_open_input(&av_state_->ctx_, "", nullptr, nullptr);
  if (ret != 0) {
    // On failure avformat_open_input frees the format context, but not a user-supplied
    // AVIOContext. Release the IO buffer (it may have been replaced internally, so take
    // it from the context) and the context itself to avoid leaking them.
    av_freep(&av_io_context->buffer);
    avio_context_free(&av_io_context);
  }
  DALI_ENFORCE(ret == 0, make_string("Failed to open video file ", Filename(), " due to ",
                                     detail::av_error_string(ret)));

  FindVideoStream();
  DALI_ENFORCE(
    CheckCodecSupport(),
    make_string(
      "Unsupported video codec: ",
      av_state_->codec_->name,
      ". Supported codecs: h264, HEVC."));
  InitAvState();
  BuildIndex();
  DetectVfr();
}
196+
109197
void FramesDecoder::BuildIndex() {
110198
// TODO(awolant): Optimize this function for:
111199
// - CFR
@@ -247,7 +335,7 @@ void FramesDecoder::Reset() {
247335
ret >= 0,
248336
make_string(
249337
"Could not seek to the first frame of video ",
250-
filename_,
338+
Filename(),
251339
"due to",
252340
detail::av_error_string(ret)));
253341
avcodec_flush_buffers(av_state_->codec_ctx_);
@@ -284,7 +372,7 @@ void FramesDecoder::SeekFrame(int frame_id) {
284372
"with keyframe",
285373
keyframe_id,
286374
"in video ",
287-
filename_,
375+
Filename(),
288376
"due to ",
289377
detail::av_error_string(ret)));
290378

dali/operators/reader/loader/video/frames_decoder.h

+41-1
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ extern "C" {
2525
#include <vector>
2626
#include <string>
2727
#include <memory>
28+
#include <optional>
2829

2930
#include "dali/core/common.h"
3031

@@ -70,6 +71,23 @@ struct AvState {
7071
}
7172
};
7273

74+
/**
 * @brief Video file stored in a memory buffer, with a read cursor.
 *
 * Only the pointer to the data is stored; the bytes are not copied.
 */
struct MemoryVideoFile {
  /**
   * @param data Pointer to the first byte of the video file.
   * @param size Number of bytes available under `data`.
   */
  MemoryVideoFile(const char *data, int64_t size)
    : data_{data}, size_{size}, position_{0} {}

  // Reads data starting at the current position into `buffer`.
  int Read(unsigned char *buffer, int buffer_size);

  // Changes the current position; `origin` selects the seeking method.
  int64_t Seek(int64_t new_position, int origin);

  const char *data_;     // beginning of the file data
  const int64_t size_;   // size of the file data in bytes
  int64_t position_;     // current read position, starts at 0
};
90+
7391
/**
7492
* @brief Object representing a video file. Allows access to frames and seeking.
7593
*
@@ -85,6 +103,18 @@ class DLL_PUBLIC FramesDecoder {
85103
*/
86104
explicit FramesDecoder(const std::string &filename);
87105

106+
107+
/**
108+
* @brief Construct a new FramesDecoder object.
109+
*
110+
* @param memory_file Pointer to memory with video file data.
111+
* @param memory_file_size Size of memory_file in bytes.
112+
*
113+
* @note This constructor assumes that the `memory_file` and
114+
* `memory_file_size` arguments cover the entire video file, including the header.
115+
*/
116+
FramesDecoder(const char *memory_file, int memory_file_size);
117+
88118
/**
89119
* @brief Number of frames in the video
90120
*
@@ -213,11 +243,21 @@ class DLL_PUBLIC FramesDecoder {
213243

214244
void DetectVfr();
215245

246+
std::string Filename() {
247+
return filename_.has_value() ? filename_.value() : "memory file";
248+
}
249+
216250
int channels_ = 3;
217251
bool flush_state_ = false;
218-
std::string filename_;
219252
bool is_vfr_ = false;
253+
254+
std::optional<const std::string> filename_ = {};
255+
std::optional<MemoryVideoFile> memory_video_file_ = {};
256+
257+
// Default size of the buffer used to load video files from memory to FFMPEG
258+
const int default_av_buffer_size = (1 << 15);
220259
};
260+
221261
} // namespace dali
222262

223263
#endif // DALI_OPERATORS_READER_LOADER_VIDEO_FRAMES_DECODER_H_

dali/operators/reader/loader/video/frames_decoder_gpu.cc

+58-46
Original file line numberDiff line numberDiff line change
@@ -77,56 +77,68 @@ cudaVideoCodec FramesDecoderGpu::GetCodecType() {
7777
return cudaVideoCodec_H264;
7878
}
7979

80+
void FramesDecoderGpu::InitGpuDecoder() {
81+
nvdecode_state_ = std::make_unique<NvDecodeState>();
82+
83+
InitBitStreamFilter();
84+
85+
filtered_packet_ = av_packet_alloc();
86+
DALI_ENFORCE(filtered_packet_, "Could not allocate av packet");
87+
88+
auto codec_type = GetCodecType();
89+
90+
// Create nv decoder
91+
CUVIDDECODECREATEINFO decoder_info;
92+
memset(&decoder_info, 0, sizeof(CUVIDDECODECREATEINFO));
93+
94+
decoder_info.bitDepthMinus8 = 0;
95+
decoder_info.ChromaFormat = cudaVideoChromaFormat_420;
96+
decoder_info.CodecType = codec_type;
97+
decoder_info.ulHeight = Height();
98+
decoder_info.ulWidth = Width();
99+
decoder_info.ulMaxHeight = Height();
100+
decoder_info.ulMaxWidth = Width();
101+
decoder_info.ulTargetHeight = Height();
102+
decoder_info.ulTargetWidth = Width();
103+
decoder_info.ulNumDecodeSurfaces = num_decode_surfaces_;
104+
decoder_info.ulNumOutputSurfaces = 2;
105+
106+
CUDA_CALL(cuvidCreateDecoder(&nvdecode_state_->decoder, &decoder_info));
107+
108+
// Create nv parser
109+
CUVIDPARSERPARAMS parser_info;
110+
memset(&parser_info, 0, sizeof(CUVIDPARSERPARAMS));
111+
parser_info.CodecType = codec_type;
112+
parser_info.ulMaxNumDecodeSurfaces = num_decode_surfaces_;
113+
parser_info.ulMaxDisplayDelay = 0;
114+
parser_info.pUserData = this;
115+
parser_info.pfnSequenceCallback = detail::process_video_sequence;
116+
parser_info.pfnDecodePicture = detail::process_picture_decode;
117+
parser_info.pfnDisplayPicture = nullptr;
118+
119+
CUDA_CALL(cuvidCreateVideoParser(&nvdecode_state_->parser, &parser_info));
120+
121+
// Init internal frame buffer
122+
// TODO(awolant): Check, if continuous buffer would be faster
123+
for (size_t i = 0; i < frame_buffer_.size(); ++i) {
124+
frame_buffer_[i].frame_.resize(FrameSize());
125+
frame_buffer_[i].pts_ = -1;
126+
}
127+
}
128+
80129
FramesDecoderGpu::FramesDecoderGpu(const std::string &filename, cudaStream_t stream) :
81130
FramesDecoder(filename),
82131
frame_buffer_(num_decode_surfaces_),
83132
stream_(stream) {
84-
nvdecode_state_ = std::make_unique<NvDecodeState>();
85-
86-
InitBitStreamFilter();
87-
88-
filtered_packet_ = av_packet_alloc();
89-
DALI_ENFORCE(filtered_packet_, "Could not allocate av packet");
90-
91-
auto codec_type = GetCodecType();
92-
93-
// Create nv decoder
94-
CUVIDDECODECREATEINFO decoder_info;
95-
memset(&decoder_info, 0, sizeof(CUVIDDECODECREATEINFO));
96-
97-
decoder_info.bitDepthMinus8 = 0;
98-
decoder_info.ChromaFormat = cudaVideoChromaFormat_420;
99-
decoder_info.CodecType = codec_type;
100-
decoder_info.ulHeight = Height();
101-
decoder_info.ulWidth = Width();
102-
decoder_info.ulMaxHeight = Height();
103-
decoder_info.ulMaxWidth = Width();
104-
decoder_info.ulTargetHeight = Height();
105-
decoder_info.ulTargetWidth = Width();
106-
decoder_info.ulNumDecodeSurfaces = num_decode_surfaces_;
107-
decoder_info.ulNumOutputSurfaces = 2;
108-
109-
CUDA_CALL(cuvidCreateDecoder(&nvdecode_state_->decoder, &decoder_info));
110-
111-
// Create nv parser
112-
CUVIDPARSERPARAMS parser_info;
113-
memset(&parser_info, 0, sizeof(CUVIDPARSERPARAMS));
114-
parser_info.CodecType = codec_type;
115-
parser_info.ulMaxNumDecodeSurfaces = num_decode_surfaces_;
116-
parser_info.ulMaxDisplayDelay = 0;
117-
parser_info.pUserData = this;
118-
parser_info.pfnSequenceCallback = detail::process_video_sequence;
119-
parser_info.pfnDecodePicture = detail::process_picture_decode;
120-
parser_info.pfnDisplayPicture = nullptr;
121-
122-
CUDA_CALL(cuvidCreateVideoParser(&nvdecode_state_->parser, &parser_info));
123-
124-
// Init internal frame buffer
125-
// TODO(awolant): Check, if continuous buffer would be faster
126-
for (size_t i = 0; i < frame_buffer_.size(); ++i) {
127-
frame_buffer_[i].frame_.resize(FrameSize());
128-
frame_buffer_[i].pts_ = -1;
129-
}
133+
InitGpuDecoder();
134+
}
135+
136+
// Constructs a GPU decoder for a video file kept in memory. The base FramesDecoder
// is built from the memory buffer, then the GPU decoding state is initialized.
FramesDecoderGpu::FramesDecoderGpu(const char *memory_file,
                                   int memory_file_size,
                                   cudaStream_t stream)
    : FramesDecoder(memory_file, memory_file_size),
      frame_buffer_(num_decode_surfaces_),
      stream_(stream) {
  InitGpuDecoder();
}
131143

132144
int FramesDecoderGpu::ProcessPictureDecode(void *user_data, CUVIDPICPARAMS *picture_params) {

dali/operators/reader/loader/video/frames_decoder_gpu.h

+13
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,17 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {
5858
*/
5959
explicit FramesDecoderGpu(const std::string &filename, cudaStream_t stream = 0);
6060

61+
/**
62+
* @brief Construct a new FramesDecoderGpu object.
63+
*
64+
* @param memory_file Pointer to memory with video file data.
65+
* @param memory_file_size Size of memory_file in bytes.
66+
*
67+
* @note This constructor assumes that the `memory_file` and
68+
* `memory_file_size` arguments cover the entire video file, including the header.
69+
*/
70+
FramesDecoderGpu(const char *memory_file, int memory_file_size, cudaStream_t stream = 0);
71+
6172
bool ReadNextFrame(uint8_t *data, bool copy_to_output = true) override;
6273

6374
void SeekFrame(int frame_id) override;
@@ -100,6 +111,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {
100111
void InitBitStreamFilter();
101112

102113
cudaVideoCodec GetCodecType();
114+
115+
void InitGpuDecoder();
103116
};
104117

105118
} // namespace dali

0 commit comments

Comments
 (0)