NVIDIA · awolant · Aug 24, 2022 · Aug 22, 2022 · Aug 22, 2022 · Aug 23, 2022
diff --git a/dali/operators/reader/loader/video/frames_decoder.cc b/dali/operators/reader/loader/video/frames_decoder.cc
@@ -19,15 +19,64 @@
 
 
 namespace dali {
+int MemoryVideoFile::Read(unsigned char *buffer, int buffer_size) {
+  const int64_t left_in_file = size_ - position_;  // kept 64-bit, as size_ and position_ are
+  if (position_ < 0 || left_in_file <= 0) {  // at/after the end, or Seek moved before the start
+    return AVERROR_EOF;
+  }
+  // Copies at most buffer_size bytes from the current position and advances past them.
+  const int to_read = static_cast<int>(std::min<int64_t>(left_in_file, buffer_size));
+  std::copy(data_ + position_, data_ + position_ + to_read, buffer);
+  position_ += to_read;
+  return to_read;
+}
+
+/**
+ * @brief Method for seeking the memory video. Interprets the request according to `mode`:
+ *        SEEK_SET moves the read position, AVSEEK_SIZE only reports the size of the data.
+ * @param new_position Requested position; used only for SEEK_SET and stored without range checks.
+ * @param mode SEEK_SET or AVSEEK_SIZE; any other value makes the method fail through DALI_FAIL.
+ * @return int64_t new position for SEEK_SET, total size of the data for AVSEEK_SIZE.
+ */
+int64_t MemoryVideoFile::Seek(int64_t new_position, int mode) {
+  switch (mode) {
+  case SEEK_SET:
+    position_ = new_position;
+    break;
+  case AVSEEK_SIZE:
+    return size_;
+
+  default:
+    DALI_FAIL(
+      make_string(
+        "Unsupported seeking method in FramesDecoder from memory file. Seeking method: ",
+        mode));
+  }
+
+  return position_;
+}
 
 namespace detail {
 std::string av_error_string(int ret) {
     static char msg[AV_ERROR_MAX_STRING_SIZE];
     memset(msg, 0, sizeof(msg));
     return std::string(av_make_error_string(msg, AV_ERROR_MAX_STRING_SIZE, ret));
 }
+
+int read_memory_video_file(void *data_ptr, uint8_t *av_io_buffer, int av_io_buffer_size) {
+  MemoryVideoFile *memory_video_file = static_cast<MemoryVideoFile *>(data_ptr);
+  // Forward the read request to the in-memory file; its result is returned unchanged.
+  return memory_video_file->Read(av_io_buffer, av_io_buffer_size);
 }
 
+int64_t seek_memory_video_file(void *data_ptr, int64_t new_position, int origin) {
+  MemoryVideoFile *memory_video_file = static_cast<MemoryVideoFile *>(data_ptr);
+  // Forward the seek request to the in-memory file; `origin` is passed on as the seek mode.
+  return memory_video_file->Seek(new_position, origin);
+}
+
+}   // namespace detail
+
 using AVPacketScope = std::unique_ptr<AVPacket, decltype(&av_packet_unref)>;
 
 const std::vector<AVCodecID> FramesDecoder::SupportedCodecs = {
@@ -106,6 +155,45 @@ FramesDecoder::FramesDecoder(const std::string &filename)
   DetectVfr();
 }
 
+// Opens a video held entirely in memory: FFmpeg reads it through a custom AVIOContext whose
+// read/seek callbacks operate on memory_video_file_ (the caller's buffer is not copied).
+FramesDecoder::FramesDecoder(const char *memory_file, int memory_file_size)
+  : av_state_(std::make_unique<AvState>()),
+    memory_video_file_(MemoryVideoFile(memory_file, memory_file_size)) {
+  av_log_set_level(AV_LOG_ERROR);
+
+  av_state_->ctx_ = avformat_alloc_context();
+  DALI_ENFORCE(av_state_->ctx_, "Could not alloc avformat context");
+
+  uint8_t *av_io_buffer = static_cast<uint8_t *>(av_malloc(default_av_buffer_size));
+  DALI_ENFORCE(av_io_buffer, "Could not alloc avio buffer");
+  AVIOContext *av_io_context = avio_alloc_context(
+    av_io_buffer,
+    default_av_buffer_size,
+    0,  // write_flag: the buffer is only read from
+    &memory_video_file_.value(),  // opaque pointer handed to the callbacks
+    detail::read_memory_video_file,
+    nullptr,  // no write callback
+    detail::seek_memory_video_file);
+  if (!av_io_context) av_free(av_io_buffer);  // no context owns the buffer yet, so free it
+  DALI_ENFORCE(av_io_context, "Could not alloc avio context");
+  av_state_->ctx_->pb = av_io_context;
+  int ret = avformat_open_input(&av_state_->ctx_, "", nullptr, nullptr);
+  DALI_ENFORCE(ret == 0, make_string("Failed to open video file from memory due to ",
+                                     detail::av_error_string(ret)));
+
+  FindVideoStream();
+  DALI_ENFORCE(
+    CheckCodecSupport(),
+    make_string(
+      "Unsupported video codec: ",
+      av_state_->codec_->name,
+      ". Supported codecs: h264, HEVC."));
+  InitAvState();
+  BuildIndex();
+  DetectVfr();
+}
+
 void FramesDecoder::BuildIndex() {
   // TODO(awolant): Optimize this function for:
   //  - CFR

diff --git a/dali/operators/reader/loader/video/frames_decoder.h b/dali/operators/reader/loader/video/frames_decoder.h
@@ -25,6 +25,7 @@ extern "C" {
 #include <vector>
 #include <string>
 #include <memory>
+#include <optional>
 
 #include "dali/core/common.h"
 
@@ -70,6 +71,23 @@ struct AvState {
   }
 };
 
+/**
+ * @brief Helper representing video file kept in memory. Allows reading and seeking.
+ * Keeps only a pointer to the data (no copy is made) together with a read position.
+ */
+struct MemoryVideoFile {
+  MemoryVideoFile(const char *data, int64_t size)
+    : data_(data), size_(size), position_(0) {}
+  // Copies up to buffer_size bytes from the current position into buffer and advances it.
+  int Read(unsigned char *buffer, int buffer_size);
+  // Handles a seek request (SEEK_SET or AVSEEK_SIZE) on the in-memory file.
+  int64_t Seek(int64_t new_position, int origin);
+
+  const char *data_;  // start of the video data; not copied by this struct
+  const int64_t size_;  // total size of the data in bytes
+  int64_t position_;  // offset of the next byte to read
+};
+
 /**
  * @brief Object representing a video file. Allows access to frames and seeking.
  * 
@@ -85,6 +103,18 @@ class DLL_PUBLIC FramesDecoder {
    */
   explicit FramesDecoder(const std::string &filename);
 
+
+  /**
+   * @brief Construct a new FramesDecoder object.
+   * 
+   * @param memory_file Pointer to memory with video file data.
+   * @param memory_file_size Size of memory_file in bytes.
+   * 
+   * @note This constructor assumes that the `memory_file` and
+   * `memory_file_size` arguments cover the entire video file, including the header.
+   */
+  FramesDecoder(const char *memory_file, int memory_file_size);
+
   /**
    * @brief Number of frames in the video
    * 
@@ -215,9 +245,15 @@ class DLL_PUBLIC FramesDecoder {
 
   int channels_ = 3;
   bool flush_state_ = false;
-  std::string filename_;
   bool is_vfr_ = false;
+
+  std::string filename_ = "";
+  std::optional<MemoryVideoFile> memory_video_file_ = {};
+
+  // Default size of the buffer used to load video files from memory to FFMPEG
+  const int default_av_buffer_size = (1 << 15);
 };
+
 }  // namespace dali
 
 #endif  // DALI_OPERATORS_READER_LOADER_VIDEO_FRAMES_DECODER_H_
diff --git a/dali/operators/reader/loader/video/frames_decoder_gpu.cc b/dali/operators/reader/loader/video/frames_decoder_gpu.cc
@@ -77,56 +77,67 @@ cudaVideoCodec FramesDecoderGpu::GetCodecType() {
   return cudaVideoCodec_H264;
 }
 
+void FramesDecoderGpu::InitGpuDecoder() {
+  nvdecode_state_ = std::make_unique<NvDecodeState>();
+  // Common GPU-side setup shared by the file-based and the in-memory constructors
+  InitBitStreamFilter();
+
+  filtered_packet_ = av_packet_alloc();
+  DALI_ENFORCE(filtered_packet_, "Could not allocate av packet");
+
+  auto codec_type = GetCodecType();
+
+  // Create nv decoder
+  CUVIDDECODECREATEINFO decoder_info;
+  memset(&decoder_info, 0, sizeof(CUVIDDECODECREATEINFO));
+  // Every field that is not assigned below keeps the zero value written by the memset above
+  decoder_info.bitDepthMinus8 = 0;
+  decoder_info.ChromaFormat = cudaVideoChromaFormat_420;
+  decoder_info.CodecType = codec_type;
+  decoder_info.ulHeight = Height();
+  decoder_info.ulWidth = Width();
+  decoder_info.ulMaxHeight = Height();
+  decoder_info.ulMaxWidth = Width();
+  decoder_info.ulTargetHeight = Height();
+  decoder_info.ulTargetWidth = Width();
+  decoder_info.ulNumDecodeSurfaces = num_decode_surfaces_;
+  decoder_info.ulNumOutputSurfaces = 2;
+
+  CUDA_CALL(cuvidCreateDecoder(&nvdecode_state_->decoder, &decoder_info));
+
+  // Create nv parser
+  CUVIDPARSERPARAMS parser_info;
+  memset(&parser_info, 0, sizeof(CUVIDPARSERPARAMS));
+  parser_info.CodecType = codec_type;
+  parser_info.ulMaxNumDecodeSurfaces = num_decode_surfaces_;
+  parser_info.ulMaxDisplayDelay = 0;
+  parser_info.pUserData = this;  // presumably passed back to the callbacks as user_data
+  parser_info.pfnSequenceCallback = detail::process_video_sequence;
+  parser_info.pfnDecodePicture = detail::process_picture_decode;
+  parser_info.pfnDisplayPicture = nullptr;  // no display callback is registered
+
+  CUDA_CALL(cuvidCreateVideoParser(&nvdecode_state_->parser, &parser_info));
+
+  // Init internal frame buffer: every slot gets storage for one frame of FrameSize() bytes
+  // TODO(awolant): Check, if continuous buffer would be faster
+  for (size_t i = 0; i < frame_buffer_.size(); ++i) {
+    frame_buffer_[i].frame_.resize(FrameSize());
+    frame_buffer_[i].pts_ = -1;  // presumably marks the slot as unused - TODO confirm
+  }
+}
+
 FramesDecoderGpu::FramesDecoderGpu(const std::string &filename, cudaStream_t stream) :
     FramesDecoder(filename),
     frame_buffer_(num_decode_surfaces_),
     stream_(stream) {
-    nvdecode_state_ = std::make_unique<NvDecodeState>();
-
-    InitBitStreamFilter();
-
-    filtered_packet_ = av_packet_alloc();
-    DALI_ENFORCE(filtered_packet_, "Could not allocate av packet");
-
-    auto codec_type = GetCodecType();
-
-    // Create nv decoder
-    CUVIDDECODECREATEINFO decoder_info;
-    memset(&decoder_info, 0, sizeof(CUVIDDECODECREATEINFO));
-
-    decoder_info.bitDepthMinus8 = 0;
-    decoder_info.ChromaFormat = cudaVideoChromaFormat_420;
-    decoder_info.CodecType = codec_type;
-    decoder_info.ulHeight = Height();
-    decoder_info.ulWidth = Width();
-    decoder_info.ulMaxHeight = Height();
-    decoder_info.ulMaxWidth = Width();
-    decoder_info.ulTargetHeight = Height();
-    decoder_info.ulTargetWidth = Width();
-    decoder_info.ulNumDecodeSurfaces = num_decode_surfaces_;
-    decoder_info.ulNumOutputSurfaces = 2;
-
-    CUDA_CALL(cuvidCreateDecoder(&nvdecode_state_->decoder, &decoder_info));
-
-    // Create nv parser
-    CUVIDPARSERPARAMS parser_info;
-    memset(&parser_info, 0, sizeof(CUVIDPARSERPARAMS));
-    parser_info.CodecType = codec_type;
-    parser_info.ulMaxNumDecodeSurfaces = num_decode_surfaces_;
-    parser_info.ulMaxDisplayDelay = 0;
-    parser_info.pUserData = this;
-    parser_info.pfnSequenceCallback = detail::process_video_sequence;
-    parser_info.pfnDecodePicture = detail::process_picture_decode;
-    parser_info.pfnDisplayPicture = nullptr;
-
-    CUDA_CALL(cuvidCreateVideoParser(&nvdecode_state_->parser, &parser_info));
-
-    // Init internal frame buffer
-    // TODO(awolant): Check, if continuous buffer would be faster
-    for (size_t i = 0; i < frame_buffer_.size(); ++i) {
-      frame_buffer_[i].frame_.resize(FrameSize());
-      frame_buffer_[i].pts_ = -1;
-    }
+    InitGpuDecoder();
+}
+
+FramesDecoderGpu::FramesDecoderGpu(
+    const char *memory_file, int memory_file_size, cudaStream_t stream) :
+    FramesDecoder(memory_file, memory_file_size), frame_buffer_(num_decode_surfaces_),
+    stream_(stream) {
+    InitGpuDecoder();  // same GPU setup as the file-based constructor
 }
 
 int FramesDecoderGpu::ProcessPictureDecode(void *user_data, CUVIDPICPARAMS *picture_params) {

diff --git a/dali/operators/reader/loader/video/frames_decoder_gpu.h b/dali/operators/reader/loader/video/frames_decoder_gpu.h
@@ -58,6 +58,17 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {
    */
   explicit FramesDecoderGpu(const std::string &filename, cudaStream_t stream = 0);
 
+  /**
+   * @brief Construct a new FramesDecoderGpu object from a video file kept in memory.
+   *
+   * @param memory_file Pointer to memory with video file data.
+   * @param memory_file_size Size of memory_file in bytes.
+   * @param stream CUDA stream stored by the decoder (defaults to stream 0).
+   * @note This constructor assumes that the `memory_file` and
+   * `memory_file_size` arguments cover the entire video file, including the header.
+   */
+  FramesDecoderGpu(const char *memory_file, int memory_file_size, cudaStream_t stream = 0);
+
   bool ReadNextFrame(uint8_t *data, bool copy_to_output = true) override;
 
   void SeekFrame(int frame_id) override;
@@ -100,6 +111,8 @@ class DLL_PUBLIC FramesDecoderGpu : public FramesDecoder {
   void InitBitStreamFilter();
 
   cudaVideoCodec GetCodecType();
+
+  void InitGpuDecoder();
 };
 
 }  // namespace dali

diff --git a/dali/operators/reader/loader/video/frames_decoder_test.cc b/dali/operators/reader/loader/video/frames_decoder_test.cc
@@ -236,4 +236,46 @@ TEST_F(FramesDecoderGpuTest, VariableFrameRateHevc) {
   RunTest(decoder, vfr_hevc_videos_[1]);
 }
 
+TEST_F(FramesDecoderTest_CpuOnlyTests, InMemoryCfrVideo) {
+  auto memory_video = MemoryVideo(cfr_videos_paths_[1]);
+  // Build the CPU decoder from the bytes loaded above instead of from a file path
+  FramesDecoder decoder(memory_video.data(), memory_video.size());
+  RunTest(decoder, cfr_videos_[1]);
+}
+
+TEST_F(FramesDecoderGpuTest, InMemoryCfrVideo) {
+  auto memory_video = MemoryVideo(cfr_videos_paths_[0]);
+  // Build the GPU decoder from the in-memory bytes; the stream argument keeps its default
+  FramesDecoderGpu decoder(memory_video.data(), memory_video.size());
+  RunTest(decoder, cfr_videos_[0]);
+}
+
+TEST_F(FramesDecoderTest_CpuOnlyTests, InMemoryVfrVideo) {
+  auto memory_video = MemoryVideo(vfr_videos_paths_[1]);
+  // Same in-memory scenario as the CFR test, run on an entry of the VFR video set
+  FramesDecoder decoder(memory_video.data(), memory_video.size());
+  RunTest(decoder, vfr_videos_[1]);
+}
+
+TEST_F(FramesDecoderGpuTest, InMemoryVfrVideo) {
+  auto memory_video = MemoryVideo(vfr_videos_paths_[0]);
+  // GPU decoder built from the in-memory bytes of an entry of the VFR video set
+  FramesDecoderGpu decoder(memory_video.data(), memory_video.size());
+  RunTest(decoder, vfr_videos_[0]);
+}
+
+TEST_F(FramesDecoderTest_CpuOnlyTests, InMemoryVfrHevcVideo) {
+  auto memory_video = MemoryVideo(vfr_hevc_videos_paths_[0]);
+  // HEVC variant: load and verify an entry of the HEVC VFR set, not of the plain VFR set
+  FramesDecoder decoder(memory_video.data(), memory_video.size());
+  RunTest(decoder, vfr_hevc_videos_[0]);
+}
+
+TEST_F(FramesDecoderGpuTest, InMemoryVfrVfrHevcVideo) {
+  auto memory_video = MemoryVideo(vfr_hevc_videos_paths_[1]);
+  // NOTE(review): the test name repeats "Vfr" (InMemoryVfrVfrHevcVideo) - looks like a typo
+  FramesDecoderGpu decoder(memory_video.data(), memory_video.size());
+  RunTest(decoder, vfr_hevc_videos_[1]);
+}
+
 }  // namespace dali
diff --git a/dali/operators/reader/loader/video/video_test_base.cc b/dali/operators/reader/loader/video/video_test_base.cc
@@ -155,4 +155,18 @@ void VideoTestBase::RunFailureTest(std::function<void()> body, std::string expec
   }
 }
 
+std::vector<char> VideoTestBase::MemoryVideo(const std::string &path) const {
+  std::ifstream video_file(path, std::ios::binary | std::ios::ate);
+  // Loads the whole file into memory; a file that failed to open is reported as size -1
+  const std::streamoff size = video_file ? std::streamoff(video_file.tellg()) : -1;
+  video_file.seekg(0, std::ios::beg);
+  std::vector<char> memory_video(static_cast<size_t>(size > 0 ? size : 0));
+  if (size < 0 || !video_file.read(memory_video.data(), size)) {
+    // We can't use FAIL() because this function returns value
+    throw ::testing::AssertionFailure() << "Could not load video file to memory.";
+  }
+
+  return memory_video;
+}
+
 }  // namespace dali
diff --git a/dali/operators/reader/loader/video/video_test_base.h b/dali/operators/reader/loader/video/video_test_base.h
@@ -74,6 +74,8 @@ class VideoTestBase : public ::testing::Test {
     return std::max(cfr_videos_[0].FrameSize(), cfr_videos_[1].FrameSize());
   }
 
+  std::vector<char> MemoryVideo(const std::string &path) const;
+
   /**
    * @brief Utility to save decoded frame as a PNG file.
    * Frame is saved to the folder given as an argument.