From 3022f5d467b717ba6b54f36d6199b769d7b5ba0e Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Sat, 7 May 2022 19:45:00 -0700 Subject: [PATCH] Refactor the constructors of pointer wrappers (#2373) Summary: This commit refactor the constructor of wrapper classes so that wrapper classes are only responsible for deallocation of underlying FFmpeg custom structures. The responsibility of custom initialization is moved to helper functions. Context: FFmpeg API uses bunch of raw pointers, which require dedicated allocater and deallcoator. In torchaudio we wrap these pointers with `std::unique_ptr<>` to adopt RAII semantics. Currently all of the customization logics required for `Streamer` are handled by the constructor of wrapper class. Like the following; ``` AVFormatContextPtr( const std::string& src, const std::string& device, const std::map& option); ``` This constructor allocates the raw `AVFormatContext*` pointer, while initializing it with the given option, then it parses the input media. As we consider the write/encode features, which require different way of initializing the `AVFormatContext*`, making it the responsibility of constructors of `AVFormatContextPtr` reduce the flexibility. Thus this commit moves the customization to helper factory function. - `AVFormatContextPtr(...)` -> `get_input_format_context(...)` - `AVCodecContextPtr(...)` -> `get_decode_context(...)` Pull Request resolved: https://github.com/pytorch/audio/pull/2373 Differential Revision: D36230148 Pulled By: mthrok fbshipit-source-id: 285892a3aa0a8676592bf4ac996aa8e8642f38f8 --- torchaudio/csrc/ffmpeg/decoder.cpp | 4 +- torchaudio/csrc/ffmpeg/ffmpeg.cpp | 64 +++++++++++++---------------- torchaudio/csrc/ffmpeg/ffmpeg.h | 28 +++++++++---- torchaudio/csrc/ffmpeg/streamer.cpp | 6 ++- 4 files changed, 56 insertions(+), 46 deletions(-) diff --git a/torchaudio/csrc/ffmpeg/decoder.cpp b/torchaudio/csrc/ffmpeg/decoder.cpp index b9ff2356864..7df9fd71728 100644 --- a/torchaudio/csrc/ffmpeg/decoder.cpp +++ b/torchaudio/csrc/ffmpeg/decoder.cpp @@ -10,7 +10,9 @@ Decoder::Decoder( AVCodecParameters* pParam, const std::string& decoder_name, const std::map& decoder_option) - : pCodecContext(pParam, decoder_name, decoder_option) {} + : pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) { + init_decode_context(pCodecContext, pParam, decoder_name, decoder_option); +} int Decoder::process_packet(AVPacket* pPacket) { return avcodec_send_packet(pCodecContext, pPacket); diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.cpp b/torchaudio/csrc/ffmpeg/ffmpeg.cpp index bc418bd0df5..f25fe42ea6e 100644 --- a/torchaudio/csrc/ffmpeg/ffmpeg.cpp +++ b/torchaudio/csrc/ffmpeg/ffmpeg.cpp @@ -61,7 +61,9 @@ std::string join(std::vector vars) { #define AVINPUT_FORMAT_CONST #endif -AVFormatContext* get_format_context( +} // namespace + +AVFormatContextPtr get_input_format_context( const std::string& src, const std::string& device, const std::map& option) { @@ -82,19 +84,11 @@ AVFormatContext* get_format_context( throw std::runtime_error( "Failed to open the input \"" + src + "\" (" + av_err2string(ret) + ")."); - return pFormat; + return AVFormatContextPtr(pFormat); } -} // namespace -AVFormatContextPtr::AVFormatContextPtr( - const std::string& src, - const std::string& device, - const std::map& option) - : Wrapper( - get_format_context(src, device, option)) { - if (avformat_find_stream_info(ptr.get(), NULL) < 0) - throw std::runtime_error("Failed to find stream information."); -} +AVFormatContextPtr::AVFormatContextPtr(AVFormatContext* p) + : Wrapper(p) {} //////////////////////////////////////////////////////////////////////////////// // AVPacket @@ -151,42 +145,46 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) { }; namespace { -AVCodecContext* get_codec_context( +const AVCodec* get_decode_codec( enum AVCodecID codec_id, - const std::string& decoder_name) { - const AVCodec* pCodec = decoder_name.empty() + const std::string& codec_name) { + const AVCodec* pCodec = codec_name.empty() ? avcodec_find_decoder(codec_id) - : avcodec_find_decoder_by_name(decoder_name.c_str()); + : avcodec_find_decoder_by_name(codec_name.c_str()); if (!pCodec) { std::stringstream ss; - if (decoder_name.empty()) { + if (codec_name.empty()) { ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", (" << codec_id << ")."; } else { - ss << "Unsupported codec: \"" << decoder_name << "\"."; + ss << "Unsupported codec: \"" << codec_name << "\"."; } throw std::runtime_error(ss.str()); } + return pCodec; +} + +} // namespace + +AVCodecContextPtr get_decode_context( + enum AVCodecID codec_id, + const std::string& codec_name) { + const AVCodec* pCodec = get_decode_codec(codec_id, codec_name); AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec); if (!pCodecContext) { throw std::runtime_error("Failed to allocate CodecContext."); } - return pCodecContext; + return AVCodecContextPtr(pCodecContext); } -void init_codec_context( +void init_decode_context( AVCodecContext* pCodecContext, AVCodecParameters* pParams, - const std::string& decoder_name, + const std::string& codec_name, const std::map& decoder_option) { - const AVCodec* pCodec = decoder_name.empty() - ? avcodec_find_decoder(pParams->codec_id) - : avcodec_find_decoder_by_name(decoder_name.c_str()); - - // No need to check if pCodec is null as it's been already checked in - // get_codec_context + const AVCodec* pCodec = get_decode_codec(pParams->codec_id, codec_name); if (avcodec_parameters_to_context(pCodecContext, pParams) < 0) { throw std::runtime_error("Failed to set CodecContext parameter."); @@ -206,16 +204,10 @@ void init_codec_context( pParams->channel_layout = av_get_default_channel_layout(pCodecContext->channels); } -} // namespace -AVCodecContextPtr::AVCodecContextPtr( - AVCodecParameters* pParam, - const std::string& decoder_name, - const std::map& decoder_option) - : Wrapper( - get_codec_context(pParam->codec_id, decoder_name)) { - init_codec_context(ptr.get(), pParam, decoder_name, decoder_option); -} +AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p) + : Wrapper(p) {} + //////////////////////////////////////////////////////////////////////////////// // AVFilterGraph //////////////////////////////////////////////////////////////////////////////// diff --git a/torchaudio/csrc/ffmpeg/ffmpeg.h b/torchaudio/csrc/ffmpeg/ffmpeg.h index ed6c581b0a0..5c6c4a19e7d 100644 --- a/torchaudio/csrc/ffmpeg/ffmpeg.h +++ b/torchaudio/csrc/ffmpeg/ffmpeg.h @@ -64,12 +64,15 @@ struct AVFormatContextDeleter { struct AVFormatContextPtr : public Wrapper { - AVFormatContextPtr( - const std::string& src, - const std::string& device, - const std::map& option); + explicit AVFormatContextPtr(AVFormatContext* p); }; +// create format context for reading media +AVFormatContextPtr get_input_format_context( + const std::string& src, + const std::string& device, + const std::map& option); + //////////////////////////////////////////////////////////////////////////////// // AVPacket //////////////////////////////////////////////////////////////////////////////// @@ -118,12 +121,21 @@ struct AVCodecContextDeleter { }; struct AVCodecContextPtr : public Wrapper { - AVCodecContextPtr( - AVCodecParameters* pParam, - const std::string& decoder, - const std::map& decoder_option); + explicit AVCodecContextPtr(AVCodecContext* p); }; +// Allocate codec context from either decoder name or ID +AVCodecContextPtr get_decode_context( + enum AVCodecID codec_id, + const std::string& codec_name); + +// Initialize codec context with the parameters +void init_decode_context( + AVCodecContext* pCodecContext, + AVCodecParameters* pParams, + const std::string& codec_name, + const std::map& decoder_option); + //////////////////////////////////////////////////////////////////////////////// // AVFilterGraph //////////////////////////////////////////////////////////////////////////////// diff --git a/torchaudio/csrc/ffmpeg/streamer.cpp b/torchaudio/csrc/ffmpeg/streamer.cpp index dc87d135de9..ab8ff023ca4 100644 --- a/torchaudio/csrc/ffmpeg/streamer.cpp +++ b/torchaudio/csrc/ffmpeg/streamer.cpp @@ -46,7 +46,11 @@ Streamer::Streamer( const std::string& src, const std::string& device, const std::map& option) - : pFormatContext(src, device, option) { + : pFormatContext(get_input_format_context(src, device, option)) { + if (avformat_find_stream_info(pFormatContext, nullptr) < 0) { + throw std::runtime_error("Failed to find stream information."); + } + processors = std::vector>(pFormatContext->nb_streams); for (int i = 0; i < pFormatContext->nb_streams; ++i) {