Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Streamer implementation #2402

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/torchaudio_unittest/io/stream_reader_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ def test_basic_audio_stream(self):

sinfo = s.get_out_stream_info(0)
assert sinfo.source_index == s.default_audio_stream
assert sinfo.filter_description == ""
assert sinfo.filter_description == "anull"

sinfo = s.get_out_stream_info(1)
assert sinfo.source_index == s.default_audio_stream
Expand All @@ -185,7 +185,7 @@ def test_basic_video_stream(self):

sinfo = s.get_out_stream_info(0)
assert sinfo.source_index == s.default_video_stream
assert sinfo.filter_description == ""
assert sinfo.filter_description == "null"

sinfo = s.get_out_stream_info(1)
assert sinfo.source_index == s.default_video_stream
Expand Down
1 change: 1 addition & 0 deletions torchaudio/csrc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,7 @@ if(USE_FFMPEG)
ffmpeg/sink.cpp
ffmpeg/stream_processor.cpp
ffmpeg/streamer.cpp
ffmpeg/stream_reader_wrapper.cpp
)
message(STATUS "FFMPEG_ROOT=$ENV{FFMPEG_ROOT}")
find_package(FFMPEG 4.1 REQUIRED COMPONENTS avdevice avfilter avformat avcodec avutil)
Expand Down
4 changes: 2 additions & 2 deletions torchaudio/csrc/ffmpeg/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ namespace ffmpeg {
////////////////////////////////////////////////////////////////////////////////
Decoder::Decoder(
AVCodecParameters* pParam,
const std::string& decoder_name,
const std::map<std::string, std::string>& decoder_option,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device)
: pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) {
init_codec_context(
Expand Down
4 changes: 2 additions & 2 deletions torchaudio/csrc/ffmpeg/decoder.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@ class Decoder {
// Default constructable
Decoder(
AVCodecParameters* pParam,
const std::string& decoder_name,
const std::map<std::string, std::string>& decoder_option,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device);
// Custom destructor to clean up the resources
~Decoder() = default;
Expand Down
42 changes: 27 additions & 15 deletions torchaudio/csrc/ffmpeg/ffmpeg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@ void AVFormatContextDeleter::operator()(AVFormatContext* p) {

namespace {

AVDictionary* get_option_dict(
const std::map<std::string, std::string>& option) {
AVDictionary* get_option_dict(const OptionDict& option) {
AVDictionary* opt = nullptr;
for (auto& it : option) {
for (const auto& it : option) {
av_dict_set(&opt, it.first.c_str(), it.second.c_str(), 0);
}
return opt;
Expand Down Expand Up @@ -66,12 +65,25 @@ std::string join(std::vector<std::string> vars) {

AVFormatContextPtr get_input_format_context(
const std::string& src,
const std::string& device,
const std::map<std::string, std::string>& option) {
const c10::optional<std::string>& device,
const OptionDict& option) {
AVFormatContext* pFormat = NULL;

AVINPUT_FORMAT_CONST AVInputFormat* pInput =
device.empty() ? NULL : av_find_input_format(device.c_str());
AVINPUT_FORMAT_CONST AVInputFormat* pInput = [&]() -> AVInputFormat* {
if (device.has_value()) {
std::string device_str = device.value();
AVINPUT_FORMAT_CONST AVInputFormat* p =
av_find_input_format(device_str.c_str());
if (!p) {
std::ostringstream msg;
msg << "Unsupported device: \"" << device_str << "\"";
throw std::runtime_error(msg.str());
}
return p;
}
return nullptr;
}();

AVDictionary* opt = get_option_dict(option);
int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);

Expand Down Expand Up @@ -148,18 +160,18 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) {
namespace {
const AVCodec* get_decode_codec(
enum AVCodecID codec_id,
const std::string& decoder_name) {
const AVCodec* pCodec = decoder_name.empty()
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = !decoder_name.has_value()
? avcodec_find_decoder(codec_id)
: avcodec_find_decoder_by_name(decoder_name.c_str());
: avcodec_find_decoder_by_name(decoder_name.value().c_str());

if (!pCodec) {
std::stringstream ss;
if (decoder_name.empty()) {
if (!decoder_name.has_value()) {
ss << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
<< codec_id << ").";
} else {
ss << "Unsupported codec: \"" << decoder_name << "\".";
ss << "Unsupported codec: \"" << decoder_name.value() << "\".";
}
throw std::runtime_error(ss.str());
}
Expand All @@ -170,7 +182,7 @@ const AVCodec* get_decode_codec(

AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const std::string& decoder_name) {
const c10::optional<std::string>& decoder_name) {
const AVCodec* pCodec = get_decode_codec(codec_id, decoder_name);

AVCodecContext* pCodecContext = avcodec_alloc_context3(pCodec);
Expand Down Expand Up @@ -216,8 +228,8 @@ const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const std::string& decoder_name,
const std::map<std::string, std::string>& decoder_option,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef) {
const AVCodec* pCodec = get_decode_codec(pParams->codec_id, decoder_name);
Expand Down
12 changes: 7 additions & 5 deletions torchaudio/csrc/ffmpeg/ffmpeg.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ extern "C" {
namespace torchaudio {
namespace ffmpeg {

using OptionDict = std::map<std::string, std::string>;

// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
Expand Down Expand Up @@ -71,8 +73,8 @@ struct AVFormatContextPtr
// create format context for reading media
AVFormatContextPtr get_input_format_context(
const std::string& src,
const std::string& device,
const std::map<std::string, std::string>& option);
const c10::optional<std::string>& device,
const OptionDict& option);

////////////////////////////////////////////////////////////////////////////////
// AVPacket
Expand Down Expand Up @@ -141,14 +143,14 @@ struct AVCodecContextPtr
// Allocate codec context from either decoder name or ID
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const std::string& decoder);
const c10::optional<std::string>& decoder);

// Initialize codec context with the parameters
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const std::string& decoder_name,
const std::map<std::string, std::string>& decoder_option,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef);

Expand Down
19 changes: 8 additions & 11 deletions torchaudio/csrc/ffmpeg/filter_graph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ namespace ffmpeg {
FilterGraph::FilterGraph(
AVRational time_base,
AVCodecParameters* codecpar,
std::string filter_description)
const c10::optional<std::string>& filter_description)
: input_time_base(time_base),
codecpar(codecpar),
filter_description(std::move(filter_description)),
filter_description(filter_description.value_or(
codecpar->codec_type == AVMEDIA_TYPE_AUDIO ? "anull" : "null")),
media_type(codecpar->codec_type) {
init();
}
Expand Down Expand Up @@ -49,10 +50,10 @@ std::string get_video_src_args(
std::snprintf(
args,
sizeof(args),
"video_size=%dx%d:pix_fmt=%d:time_base=%d/%d:pixel_aspect=%d/%d",
"video_size=%dx%d:pix_fmt=%s:time_base=%d/%d:pixel_aspect=%d/%d",
codecpar->width,
codecpar->height,
static_cast<AVPixelFormat>(codecpar->format),
av_get_pix_fmt_name(static_cast<AVPixelFormat>(codecpar->format)),
time_base.num,
time_base.den,
codecpar->sample_aspect_ratio.num,
Expand Down Expand Up @@ -165,16 +166,12 @@ void FilterGraph::add_process() {
// If you are debugging this part of the code, you might get confused.
InOuts in{"in", buffersrc_ctx}, out{"out", buffersink_ctx};

std::string desc = filter_description.empty()
? (media_type == AVMEDIA_TYPE_AUDIO) ? "anull" : "null"
: filter_description;

int ret =
avfilter_graph_parse_ptr(pFilterGraph, desc.c_str(), out, in, nullptr);
int ret = avfilter_graph_parse_ptr(
pFilterGraph, filter_description.c_str(), out, in, nullptr);

if (ret < 0) {
throw std::runtime_error(
"Failed to create the filter from \"" + desc + "\" (" +
"Failed to create the filter from \"" + filter_description + "\" (" +
av_err2string(ret) + ".)");
}
}
Expand Down
2 changes: 1 addition & 1 deletion torchaudio/csrc/ffmpeg/filter_graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ class FilterGraph {
FilterGraph(
AVRational time_base,
AVCodecParameters* codecpar,
std::string filter_desc);
const c10::optional<std::string>& filter_desc);
// Custom destructor to release AVFilterGraph*
~FilterGraph() = default;
// Non-copyable
Expand Down
Loading