Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move helper functions out of common utility for better locality #2512

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
126 changes: 120 additions & 6 deletions torchaudio/csrc/ffmpeg/decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,133 @@ namespace ffmpeg {
////////////////////////////////////////////////////////////////////////////////
// Decoder
////////////////////////////////////////////////////////////////////////////////
namespace {
// Allocate a codec context for decoding.
//
// The decoder is looked up by `decoder_name` when one is provided, otherwise
// by `codec_id`.
// Throws std::runtime_error when no matching decoder is found or when the
// codec context cannot be allocated.
AVCodecContextPtr get_decode_context(
    enum AVCodecID codec_id,
    const c10::optional<std::string>& decoder_name) {
  const bool by_name = decoder_name.has_value();
  const AVCodec* codec = by_name
      ? avcodec_find_decoder_by_name(decoder_name.value().c_str())
      : avcodec_find_decoder(codec_id);

  if (codec == nullptr) {
    std::stringstream msg;
    if (by_name) {
      msg << "Unsupported codec: \"" << decoder_name.value() << "\".";
    } else {
      msg << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
          << codec_id << ").";
    }
    throw std::runtime_error(msg.str());
  }

  if (AVCodecContext* ctx = avcodec_alloc_context3(codec)) {
    return AVCodecContextPtr(ctx);
  }
  throw std::runtime_error("Failed to allocate CodecContext.");
}

#ifdef USE_CUDA
// Callback assigned to AVCodecContext::get_format.
//
// Reads the wanted pixel format through `ctx->opaque` and scans the
// -1 terminated list `pix_fmts` for it. Returns the matching entry, or
// emits a warning and returns AV_PIX_FMT_NONE when it is not offered.
enum AVPixelFormat get_hw_format(
    AVCodecContext* ctx,
    const enum AVPixelFormat* pix_fmts) {
  const AVPixelFormat wanted = *static_cast<AVPixelFormat*>(ctx->opaque);
  const enum AVPixelFormat* it = pix_fmts;
  while (*it != -1) {
    if (*it == wanted) {
      return *it;
    }
    ++it;
  }
  TORCH_WARN("Failed to get HW surface format.");
  return AV_PIX_FMT_NONE;
}

// Find the HW configuration of `pCodec` that targets CUDA and supports the
// HW_DEVICE_CTX setup method.
//
// Configurations are enumerated with avcodec_get_hw_config until it returns
// null. Throws std::runtime_error when no suitable configuration exists.
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
  int index = 0;
  while (const AVCodecHWConfig* candidate =
             avcodec_get_hw_config(pCodec, index)) {
    const bool is_cuda = candidate->device_type == AV_HWDEVICE_TYPE_CUDA;
    if (is_cuda &&
        (candidate->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0) {
      return candidate;
    }
    ++index;
  }

  std::stringstream msg;
  msg << "CUDA device was requested, but the codec \"" << pCodec->name
      << "\" is not supported.";
  throw std::runtime_error(msg.str());
}
#endif

// Initialize an already allocated codec context for decoding.
//
// Steps:
//   1. Copy the stream parameters `pParams` into `pCodecContext`.
//   2. (Only when built with USE_CUDA and `device` is a CUDA device) set up
//      HW decoding: install the get_format callback and create a CUDA HW
//      device context. `pHWBufferRef` takes the created reference.
//   3. Open the codec with the options in `decoder_option`.
//   4. For audio streams without a channel layout, fill
//      `pParams->channel_layout` with the default layout for the number of
//      channels reported by the codec context.
//
// Throws std::runtime_error when any of the FFmpeg calls fails.
void init_codec_context(
    AVCodecContext* pCodecContext,
    AVCodecParameters* pParams,
    const OptionDict& decoder_option,
    const torch::Device& device,
    AVBufferRefPtr& pHWBufferRef) {
  int ret = avcodec_parameters_to_context(pCodecContext, pParams);
  if (ret < 0) {
    throw std::runtime_error(
        "Failed to set CodecContext parameter: " + av_err2string(ret));
  }

#ifdef USE_CUDA
  // Enable HW Acceleration
  if (device.type() == c10::DeviceType::CUDA) {
    const AVCodecHWConfig* config = get_cuda_config(pCodecContext->codec);
    // TODO: check how to log
    // C10_LOG << "Decoder " << pCodec->name << " supports device " <<
    // av_hwdevice_get_type_name(config->device_type);

    // https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
    // 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
    //    The storage has to outlive this function because the codec context
    //    only keeps a pointer to it. The value is assigned on every call:
    //    the initializer of a static local runs only once per thread, so
    //    initializing it with config->pix_fmt would keep the value from the
    //    first call forever.
    //    NOTE(review): the storage is still shared by all contexts set up on
    //    the same thread.
    static thread_local AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
    pix_fmt = config->pix_fmt;
    pCodecContext->opaque = static_cast<void*>(&pix_fmt);
    // 2. Set pCodecContext->get_format call back function which
    // will retrieve the HW pixel format from opaque pointer.
    pCodecContext->get_format = get_hw_format;
    // 3. Create HW device context and set to pCodecContext.
    AVBufferRef* hw_device_ctx = nullptr;
    ret = av_hwdevice_ctx_create(
        &hw_device_ctx,
        AV_HWDEVICE_TYPE_CUDA,
        std::to_string(device.index()).c_str(),
        nullptr,
        0);
    if (ret < 0) {
      throw std::runtime_error(
          "Failed to create CUDA device context: " + av_err2string(ret));
    }
    assert(hw_device_ctx);
    // Hand the created reference over first, so that it is not lost if
    // taking the second reference below fails.
    pHWBufferRef.reset(hw_device_ctx);
    pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
    if (!pCodecContext->hw_device_ctx) {
      throw std::runtime_error("Failed to reference CUDA device context.");
    }
  }
#endif

  AVDictionary* opts = get_option_dict(decoder_option);
  ret = avcodec_open2(pCodecContext, pCodecContext->codec, &opts);
  clean_up_dict(opts);

  if (ret < 0) {
    throw std::runtime_error(
        "Failed to initialize CodecContext: " + av_err2string(ret));
  }

  // Some audio streams carry no channel layout; derive a default one from
  // the channel count.
  if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout) {
    pParams->channel_layout =
        av_get_default_channel_layout(pCodecContext->channels);
  }
}
} // namespace

// Construct a decoder for the stream described by `pParam`.
//
// The codec context is allocated by get_decode_context (using
// `decoder_name` when provided, otherwise `pParam->codec_id`) and then
// initialized by init_codec_context with `decoder_option` and `device`.
// `pHWBufferRef` is passed along so that it can receive the HW device
// context when HW decoding is set up.
Decoder::Decoder(
    AVCodecParameters* pParam,
    const c10::optional<std::string>& decoder_name,
    const OptionDict& decoder_option,
    const torch::Device& device)
    : pCodecContext(get_decode_context(pParam->codec_id, decoder_name)) {
  init_codec_context(
      pCodecContext, pParam, decoder_option, device, pHWBufferRef);
}

int Decoder::process_packet(AVPacket* pPacket) {
Expand Down
182 changes: 0 additions & 182 deletions torchaudio/csrc/ffmpeg/ffmpeg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,65 +35,13 @@ void clean_up_dict(AVDictionary* p) {
}
}

namespace {

// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
//
// AVINPUT_FORMAT_CONST expands to `const` when building against
// libavformat >= 59 and to nothing otherwise, so that a declaration such as
// `AVINPUT_FORMAT_CONST AVInputFormat*` matches either API.
// Note: preprocessor macros are not scoped by the enclosing namespace.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVINPUT_FORMAT_CONST const
#else
#define AVINPUT_FORMAT_CONST
#endif

} // namespace

////////////////////////////////////////////////////////////////////////////////
// AVFormatContext
////////////////////////////////////////////////////////////////////////////////
// Deleter used for AVFormatContext: closes the input with
// avformat_close_input.
void AVFormatContextDeleter::operator()(AVFormatContext* p) {
  avformat_close_input(&p);
}

// Create a format context for reading media from `src`.
//
// Parameters:
//   src    - passed to avformat_open_input as the URL to open.
//   device - optional name of an input device/format; when given it is
//            resolved with av_find_input_format and forced as the input
//            format.
//   option - key/value options handed to avformat_open_input.
//   io_ctx - optional custom I/O context; when non-null it is assigned to
//            the `pb` field of the format context before opening.
//
// Throws std::runtime_error when the device/format is unknown, when the
// format context cannot be allocated, or when the input cannot be opened.
AVFormatContextPtr get_input_format_context(
    const std::string& src,
    const c10::optional<std::string>& device,
    const OptionDict& option,
    AVIOContext* io_ctx) {
  // Resolve the input format before allocating anything, so that an unknown
  // device/format does not leak a freshly allocated AVFormatContext.
  AVINPUT_FORMAT_CONST AVInputFormat* pInput = nullptr;
  if (device.has_value()) {
    const std::string& device_str = device.value();
    pInput = av_find_input_format(device_str.c_str());
    if (!pInput) {
      std::ostringstream msg;
      msg << "Unsupported device/format: \"" << device_str << "\"";
      throw std::runtime_error(msg.str());
    }
  }

  AVFormatContext* pFormat = avformat_alloc_context();
  if (!pFormat) {
    throw std::runtime_error("Failed to allocate AVFormatContext.");
  }
  if (io_ctx) {
    pFormat->pb = io_ctx;
  }

  AVDictionary* opt = get_option_dict(option);
  // Per the FFmpeg documentation, a user-supplied context is freed by
  // avformat_open_input on failure, so no explicit cleanup is done here.
  int ret = avformat_open_input(&pFormat, src.c_str(), pInput, &opt);
  clean_up_dict(opt);

  if (ret < 0) {
    throw std::runtime_error(
        "Failed to open the input \"" + src + "\" (" + av_err2string(ret) +
        ").");
  }
  return AVFormatContextPtr(pFormat);
}

// Construct from a raw AVFormatContext pointer; the pointer is forwarded to
// the Wrapper<AVFormatContext, AVFormatContextDeleter> base.
AVFormatContextPtr::AVFormatContextPtr(AVFormatContext* p)
: Wrapper<AVFormatContext, AVFormatContextDeleter>(p) {}

Expand Down Expand Up @@ -162,136 +110,6 @@ void AVCodecContextDeleter::operator()(AVCodecContext* p) {
avcodec_free_context(&p);
};

namespace {
// Look up the decoder to use.
//
// When `decoder_name` is provided the decoder is searched by that name,
// otherwise by `codec_id`.
// Throws std::runtime_error when no matching decoder is found.
const AVCodec* get_decode_codec(
    enum AVCodecID codec_id,
    const c10::optional<std::string>& decoder_name) {
  const bool by_name = decoder_name.has_value();
  const AVCodec* codec = by_name
      ? avcodec_find_decoder_by_name(decoder_name.value().c_str())
      : avcodec_find_decoder(codec_id);
  if (codec != nullptr) {
    return codec;
  }

  std::stringstream msg;
  if (by_name) {
    msg << "Unsupported codec: \"" << decoder_name.value() << "\".";
  } else {
    msg << "Unsupported codec: \"" << avcodec_get_name(codec_id) << "\", ("
        << codec_id << ").";
  }
  throw std::runtime_error(msg.str());
}

} // namespace

// Allocate a codec context for the decoder selected by get_decode_codec
// (by `decoder_name` when provided, otherwise by `codec_id`).
// Throws std::runtime_error when the context cannot be allocated; errors
// from get_decode_codec propagate unchanged.
AVCodecContextPtr get_decode_context(
    enum AVCodecID codec_id,
    const c10::optional<std::string>& decoder_name) {
  AVCodecContext* ctx =
      avcodec_alloc_context3(get_decode_codec(codec_id, decoder_name));
  if (ctx == nullptr) {
    throw std::runtime_error("Failed to allocate CodecContext.");
  }
  return AVCodecContextPtr(ctx);
}

#ifdef USE_CUDA
// Callback assigned to AVCodecContext::get_format.
//
// Reads the wanted pixel format through `ctx->opaque` and scans the
// -1 terminated list `pix_fmts` for it. Returns the matching entry, or
// emits a warning and returns AV_PIX_FMT_NONE when it is not offered.
enum AVPixelFormat get_hw_format(
    AVCodecContext* ctx,
    const enum AVPixelFormat* pix_fmts) {
  const AVPixelFormat wanted = *static_cast<AVPixelFormat*>(ctx->opaque);
  const enum AVPixelFormat* it = pix_fmts;
  while (*it != -1) {
    if (*it == wanted) {
      return *it;
    }
    ++it;
  }
  TORCH_WARN("Failed to get HW surface format.");
  return AV_PIX_FMT_NONE;
}

// Find the HW configuration of `pCodec` that targets CUDA and supports the
// HW_DEVICE_CTX setup method.
//
// Configurations are enumerated with avcodec_get_hw_config until it returns
// null. Throws std::runtime_error when no suitable configuration exists.
const AVCodecHWConfig* get_cuda_config(const AVCodec* pCodec) {
  int index = 0;
  while (const AVCodecHWConfig* candidate =
             avcodec_get_hw_config(pCodec, index)) {
    const bool is_cuda = candidate->device_type == AV_HWDEVICE_TYPE_CUDA;
    if (is_cuda &&
        (candidate->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) != 0) {
      return candidate;
    }
    ++index;
  }

  std::stringstream msg;
  msg << "CUDA device was requested, but the codec \"" << pCodec->name
      << "\" is not supported.";
  throw std::runtime_error(msg.str());
}
#endif

// Initialize an already allocated codec context for decoding.
//
// Steps:
//   1. Resolve the decoder with get_decode_codec (by `decoder_name` when
//      provided, otherwise by `pParams->codec_id`).
//   2. Copy the stream parameters `pParams` into `pCodecContext`.
//   3. (Only when built with USE_CUDA and `device` is a CUDA device) set up
//      HW decoding: install the get_format callback and create a CUDA HW
//      device context. `pHWBufferRef` takes the created reference.
//   4. Open the codec with the options in `decoder_option`.
//   5. For audio streams without a channel layout, fill
//      `pParams->channel_layout` with the default layout for the number of
//      channels reported by the codec context.
//
// Throws std::runtime_error when any of the steps fails.
void init_codec_context(
    AVCodecContext* pCodecContext,
    AVCodecParameters* pParams,
    const c10::optional<std::string>& decoder_name,
    const OptionDict& decoder_option,
    const torch::Device& device,
    AVBufferRefPtr& pHWBufferRef) {
  const AVCodec* pCodec = get_decode_codec(pParams->codec_id, decoder_name);

  int ret = avcodec_parameters_to_context(pCodecContext, pParams);
  if (ret < 0) {
    throw std::runtime_error(
        "Failed to set CodecContext parameter: " + av_err2string(ret));
  }

#ifdef USE_CUDA
  // Enable HW Acceleration
  if (device.type() == c10::DeviceType::CUDA) {
    const AVCodecHWConfig* config = get_cuda_config(pCodec);
    // TODO: check how to log
    // C10_LOG << "Decoder " << pCodec->name << " supports device " <<
    // av_hwdevice_get_type_name(config->device_type);

    // https://www.ffmpeg.org/doxygen/trunk/hw__decode_8c_source.html#l00221
    // 1. Set HW pixel format (config->pix_fmt) to opaque pointer.
    //    The storage has to outlive this function because the codec context
    //    only keeps a pointer to it. The value is assigned on every call:
    //    the initializer of a static local runs only once per thread, so
    //    initializing it with config->pix_fmt would keep the value from the
    //    first call forever.
    //    NOTE(review): the storage is still shared by all contexts set up on
    //    the same thread.
    static thread_local AVPixelFormat pix_fmt = AV_PIX_FMT_NONE;
    pix_fmt = config->pix_fmt;
    pCodecContext->opaque = static_cast<void*>(&pix_fmt);
    // 2. Set pCodecContext->get_format call back function which
    // will retrieve the HW pixel format from opaque pointer.
    pCodecContext->get_format = get_hw_format;
    // 3. Create HW device context and set to pCodecContext.
    AVBufferRef* hw_device_ctx = nullptr;
    ret = av_hwdevice_ctx_create(
        &hw_device_ctx,
        AV_HWDEVICE_TYPE_CUDA,
        std::to_string(device.index()).c_str(),
        nullptr,
        0);
    if (ret < 0) {
      throw std::runtime_error(
          "Failed to create CUDA device context: " + av_err2string(ret));
    }
    assert(hw_device_ctx);
    // Hand the created reference over first, so that it is not lost if
    // taking the second reference below fails.
    pHWBufferRef.reset(hw_device_ctx);
    pCodecContext->hw_device_ctx = av_buffer_ref(hw_device_ctx);
    if (!pCodecContext->hw_device_ctx) {
      throw std::runtime_error("Failed to reference CUDA device context.");
    }
  }
#endif

  AVDictionary* opts = get_option_dict(decoder_option);
  ret = avcodec_open2(pCodecContext, pCodec, &opts);
  clean_up_dict(opts);

  if (ret < 0) {
    throw std::runtime_error(
        "Failed to initialize CodecContext: " + av_err2string(ret));
  }

  // Some audio streams carry no channel layout; derive a default one from
  // the channel count.
  if (pParams->codec_type == AVMEDIA_TYPE_AUDIO && !pParams->channel_layout) {
    pParams->channel_layout =
        av_get_default_channel_layout(pCodecContext->channels);
  }
}

// Construct from a raw AVCodecContext pointer; the pointer is forwarded to
// the Wrapper<AVCodecContext, AVCodecContextDeleter> base.
AVCodecContextPtr::AVCodecContextPtr(AVCodecContext* p)
: Wrapper<AVCodecContext, AVCodecContextDeleter>(p) {}

Expand Down
30 changes: 9 additions & 21 deletions torchaudio/csrc/ffmpeg/ffmpeg.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,15 @@ namespace ffmpeg {

using OptionDict = std::map<std::string, std::string>;

// https://github.com/FFmpeg/FFmpeg/blob/4e6debe1df7d53f3f59b37449b82265d5c08a172/doc/APIchanges#L252-L260
// Starting from libavformat 59 (ffmpeg 5),
// AVInputFormat is const and related functions expect constant.
//
// AVFORMAT_CONST expands to `const` when building against
// libavformat >= 59 and to nothing otherwise, so that declarations using it
// compile against either API.
#if LIBAVFORMAT_VERSION_MAJOR >= 59
#define AVFORMAT_CONST const
#else
#define AVFORMAT_CONST
#endif

// Replacement of av_err2str, which causes
// `error: taking address of temporary array`
// https://github.com/joncampbell123/composite-video-simulator/issues/5
Expand Down Expand Up @@ -84,13 +93,6 @@ struct AVFormatContextPtr
explicit AVFormatContextPtr(AVFormatContext* p);
};

// Create format context for reading media.
//
//   src    - source to open.
//   device - optional name of the input device/format to force.
//   option - key/value options used when opening the input.
//   io_ctx - optional custom I/O context; defaults to nullptr.
AVFormatContextPtr get_input_format_context(
const std::string& src,
const c10::optional<std::string>& device,
const OptionDict& option,
AVIOContext* io_ctx = nullptr);

////////////////////////////////////////////////////////////////////////////////
// AVIO
////////////////////////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -166,20 +168,6 @@ struct AVCodecContextPtr
explicit AVCodecContextPtr(AVCodecContext* p);
};

// Allocate codec context from either decoder name or ID.
//
//   codec_id - codec ID, used when no decoder name is given.
//   decoder  - optional decoder name.
AVCodecContextPtr get_decode_context(
enum AVCodecID codec_id,
const c10::optional<std::string>& decoder);

// Initialize codec context with the parameters.
//
//   pCodecContext  - codec context to initialize.
//   pParams        - codec parameters to apply to the context.
//   decoder_name   - optional decoder name.
//   decoder_option - key/value options for the decoder.
//   device         - device on which decoding is requested.
//   pHWBufferRef   - passed by reference; presumably receives the HW device
//                    context when HW decoding is set up (confirm against the
//                    definition).
void init_codec_context(
AVCodecContext* pCodecContext,
AVCodecParameters* pParams,
const c10::optional<std::string>& decoder_name,
const OptionDict& decoder_option,
const torch::Device& device,
AVBufferRefPtr& pHWBufferRef);

////////////////////////////////////////////////////////////////////////////////
// AVFilterGraph
////////////////////////////////////////////////////////////////////////////////
Expand Down
Loading