From 87d9cefe2d70b8b87d5c30ef142516f4b4b930ee Mon Sep 17 00:00:00 2001 From: Antoine C Date: Wed, 3 Apr 2024 11:36:11 +0100 Subject: [PATCH 1/3] feat: refactor FFmpeg soundsource to allow other soundsource to inherit it --- src/sources/soundsourceffmpeg.cpp | 239 +++++++++++++++--------------- src/sources/soundsourceffmpeg.h | 47 ++++-- 2 files changed, 157 insertions(+), 129 deletions(-) diff --git a/src/sources/soundsourceffmpeg.cpp b/src/sources/soundsourceffmpeg.cpp index 3c0a9ec881e..41141ee6990 100644 --- a/src/sources/soundsourceffmpeg.cpp +++ b/src/sources/soundsourceffmpeg.cpp @@ -22,8 +22,6 @@ namespace { // FFmpeg constants -constexpr AVSampleFormat kavSampleFormat = AV_SAMPLE_FMT_FLT; - #if LIBAVUTIL_VERSION_INT < AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1 constexpr uint64_t kavChannelLayoutUndefined = 0; #endif @@ -45,19 +43,6 @@ constexpr FrameCount kDefaultFrameBufferCapacity = 48000; constexpr FrameCount kMinFrameBufferCapacity = kDefaultFrameBufferCapacity; -inline FrameCount frameBufferCapacityForStream( - const AVStream& avStream) { - DEBUG_ASSERT(kMinFrameBufferCapacity <= kDefaultFrameBufferCapacity); - if (avStream.codecpar->frame_size > 0) { - return math_max( - static_cast( - avStream.codecpar->frame_size * - kavMaxDecodedFramesPerPacket), - kMinFrameBufferCapacity); - } - return kDefaultFrameBufferCapacity; -} - // "AAC Audio - Encoder Delay and Synchronization: The 2112 Sample Assumption" // https://developer.apple.com/library/ios/technotes/tn2258/_index.html // "It must also be assumed that without an explicit value, the playback @@ -70,47 +55,6 @@ constexpr SINT kMaxSamplesPerMP3Frame = 1152; const Logger kLogger("SoundSourceFFmpeg"); -// FFmpeg API Changes: -// https://github.com/FFmpeg/FFmpeg/blob/master/doc/APIchanges - -#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1 -void initChannelLayoutFromStream( - AVChannelLayout* pUninitializedChannelLayout, - const AVStream& avStream) { - if (avStream.codecpar->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) { - // Workaround: FFmpeg sometimes fails to determine the channel - // layout, e.g. for a mono WAV files with a single channel! - av_channel_layout_default(pUninitializedChannelLayout, - avStream.codecpar->ch_layout.nb_channels); - kLogger.info() - << "Unknown channel layout -> using default layout" - << pUninitializedChannelLayout->order - << "for" - << avStream.codecpar->ch_layout.nb_channels - << "channel(s)"; - } else { - av_channel_layout_default(pUninitializedChannelLayout, 0); - av_channel_layout_copy(pUninitializedChannelLayout, &avStream.codecpar->ch_layout); - } -} -#else -int64_t getStreamChannelLayout(const AVStream& avStream) { - auto channel_layout = avStream.codecpar->channel_layout; - if (channel_layout == kavChannelLayoutUndefined) { - // Workaround: FFmpeg sometimes fails to determine the channel - // layout, e.g. for a mono WAV files with a single channel! - channel_layout = av_get_default_channel_layout(avStream.codecpar->channels); - kLogger.info() - << "Unknown channel layout -> using default layout" - << channel_layout - << "for" - << avStream.codecpar->channels - << "channel(s)"; - } - return channel_layout; -} -#endif - int64_t getStreamStartTime(const AVStream& avStream) { auto start_time = avStream.start_time; if (start_time == AV_NOPTS_VALUE) { @@ -167,15 +111,95 @@ inline int64_t convertFrameIndexToStreamTime(const AVStream& avStream, SINT fram avStream.time_base); } -IndexRange getStreamFrameIndexRange(const AVStream& avStream) { - const auto frameIndexRange = IndexRange::between( - convertStreamTimeToFrameIndex(avStream, getStreamStartTime(avStream)), - convertStreamTimeToFrameIndex(avStream, getStreamEndTime(avStream))); - DEBUG_ASSERT(frameIndexRange.orientation() != IndexRange::Orientation::Backward); - return frameIndexRange; +#if VERBOSE_DEBUG_LOG +inline void avTrace(const char* preamble, const AVPacket& avPacket) { + kLogger.debug() + << preamble + << "{ stream_index" << avPacket.stream_index + << "| pos" << avPacket.pos + << "| size" << avPacket.size + << "| dst" << avPacket.dts + << "| pts" << avPacket.pts + << "| duration" << avPacket.duration + << '}'; +} + +inline void avTrace(const char* preamble, const AVFrame& avFrame) { + kLogger.debug() + << preamble + << "{ channels" << avFrame.channels + << "| channel_layout" << avFrame.channel_layout + << "| format" << avFrame.format + << "| sample_rate" << avFrame.sample_rate + << "| pkt_dts" << avFrame.pkt_dts + << "| pkt_duration" << avFrame.pkt_duration + << "| pts" << avFrame.pts + << "| nb_samples" << avFrame.nb_samples + << '}'; } +#endif // VERBOSE_DEBUG_LOG -SINT getStreamSeekPrerollFrameCount(const AVStream& avStream) { +} // anonymous namespace + +// FFmpeg API Changes: +// https://github.com/FFmpeg/FFmpeg/blob/master/doc/APIchanges + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1 +// Static +void SoundSourceFFmpeg::initChannelLayoutFromStream( + AVChannelLayout* pUninitializedChannelLayout, + const AVStream& avStream) { + if (avStream.codecpar->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) { + // Workaround: FFmpeg sometimes fails to determine the channel + // layout, e.g. for a mono WAV files with a single channel! + av_channel_layout_default(pUninitializedChannelLayout, + avStream.codecpar->ch_layout.nb_channels); + kLogger.info() + << "Unknown channel layout -> using default layout" + << pUninitializedChannelLayout->order + << "for" + << avStream.codecpar->ch_layout.nb_channels + << "channel(s)"; + } else { + av_channel_layout_default(pUninitializedChannelLayout, 0); + av_channel_layout_copy(pUninitializedChannelLayout, &avStream.codecpar->ch_layout); + } +} +#else +// Static +int64_t SoundSourceFFmpeg::getStreamChannelLayout(const AVStream& avStream) { + auto channel_layout = avStream.codecpar->channel_layout; + if (channel_layout == kavChannelLayoutUndefined) { + // Workaround: FFmpeg sometimes fails to determine the channel + // layout, e.g. for a mono WAV files with a single channel! + channel_layout = av_get_default_channel_layout(avStream.codecpar->channels); + kLogger.info() + << "Unknown channel layout -> using default layout" + << channel_layout + << "for" + << avStream.codecpar->channels + << "channel(s)"; + } + return channel_layout; +} +#endif + +// Static +FrameCount SoundSourceFFmpeg::frameBufferCapacityForStream( + const AVStream& avStream) { + DEBUG_ASSERT(kMinFrameBufferCapacity <= kDefaultFrameBufferCapacity); + if (avStream.codecpar->frame_size > 0) { + return math_max( + static_cast( + avStream.codecpar->frame_size * + kavMaxDecodedFramesPerPacket), + kMinFrameBufferCapacity); + } + return kDefaultFrameBufferCapacity; +} + +// Static +SINT SoundSourceFFmpeg::getStreamSeekPrerollFrameCount(const AVStream& avStream) { // Stream might not provide an appropriate value that is // sufficient for sample accurate decoding const SINT defaultSeekPrerollFrameCount = @@ -218,7 +242,34 @@ SINT getStreamSeekPrerollFrameCount(const AVStream& avStream) { } } -inline QString formatErrorString(int errnum) { +// Static +IndexRange SoundSourceFFmpeg::getStreamFrameIndexRange(const AVStream& avStream) { + const auto frameIndexRange = IndexRange::between( + convertStreamTimeToFrameIndex(avStream, getStreamStartTime(avStream)), + convertStreamTimeToFrameIndex(avStream, getStreamEndTime(avStream))); + DEBUG_ASSERT(frameIndexRange.orientation() != IndexRange::Orientation::Backward); + return frameIndexRange; +} + +// Static +bool SoundSourceFFmpeg::openDecodingContext( + AVCodecContext* pavCodecContext) { + DEBUG_ASSERT(pavCodecContext != nullptr); + + const int avcodec_open2_result = + avcodec_open2(pavCodecContext, pavCodecContext->codec, nullptr); + if (avcodec_open2_result != 0) { + DEBUG_ASSERT(avcodec_open2_result < 0); + kLogger.warning().noquote() + << "avcodec_open2() failed:" + << SoundSourceFFmpeg::formatErrorString(avcodec_open2_result); + return false; + } + return true; +} + +// Static +QString SoundSourceFFmpeg::formatErrorString(int errnum) { // Allocate a static buffer on the stack and initialize it // with a `\0` terminator for extra safety if av_strerror() // unexpectedly fails and does nothing. @@ -230,35 +281,8 @@ inline QString formatErrorString(int errnum) { return QString::fromLocal8Bit(errbuf); } -#if VERBOSE_DEBUG_LOG -inline void avTrace(const char* preamble, const AVPacket& avPacket) { - kLogger.debug() - << preamble - << "{ stream_index" << avPacket.stream_index - << "| pos" << avPacket.pos - << "| size" << avPacket.size - << "| dst" << avPacket.dts - << "| pts" << avPacket.pts - << "| duration" << avPacket.duration - << '}'; -} - -inline void avTrace(const char* preamble, const AVFrame& avFrame) { - kLogger.debug() - << preamble - << "{ channels" << avFrame.channels - << "| channel_layout" << avFrame.channel_layout - << "| format" << avFrame.format - << "| sample_rate" << avFrame.sample_rate - << "| pkt_dts" << avFrame.pkt_dts - << "| pkt_duration" << avFrame.pkt_duration - << "| pts" << avFrame.pts - << "| nb_samples" << avFrame.nb_samples - << '}'; -} -#endif // VERBOSE_DEBUG_LOG - -AVFormatContext* openInputFile( +// Static +AVFormatContext* SoundSourceFFmpeg::openInputFile( const QString& fileName) { // Will be allocated implicitly when opening the input file AVFormatContext* pavInputFormatContext = nullptr; @@ -277,23 +301,6 @@ AVFormatContext* openInputFile( return pavInputFormatContext; } -bool openDecodingContext( - AVCodecContext* pavCodecContext) { - DEBUG_ASSERT(pavCodecContext != nullptr); - - const int avcodec_open2_result = avcodec_open2(pavCodecContext, pavCodecContext->codec, nullptr); - if (avcodec_open2_result != 0) { - DEBUG_ASSERT(avcodec_open2_result < 0); - kLogger.warning().noquote() - << "avcodec_open2() failed:" - << formatErrorString(avcodec_open2_result); - return false; - } - return true; -} - -} // anonymous namespace - void SoundSourceFFmpeg::InputAVFormatContextPtr::take( AVFormatContext** ppavInputFormatContext) { DEBUG_ASSERT(ppavInputFormatContext != nullptr); @@ -477,10 +484,10 @@ QString SoundSourceProviderFFmpeg::getVersionString() const { SoundSourceFFmpeg::SoundSourceFFmpeg(const QUrl& url) : SoundSource(url), m_pavStream(nullptr), - m_pavPacket(av_packet_alloc()), m_pavDecodedFrame(nullptr), - m_pavResampledFrame(nullptr), m_seekPrerollFrameCount(0), + m_pavPacket(av_packet_alloc()), + m_pavResampledFrame(nullptr), m_avutilVersion(avutil_version()) { DEBUG_ASSERT(m_pavPacket); #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1 @@ -594,7 +601,7 @@ SoundSource::OpenResult SoundSourceFFmpeg::tryOpen( } // Request output format - pavCodecContext->request_sample_fmt = kavSampleFormat; + pavCodecContext->request_sample_fmt = s_avSampleFormat; if (params.getSignalInfo().getChannelCount().isValid()) { // A dedicated number of channels for the output signal // has been requested. Forward this to FFmpeg to avoid @@ -767,7 +774,7 @@ bool SoundSourceFFmpeg::initResampling( const auto avStreamSampleFormat = m_pavCodecContext->sample_fmt; const auto avResampledSampleFormat = - kavSampleFormat; + s_avSampleFormat; // NOTE(uklotzde): We prefer not to change adjust sample rate here, because // all the frame calculations while decoding use the frame information // from the underlying stream! We only need resampling for up-/downsampling @@ -904,7 +911,7 @@ SINT readNextPacket( } else { kLogger.warning().noquote() << "av_read_frame() failed:" - << formatErrorString(av_read_frame_result); + << SoundSourceFFmpeg::formatErrorString(av_read_frame_result); return ReadAheadFrameBuffer::kInvalidFrameIndex; } } @@ -1039,7 +1046,7 @@ const CSAMPLE* SoundSourceFFmpeg::resampleDecodedAVFrame() { if (m_pSwrContext) { // Decoded frame must be resampled before reading m_pavResampledFrame->sample_rate = getSignalInfo().getSampleRate(); - m_pavResampledFrame->format = kavSampleFormat; + m_pavResampledFrame->format = s_avSampleFormat; #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1 av_channel_layout_copy(&m_pavResampledFrame->ch_layout, &m_avResampledChannelLayout); if (m_pavDecodedFrame->ch_layout.order == AV_CHANNEL_ORDER_UNSPEC) { diff --git a/src/sources/soundsourceffmpeg.h b/src/sources/soundsourceffmpeg.h index 4e0057b8407..a16d4db4edd 100644 --- a/src/sources/soundsourceffmpeg.h +++ b/src/sources/soundsourceffmpeg.h @@ -20,18 +20,17 @@ class SoundSourceFFmpeg : public SoundSource { void close() override; + static QString formatErrorString(int errnum); + protected: ReadableSampleFrames readSampleFramesClamped( const WritableSampleFrames& sampleFrames) override; - private: - OpenResult tryOpen( + virtual OpenResult tryOpen( OpenMode mode, const OpenParams& params) override; - bool initResampling( - audio::ChannelCount* pResampledChannelCount, - audio::SampleRate* pResampledSampleRate); + private: const CSAMPLE* resampleDecodedAVFrame(); // Seek to the requested start index (if needed) or return false @@ -84,10 +83,8 @@ class SoundSourceFFmpeg : public SoundSource { private: AVFormatContext* m_pavInputFormatContext; }; - InputAVFormatContextPtr m_pavInputFormatContext; - - AVStream* m_pavStream; + protected: // Takes ownership of an opened (audio) codec context and ensures that // the corresponding AVCodecContext is closed, either explicitly or // implicitly by the destructor. The wrapper can only be moved, @@ -131,7 +128,36 @@ class SoundSourceFFmpeg : public SoundSource { private: AVCodecContext* m_pavCodecContext; }; + + bool initResampling( + audio::ChannelCount* pResampledChannelCount, + audio::SampleRate* pResampledSampleRate); + + public: + // The following static functions are used by children classes + static AVFormatContext* openInputFile(const QString& fileName); + static bool openDecodingContext(AVCodecContext* pavCodecContext); +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1 + static void initChannelLayoutFromStream( + AVChannelLayout* pUninitializedChannelLayout, + const AVStream& avStream); +#else + static int64_t getStreamChannelLayout(const AVStream& avStream); +#endif + static IndexRange getStreamFrameIndexRange(const AVStream& avStream); + static SINT getStreamSeekPrerollFrameCount(const AVStream& avStream); + static FrameCount frameBufferCapacityForStream(const AVStream& avStream); + + protected: + InputAVFormatContextPtr m_pavInputFormatContext; + AVStream* m_pavStream; AVCodecContextPtr m_pavCodecContext; + AVFrame* m_pavDecodedFrame; + FrameCount m_seekPrerollFrameCount; + ReadAheadFrameBuffer m_frameBuffer; + + // FFmpeg static constants + static constexpr AVSampleFormat s_avSampleFormat = AV_SAMPLE_FMT_FLT; // Resampler class SwrContextPtr final { @@ -184,13 +210,8 @@ class SoundSourceFFmpeg : public SoundSource { AVPacket* m_pavPacket; - AVFrame* m_pavDecodedFrame; AVFrame* m_pavResampledFrame; - FrameCount m_seekPrerollFrameCount; - - ReadAheadFrameBuffer m_frameBuffer; - const unsigned int m_avutilVersion; }; From 715210a2889c6cc239593b02ab5b62a3ed69709e Mon Sep 17 00:00:00 2001 From: Antoine C Date: Sat, 13 Apr 2024 12:14:39 +0100 Subject: [PATCH 2/3] Use QString instead of char ptr --- src/sources/soundsourceffmpeg.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/sources/soundsourceffmpeg.cpp b/src/sources/soundsourceffmpeg.cpp index 41141ee6990..5cd14163444 100644 --- a/src/sources/soundsourceffmpeg.cpp +++ b/src/sources/soundsourceffmpeg.cpp @@ -112,7 +112,7 @@ inline int64_t convertFrameIndexToStreamTime(const AVStream& avStream, SINT fram } #if VERBOSE_DEBUG_LOG -inline void avTrace(const char* preamble, const AVPacket& avPacket) { +inline void avTrace(const QString& preamble, const AVPacket& avPacket) { kLogger.debug() << preamble << "{ stream_index" << avPacket.stream_index @@ -124,7 +124,7 @@ inline void avTrace(const char* preamble, const AVPacket& avPacket) { << '}'; } -inline void avTrace(const char* preamble, const AVFrame& avFrame) { +inline void avTrace(const QString& preamble, const AVFrame& avFrame) { kLogger.debug() << preamble << "{ channels" << avFrame.channels From c98b92ad88aa903c3392ed5f20cd96d2a376596e Mon Sep 17 00:00:00 2001 From: Antoine C Date: Sat, 13 Apr 2024 20:31:19 +0100 Subject: [PATCH 3/3] Add comment for static method public visibility --- src/sources/soundsourceffmpeg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/sources/soundsourceffmpeg.h b/src/sources/soundsourceffmpeg.h index a16d4db4edd..0d99a74d6ea 100644 --- a/src/sources/soundsourceffmpeg.h +++ b/src/sources/soundsourceffmpeg.h @@ -134,7 +134,8 @@ class SoundSourceFFmpeg : public SoundSource { audio::SampleRate* pResampledSampleRate); public: - // The following static functions are used by children classes + // The following static functions are used by children and closely related + // classes, this is why these static methods aren't defined as protected. static AVFormatContext* openInputFile(const QString& fileName); static bool openDecodingContext(AVCodecContext* pavCodecContext); #if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 28, 100) // FFmpeg 5.1