From ed67fcdaf83802512627390be6a7230b25c95a0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Damstedt=20Rasmussen?= Date: Wed, 6 Nov 2024 22:48:09 +0100 Subject: [PATCH] Adapt to WebRTC r6818 Voice detection and level estimation support have been removed in WebRTC audio processing. Voice detection code is now only compiled when macro ENABLE_WEBRTC_VOICEDETECTION is defined. Level estimation code is now only compiled when macro ENABLE_WEBRTC_LEVELESTIMATION is defined. The gain controller 2 saturation margin settings are only compiled when macro ENABLE_WEBRTC_GAINCONTROLLER2 is defined. The audio processing instance is now held in a rtc::scoped_refptr, so m_apm is cleared by assigning nullptr. scoped_refptr::release() must not be used for this because it gives up the pointer without dropping the reference, which leaks the instance. --- Library/TeamTalkLib/avstream/MediaPlayback.cpp | 4 ++-- Library/TeamTalkLib/avstream/MediaPlayback.h | 2 +- Library/TeamTalkLib/avstream/SoundLoopback.cpp | 6 +++--- Library/TeamTalkLib/avstream/SoundLoopback.h | 2 +- .../TeamTalkLib/avstream/WebRTCPreprocess.cpp | 9 +++++++-- Library/TeamTalkLib/avstream/WebRTCPreprocess.h | 2 +- Library/TeamTalkLib/bin/dll/Convert.cpp | 16 ++++++++++++++-- .../TeamTalkLib/teamtalk/client/AudioThread.cpp | 17 +++++++++-------- .../TeamTalkLib/teamtalk/client/AudioThread.h | 2 +- 9 files changed, 39 insertions(+), 21 deletions(-) diff --git a/Library/TeamTalkLib/avstream/MediaPlayback.cpp b/Library/TeamTalkLib/avstream/MediaPlayback.cpp index 75b5217674..235578aaf2 100644 --- a/Library/TeamTalkLib/avstream/MediaPlayback.cpp +++ b/Library/TeamTalkLib/avstream/MediaPlayback.cpp @@ -199,12 +199,12 @@ bool MediaPlayback::SetupSpeexPreprocess(bool enableagc, const SpeexAGC& agc, bool MediaPlayback::SetupWebRTCPreprocess(const webrtc::AudioProcessing::Config& webrtc) { if (!m_apm) - m_apm.reset(webrtc::AudioProcessingBuilder().Create()); + m_apm = webrtc::AudioProcessingBuilder().Create(); m_apm->ApplyConfig(webrtc); if (m_apm->Initialize() != webrtc::AudioProcessing::kNoError) { - m_apm.reset(); + m_apm = nullptr; return false; } return true; diff --git a/Library/TeamTalkLib/avstream/MediaPlayback.h b/Library/TeamTalkLib/avstream/MediaPlayback.h index 219cc78ef5..25aa42bb3a 100644 --- a/Library/TeamTalkLib/avstream/MediaPlayback.h +++ b/Library/TeamTalkLib/avstream/MediaPlayback.h @@ -134,7 +134,7 @@ class 
MediaPlayback : public soundsystem::StreamPlayer std::shared_ptr m_preprocess_left, m_preprocess_right; #endif #if defined(ENABLE_WEBRTC) - std::unique_ptr m_apm; + rtc::scoped_refptr m_apm; #endif bool m_last_callback = false; ACE_Future m_drained; diff --git a/Library/TeamTalkLib/avstream/SoundLoopback.cpp b/Library/TeamTalkLib/avstream/SoundLoopback.cpp index 29814b5c4e..9fc3eded5b 100644 --- a/Library/TeamTalkLib/avstream/SoundLoopback.cpp +++ b/Library/TeamTalkLib/avstream/SoundLoopback.cpp @@ -120,7 +120,7 @@ bool SoundLoopback::StartTest(int inputdevid, int outputdevid, #if defined(ENABLE_WEBRTC) if (IsEnabled(apm_cfg)) { - m_apm.reset(webrtc::AudioProcessingBuilder().Create()); + m_apm = webrtc::AudioProcessingBuilder().Create(); if (!m_apm) { StopTest(); @@ -214,7 +214,7 @@ bool SoundLoopback::StartDuplexTest(int inputdevid, int outputdevid, #if defined(ENABLE_WEBRTC) if (IsEnabled(apm_cfg)) { - m_apm.reset(webrtc::AudioProcessingBuilder().Create()); + m_apm = webrtc::AudioProcessingBuilder().Create(); if (!m_apm) { StopTest(); @@ -254,7 +254,7 @@ bool SoundLoopback::StopTest() #endif #if defined(ENABLE_WEBRTC) - m_apm.reset(); + m_apm = nullptr; #endif m_preprocess_buffer_left.clear(); m_preprocess_buffer_right.clear(); diff --git a/Library/TeamTalkLib/avstream/SoundLoopback.h b/Library/TeamTalkLib/avstream/SoundLoopback.h index dfc6f9a765..a116015bc5 100644 --- a/Library/TeamTalkLib/avstream/SoundLoopback.h +++ b/Library/TeamTalkLib/avstream/SoundLoopback.h @@ -111,7 +111,7 @@ class SoundLoopback SpeexPreprocess m_preprocess_left, m_preprocess_right; #endif #if defined(ENABLE_WEBRTC) - std::unique_ptr m_apm; + rtc::scoped_refptr m_apm; std::vector m_prev_buffer; #endif std::vector m_preprocess_buffer_left, m_preprocess_buffer_right; diff --git a/Library/TeamTalkLib/avstream/WebRTCPreprocess.cpp b/Library/TeamTalkLib/avstream/WebRTCPreprocess.cpp index 9d35999599..0d5cef5992 100644 --- a/Library/TeamTalkLib/avstream/WebRTCPreprocess.cpp +++ 
b/Library/TeamTalkLib/avstream/WebRTCPreprocess.cpp @@ -30,8 +30,9 @@ // webrtc::GainControlImpl queries this feature. Field trials is // excluded by passing rtc_exclude_field_trial_default=true to GN. namespace webrtc { namespace field_trial { -std::string FindFullName(const std::string& trial) +std::string FindFullName(absl::string_view trial_) { + std::string trial(trial_); #if defined(UNICODE) ACE_TString str = LocalToUnicode(trial.c_str()); #else @@ -82,7 +83,7 @@ int WebRTCPreprocess(webrtc::AudioProcessing& apm, const media::AudioFrame& infr } // AudioProcessingStats - int output_rms_dbfs = 0; + int output_rms_dbfs = 0; bool voice_detected = false; int in_index = 0, out_index = 0, n = 0; @@ -116,9 +117,11 @@ int WebRTCPreprocess(webrtc::AudioProcessing& apm, const media::AudioFrame& infr if (stats) { auto wstats = apm.GetStatistics(); +#if defined(ENABLE_WEBRTC_LEVELESTIMATION) output_rms_dbfs += wstats.output_rms_dbfs.value_or(0); assert(!wstats.output_rms_dbfs.has_value() || wstats.output_rms_dbfs.value() <= 127); assert(!wstats.output_rms_dbfs.has_value() || wstats.output_rms_dbfs.value() >= 0); +#endif voice_detected |= wstats.voice_detected.value_or(false); } @@ -129,7 +132,9 @@ int WebRTCPreprocess(webrtc::AudioProcessing& apm, const media::AudioFrame& infr if (stats && n > 0) { +#if defined(ENABLE_WEBRTC_LEVELESTIMATION) stats->output_rms_dbfs = output_rms_dbfs / n; +#endif stats->voice_detected = voice_detected; } diff --git a/Library/TeamTalkLib/avstream/WebRTCPreprocess.h b/Library/TeamTalkLib/avstream/WebRTCPreprocess.h index eb11137630..2bc317d172 100644 --- a/Library/TeamTalkLib/avstream/WebRTCPreprocess.h +++ b/Library/TeamTalkLib/avstream/WebRTCPreprocess.h @@ -25,10 +25,10 @@ #define WEBRTCPREPROCESS_H #include +#include #include -#include bool IsEnabled(const webrtc::AudioProcessing::Config& cfg); diff --git a/Library/TeamTalkLib/bin/dll/Convert.cpp b/Library/TeamTalkLib/bin/dll/Convert.cpp index a18d65213a..7dfa4cc524 100644 --- 
a/Library/TeamTalkLib/bin/dll/Convert.cpp +++ b/Library/TeamTalkLib/bin/dll/Convert.cpp @@ -1067,17 +1067,23 @@ void Convert(const WebRTCAudioPreprocessor& webrtc, webrtc::AudioProcessing::Con break; } +#if defined(ENABLE_WEBRTC_VOICEDETECTION) result.voice_detection.enabled = webrtc.voicedetection.bEnable; - +#endif + result.gain_controller2.enabled = webrtc.gaincontroller2.bEnable; result.gain_controller2.fixed_digital.gain_db = webrtc.gaincontroller2.fixeddigital.fGainDB; result.gain_controller2.adaptive_digital.enabled = webrtc.gaincontroller2.adaptivedigital.bEnable; +#if defined(ENABLE_WEBRTC_GAINCONTROLLER2) result.gain_controller2.adaptive_digital.initial_saturation_margin_db = webrtc.gaincontroller2.adaptivedigital.fInitialSaturationMarginDB; result.gain_controller2.adaptive_digital.extra_saturation_margin_db = webrtc.gaincontroller2.adaptivedigital.fExtraSaturationMarginDB; +#endif result.gain_controller2.adaptive_digital.max_gain_change_db_per_second = webrtc.gaincontroller2.adaptivedigital.fMaxGainChangeDBPerSecond; result.gain_controller2.adaptive_digital.max_output_noise_level_dbfs = webrtc.gaincontroller2.adaptivedigital.fMaxOutputNoiseLevelDBFS; +#if defined(ENABLE_WEBRTC_LEVELESTIMATION) result.level_estimation.enabled = webrtc.levelestimation.bEnable; +#endif } void Convert(const webrtc::AudioProcessing::Config& cfg, WebRTCAudioPreprocessor& result) @@ -1086,21 +1092,27 @@ void Convert(const webrtc::AudioProcessing::Config& cfg, WebRTCAudioPreprocessor result.preamplifier.fFixedGainFactor = cfg.pre_amplifier.fixed_gain_factor; result.echocanceller.bEnable = cfg.echo_canceller.enabled; - + result.noisesuppression.bEnable = cfg.noise_suppression.enabled; result.noisesuppression.nLevel = cfg.noise_suppression.level; +#if defined(ENABLE_WEBRTC_VOICEDETECTION) result.voicedetection.bEnable = cfg.voice_detection.enabled; +#endif result.gaincontroller2.bEnable = cfg.gain_controller2.enabled; result.gaincontroller2.fixeddigital.fGainDB = 
cfg.gain_controller2.fixed_digital.gain_db; result.gaincontroller2.adaptivedigital.bEnable = cfg.gain_controller2.adaptive_digital.enabled; +#if defined(ENABLE_WEBRTC_GAINCONTROLLER2) result.gaincontroller2.adaptivedigital.fInitialSaturationMarginDB = cfg.gain_controller2.adaptive_digital.initial_saturation_margin_db; result.gaincontroller2.adaptivedigital.fExtraSaturationMarginDB = cfg.gain_controller2.adaptive_digital.extra_saturation_margin_db; +#endif result.gaincontroller2.adaptivedigital.fMaxGainChangeDBPerSecond = cfg.gain_controller2.adaptive_digital.max_gain_change_db_per_second; result.gaincontroller2.adaptivedigital.fMaxOutputNoiseLevelDBFS = cfg.gain_controller2.adaptive_digital.max_output_noise_level_dbfs; +#if defined(ENABLE_WEBRTC_LEVELESTIMATION) result.levelestimation.bEnable = cfg.level_estimation.enabled; +#endif } #endif diff --git a/Library/TeamTalkLib/teamtalk/client/AudioThread.cpp b/Library/TeamTalkLib/teamtalk/client/AudioThread.cpp index 616ec7d64b..ef95af6eec 100644 --- a/Library/TeamTalkLib/teamtalk/client/AudioThread.cpp +++ b/Library/TeamTalkLib/teamtalk/client/AudioThread.cpp @@ -189,7 +189,7 @@ void AudioThread::StopEncoder() #endif #if defined(ENABLE_WEBRTC) - m_apm.reset(); + m_apm = nullptr; m_aps.reset(); #endif @@ -235,7 +235,7 @@ bool AudioThread::UpdatePreprocessor(const teamtalk::AudioPreprocessor& preproce #if defined(ENABLE_WEBRTC) if (preprocess.preprocessor != AUDIOPREPROCESSOR_WEBRTC) { - m_apm.reset(); + m_apm = nullptr; m_aps.reset(); } #endif @@ -267,11 +267,11 @@ bool AudioThread::UpdatePreprocessor(const teamtalk::AudioPreprocessor& preproce } if (!m_apm) - m_apm.reset(webrtc::AudioProcessingBuilder().Create()); + m_apm = webrtc::AudioProcessingBuilder().Create(); m_apm->ApplyConfig(preprocess.webrtc); if (m_apm->Initialize() != webrtc::AudioProcessing::kNoError) { - m_apm.reset(); + m_apm = nullptr; MYTRACE(ACE_TEXT("Failed to initialize WebRTC audio preprocessor\n")); return false; } @@ -424,7 +424,7 @@ void 
AudioThread::QueueAudio(ACE_Message_Block* mb_audio) bool AudioThread::IsVoiceActive() { -#if defined(ENABLE_WEBRTC) +#if defined(ENABLE_WEBRTC_VOICEDETECTION) std::unique_lock g(m_preprocess_lock); if (m_apm && m_apm->GetConfig().voice_detection.enabled) @@ -440,14 +440,13 @@ bool AudioThread::IsVoiceActive() int AudioThread::GetCurrentVoiceLevel() { -#if defined(ENABLE_WEBRTC) +#if defined(ENABLE_WEBRTC_LEVELESTIMATION) std::unique_lock g(m_preprocess_lock); if (m_apm) { assert(m_aps); - auto cfg = m_apm->GetConfig(); - if (cfg.level_estimation.enabled) + if (m_apm->GetConfig().level_estimation.enabled) { // WebRTC's maximum value for dB from digital full scale float value = 127.f - m_aps->output_rms_dbfs.value_or(0); @@ -693,6 +692,7 @@ void AudioThread::PreprocessWebRTC(media::AudioFrame& audblock, bool& vad) MYTRACE(ACE_TEXT("WebRTC failed to process audio\n")); } +#if defined(ENABLE_WEBRTC_VOICEDETECTION) vad = m_apm->GetConfig().voice_detection.enabled; if (vad) { @@ -700,6 +700,7 @@ void AudioThread::PreprocessWebRTC(media::AudioFrame& audblock, bool& vad) if (m_aps->voice_detected.value_or(false)) m_lastActive = ACE_OS::gettimeofday(); } +#endif } #endif diff --git a/Library/TeamTalkLib/teamtalk/client/AudioThread.h b/Library/TeamTalkLib/teamtalk/client/AudioThread.h index a6cd390704..e5e9c88b88 100644 --- a/Library/TeamTalkLib/teamtalk/client/AudioThread.h +++ b/Library/TeamTalkLib/teamtalk/client/AudioThread.h @@ -112,7 +112,7 @@ class AudioThread : protected ACE_Task std::unique_ptr m_preprocess_left, m_preprocess_right; #endif #if defined(ENABLE_WEBRTC) - std::unique_ptr m_apm; + rtc::scoped_refptr m_apm; std::unique_ptr m_aps; #endif #if defined(ENABLE_SPEEX)