diff --git a/include/dpp/cluster.h b/include/dpp/cluster.h index 2aa1e78129..4825fe497f 100644 --- a/include/dpp/cluster.h +++ b/include/dpp/cluster.h @@ -1334,17 +1334,6 @@ class DPP_EXPORT cluster { event_router_t on_voice_buffer_send; - /** - * @brief Called when a user is talking on a voice channel. - * - * @warning If the cache policy has disabled guild caching, the pointer to the guild in this event may be nullptr. - * - * @note Use operator() to attach a lambda to this event, and the detach method to detach the listener using the returned ID. - * The function signature for this event takes a single `const` reference of type voice_user_talking_t&, and returns void. - */ - event_router_t on_voice_user_talking; - - /** * @brief Called when a voice channel is connected and ready to send audio. * Note that this is not directly attached to the READY event of the websocket, diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h index 658ba6422a..1330b7e0b2 100644 --- a/include/dpp/discordvoiceclient.h +++ b/include/dpp/discordvoiceclient.h @@ -240,6 +240,11 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ void cleanup(); + /** + * @brief A frame of silence packet + */ + static constexpr uint8_t silence_packet[3] = { 0xf8, 0xff, 0xfe }; + /** * @brief Mutex for outbound packet stream */ @@ -434,6 +439,13 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ bool paused; + /** + * @brief Whether has sent 5 frame of silence before stopping on pause. + * + * This is to avoid unintended Opus interpolation with subsequent transmissions. + */ + bool sent_stop_frames; + #ifdef HAVE_VOICE /** * @brief libopus encoder @@ -650,8 +662,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client * @param packet packet data * @param len length of packet * @param duration duration of opus packet + * @param send_now send this packet right away without buffering. + * Do NOT set send_now to true outside write_ready. */ - void send(const char* packet, size_t len, uint64_t duration); + void send(const char* packet, size_t len, uint64_t duration, bool send_now = false); /** * @brief Queue a message to be sent via the websocket @@ -962,6 +976,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client * @param duration Generally duration is 2.5, 5, 10, 20, 40 or 60 * if the timescale is 1000000 (1ms) * + * @param send_now Send this packet right away without buffering, + * this will skip duration calculation for the packet being sent + * and only safe to be set to true in write_ready. + * * @return discord_voice_client& Reference to self * * @note It is your responsibility to ensure that packets of data @@ -972,7 +990,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client * * @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++ */ - discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration); + discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now = false); /** * @brief Send opus packets to the voice channel @@ -999,7 +1017,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client * * @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++ */ - discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length); + discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length); /** * @brief Send silence to the voice channel @@ -1012,6 +1030,19 @@ class DPP_EXPORT discord_voice_client : public websocket_client */ discord_voice_client& send_silence(const uint64_t duration); + /** + * @brief Send stop frames to the voice channel. + * + * @param send_now send this packet right away without buffering. + * Do NOT set send_now to true outside write_ready. + * Also make sure you're not locking stream_mutex if you + * don't set send_now to true. + * + * @return discord_voice_client& Reference to self + * @throw dpp::voice_exception if voice support is not compiled into D++ + */ + discord_voice_client& send_stop_frames(bool send_now = false); + /** * @brief Sets the audio type that will be sent with send_audio_* methods. * diff --git a/include/dpp/dispatcher.h b/include/dpp/dispatcher.h index 151c2e8880..749f3a0454 100644 --- a/include/dpp/dispatcher.h +++ b/include/dpp/dispatcher.h @@ -1989,30 +1989,7 @@ struct DPP_EXPORT voice_buffer_send_t : public event_dispatch_t { }; /** - * @brief voice user talking - */ -struct DPP_EXPORT voice_user_talking_t : public event_dispatch_t { - using event_dispatch_t::event_dispatch_t; - using event_dispatch_t::operator=; - - /** - * @brief voice client where user is talking - */ - class discord_voice_client* voice_client = nullptr; - - /** - * @brief talking user id - */ - snowflake user_id = {}; - - /** - * @brief flags for talking user - */ - uint8_t talking_flags = 0; -}; - -/** - * @brief voice user talking + * @brief voice ready */ struct DPP_EXPORT voice_ready_t : public event_dispatch_t { using event_dispatch_t::event_dispatch_t; diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp index 1b8fe7b95f..4a4f484968 100644 --- a/src/dpp/discordvoiceclient.cpp +++ b/src/dpp/discordvoiceclient.cpp @@ -20,6 +20,7 @@ * ************************************************************************************/ +#include #ifdef _WIN32 #include #include @@ -148,6 +149,9 @@ bool discord_voice_client::is_end_to_end_encrypted() const { discord_voice_client& discord_voice_client::pause_audio(bool pause) { this->paused = pause; + if (!this->paused) { + this->sent_stop_frames = false; + } return *this; } @@ -172,10 +176,13 @@ dpp::utility::uptime discord_voice_client::get_remaining() { } discord_voice_client& discord_voice_client::stop_audio() { - std::lock_guard lock(this->stream_mutex); - outbuf.clear(); - track_meta.clear(); - tracks = 0; + { + std::lock_guard lock(this->stream_mutex); + outbuf.clear(); + track_meta.clear(); + tracks = 0; + } + this->send_stop_frames(); return *this; } @@ -398,7 +405,6 @@ discord_voice_client& discord_voice_client::skip_to_next_marker() { } discord_voice_client& discord_voice_client::send_silence(const uint64_t duration) { - uint8_t silence_packet[3] = { 0xf8, 0xff, 0xfe }; send_audio_opus(silence_packet, 3, duration); return *this; } @@ -443,4 +449,15 @@ uint16_t discord_voice_client::get_iteration_interval() { return this->iteration_interval; } +discord_voice_client& discord_voice_client::send_stop_frames(bool send_now) { + uint8_t silence_frames[sizeof(silence_packet) / sizeof(*silence_packet) * 5]; + for (size_t i = 0; i < sizeof(silence_frames) / sizeof(*silence_frames); i++) { + silence_frames[i] = silence_packet[i % 3]; + } + + this->send_audio_opus(silence_frames, sizeof(silence_frames) / sizeof(*silence_frames), 20, send_now); + + return *this; +} + } // namespace dpp diff --git a/src/dpp/voice/enabled/courier_loop.cpp b/src/dpp/voice/enabled/courier_loop.cpp index 6526c7f8ba..72ccccdbfc 100644 --- a/src/dpp/voice/enabled/courier_loop.cpp +++ b/src/dpp/voice/enabled/courier_loop.cpp @@ -21,6 +21,7 @@ ************************************************************************************/ #include +#include #include #include #include @@ -34,6 +35,7 @@ namespace dpp { void discord_voice_client::voice_courier_loop(discord_voice_client& client, courier_shared_state_t& shared_state) { utility::set_thread_name(std::string("vcourier/") + std::to_string(client.server_id)); + while (true) { std::this_thread::sleep_for(std::chrono::milliseconds{client.iteration_interval}); @@ -59,13 +61,17 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour bool has_payload_to_deliver = false; for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) { has_payload_to_deliver = has_payload_to_deliver || !parking_lot.parked_payloads.empty(); - flush_data.push_back(flush_data_t{user_id, - parking_lot.range.min_seq, - std::move(parking_lot.parked_payloads), + + flush_data.push_back(flush_data_t{ + user_id, + parking_lot.range.min_seq, + std::move(parking_lot.parked_payloads), /* Quickly check if we already have a decoder and only take the pending ctls if so. */ - parking_lot.decoder ? std::move(parking_lot.pending_decoder_ctls) - : decltype(parking_lot.pending_decoder_ctls){}, - parking_lot.decoder}); + parking_lot.decoder ? std::move(parking_lot.pending_decoder_ctls) + : decltype(parking_lot.pending_decoder_ctls){}, + parking_lot.decoder + }); + parking_lot.range.min_seq = parking_lot.range.max_seq + 1; parking_lot.range.min_timestamp = parking_lot.range.max_timestamp + 1; } @@ -76,7 +82,24 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour break; } - shared_state.signal_iteration.wait(lk); + shared_state.signal_iteration.wait(lk, [&shared_state](){ + if (shared_state.terminating) { + return true; + } + + /* + * Actually check the state we're looking for instead of waking up + * everytime read_ready was called. + */ + for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) { + if (parking_lot.parked_payloads.empty()) { + continue; + } + return true; + } + return false; + }); + /* * More data came or about to terminate, or just a spurious wake. * We need to collect the payloads again to determine what to do next. @@ -95,13 +118,14 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour /* This 32 bit PCM audio buffer is an upmixed version of the streams * combined for all users. This is a wider width audio buffer so that - * there is no clipping when there are many loud audio sources at once. + * there is no clipping when there are many loud audio sources at once. */ opus_int32 pcm_mix[23040] = {0}; size_t park_count = 0; int max_samples = 0; int samples = 0; + opus_int16 flush_data_pcm[23040]; for (auto &d: flush_data) { if (!d.decoder) { continue; @@ -109,37 +133,133 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour for (const auto &decoder_ctl: d.pending_decoder_ctls) { decoder_ctl(*d.decoder); } + for (rtp_seq_t seq = d.min_seq; !d.parked_payloads.empty(); ++seq) { - opus_int16 pcm[23040]; if (d.parked_payloads.top().seq != seq) { /* * Lost a packet with sequence number "seq", * But Opus decoder might be able to guess something. */ - if (int samples = opus_decode(d.decoder.get(), nullptr, 0, pcm, 5760, 0); - samples >= 0) { + if (int lost_packet_samples = opus_decode(d.decoder.get(), nullptr, 0, flush_data_pcm, 5760, 0); + lost_packet_samples >= 0) { /* * Since this sample comes from a lost packet, * we can only pretend there is an event, without any raw payload byte. */ - voice_receive_t vr(nullptr, "", &client, d.user_id, reinterpret_cast(pcm), - samples * opus_channel_count * sizeof(opus_int16)); + voice_receive_t vr(nullptr, "", &client, d.user_id, + reinterpret_cast(flush_data_pcm), + lost_packet_samples * opus_channel_count * sizeof(opus_int16)); - park_count = audio_mix(client, *client.mixer, pcm_mix, pcm, park_count, samples, max_samples); + park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, lost_packet_samples, max_samples); client.creator->on_voice_receive.call(vr); } } else { voice_receive_t &vr = *d.parked_payloads.top().vr; - if (vr.audio_data.size() > 0x7FFFFFFF) { + + /* + * We do decryption here to avoid blocking ssl_client and saving cpu time by doing it when needed only. + * + * NOTE: You do not want to send audio while also listening for on_voice_receive/on_voice_receive_combined. + * It will cause gaps in your recording, I have no idea why exactly. + */ + + constexpr size_t header_size = 12; + + uint8_t *buffer = vr.audio_data.data(); + size_t packet_size = vr.audio_data.size(); + + constexpr size_t nonce_size = sizeof(uint32_t); + /* Nonce is 4 byte at the end of payload with zero padding */ + uint8_t nonce[24] = { 0 }; + std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size); + + /* Get the number of CSRC in header */ + const size_t csrc_count = buffer[0] & 0b0000'1111; + /* Skip to the encrypted voice data */ + const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; + size_t total_header_len = offset_to_data; + + uint8_t* ciphertext = buffer + offset_to_data; + size_t ciphertext_len = packet_size - offset_to_data - nonce_size; + + size_t ext_len = 0; + if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { + /** + * Get the RTP Extensions size, we only get the size here because + * the extension itself is encrypted along with the opus packet + */ + { + uint16_t ext_len_in_words; + memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t)); + ext_len_in_words = ntohs(ext_len_in_words); + ext_len = sizeof(uint32_t) * ext_len_in_words; + } + constexpr size_t ext_header_len = sizeof(uint16_t) * 2; + ciphertext += ext_header_len; + ciphertext_len -= ext_header_len; + total_header_len += ext_header_len; + } + + uint8_t decrypted[65535] = { 0 }; + unsigned long long opus_packet_len = 0; + if (ssl_crypto_aead_xchacha20poly1305_ietf_decrypt( + decrypted, &opus_packet_len, + nullptr, + ciphertext, ciphertext_len, + buffer, + /** + * Additional Data: + * The whole header (including csrc list) + + * 4 byte extension header (magic 0xBEDE + 16-bit denoting extension length) + */ + total_header_len, + nonce, vr.voice_client->secret_key.data()) != 0) { + /* Invalid Discord RTP payload. */ + return; + } + + uint8_t *opus_packet = decrypted; + if (ext_len > 0) { + /* Skip previously encrypted RTP Header Extension */ + opus_packet += ext_len; + opus_packet_len -= ext_len; + } + + /** + * If DAVE is enabled, use the user's ratchet to decrypt the OPUS audio data + */ + std::vector decrypted_dave_frame; + if (vr.voice_client->is_end_to_end_encrypted()) { + auto decryptor = vr.voice_client->mls_state->decryptors.find(vr.user_id); + + if (decryptor != vr.voice_client->mls_state->decryptors.end()) { + decrypted_dave_frame.resize(decryptor->second->get_max_plaintext_byte_size(dave::media_type::media_audio, opus_packet_len)); + + size_t enc_len = decryptor->second->decrypt( + dave::media_type::media_audio, + dave::make_array_view(opus_packet, opus_packet_len), + dave::make_array_view(decrypted_dave_frame) + ); + + if (enc_len > 0) { + opus_packet = decrypted_dave_frame.data(); + opus_packet_len = enc_len; + } + } + } + + if (opus_packet_len > 0x7FFFFFFF) { throw dpp::length_exception(err_massive_audio, "audio_data > 2GB! This should never happen!"); } - if (samples = opus_decode(d.decoder.get(), vr.audio_data.data(), - static_cast(vr.audio_data.size() & 0x7FFFFFFF), pcm, 5760, 0); - samples >= 0) { - vr.reassign(&client, d.user_id, reinterpret_cast(pcm), - samples * opus_channel_count * sizeof(opus_int16)); + + samples = opus_decode(d.decoder.get(), opus_packet, static_cast(opus_packet_len & 0x7FFFFFFF), flush_data_pcm, 5760, 0); + + if (samples >= 0) { + vr.reassign(&client, d.user_id, reinterpret_cast(flush_data_pcm), samples * opus_channel_count * sizeof(opus_int16)); + client.end_gain = 1.0f / client.moving_average; - park_count = audio_mix(client, *client.mixer, pcm_mix, pcm, park_count, samples, max_samples); + park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, samples, max_samples); + client.creator->on_voice_receive.call(vr); } @@ -147,15 +267,14 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour } } } - /* If combined receive is bound, dispatch it */ if (park_count) { - /* Downsample the 32 bit samples back to 16 bit */ opus_int16 pcm_downsample[23040] = {0}; opus_int16 *pcm_downsample_ptr = pcm_downsample; opus_int32 *pcm_mix_ptr = pcm_mix; client.increment = (client.end_gain - client.current_gain) / static_cast(samples); + for (int64_t x = 0; x < (samples * opus_channel_count) / client.mixer->byte_blocks_per_register; ++x) { client.mixer->collect_single_register(pcm_mix_ptr, pcm_downsample_ptr, client.current_gain, client.increment); client.current_gain += client.increment * static_cast(client.mixer->byte_blocks_per_register); @@ -164,7 +283,7 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour } voice_receive_t vr(nullptr, "", &client, 0, reinterpret_cast(pcm_downsample), - max_samples * opus_channel_count * sizeof(opus_int16)); + max_samples * opus_channel_count * sizeof(opus_int16)); client.creator->on_voice_receive_combined.call(vr); } diff --git a/src/dpp/voice/enabled/handle_frame.cpp b/src/dpp/voice/enabled/handle_frame.cpp index 07423d3377..0b84cc1ca4 100644 --- a/src/dpp/voice/enabled/handle_frame.cpp +++ b/src/dpp/voice/enabled/handle_frame.cpp @@ -411,6 +411,11 @@ bool discord_voice_client::handle_frame(const std::string &data, ws_opcode opcod }); } this->reinit_dave_mls_group(); + + /* Ready now if there's no DAVE user waiting in the vc */ + if (dave_mls_user_list.empty()) { + ready_now = true; + } } } else { /* Non-DAVE ready immediately */ diff --git a/src/dpp/voice/enabled/opus.cpp b/src/dpp/voice/enabled/opus.cpp index d99a3776d0..a92886ac9c 100644 --- a/src/dpp/voice/enabled/opus.cpp +++ b/src/dpp/voice/enabled/opus.cpp @@ -71,14 +71,14 @@ discord_voice_client& discord_voice_client::send_audio_raw(uint16_t* audio_data, return *this; } -discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length) { +discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length) { int samples = opus_packet_get_nb_samples(opus_packet, (opus_int32)length, opus_sample_rate_hz); uint64_t duration = (samples / 48) / (timescale / 1000000); - send_audio_opus(opus_packet, length, duration); + send_audio_opus(opus_packet, length, duration, false); return *this; } -discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) { +discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now) { int frame_size = (int)(48 * duration * (timescale / 1000000)); opus_int32 encoded_audio_max_length = (opus_int32)length; std::vector encoded_audio(encoded_audio_max_length); @@ -147,7 +147,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet /* Append the 4 byte nonce to the resulting payload */ std::memcpy(payload.data() + payload.size() - sizeof(noncel), &noncel, sizeof(noncel)); - this->send(reinterpret_cast(payload.data()), payload.size(), duration); + this->send(reinterpret_cast(payload.data()), payload.size(), duration, send_now); timestamp += frame_size; diff --git a/src/dpp/voice/enabled/read_ready.cpp b/src/dpp/voice/enabled/read_ready.cpp index 95ac598160..03c79b0486 100644 --- a/src/dpp/voice/enabled/read_ready.cpp +++ b/src/dpp/voice/enabled/read_ready.cpp @@ -20,6 +20,7 @@ * ************************************************************************************/ +#include #include #include #include @@ -66,8 +67,8 @@ void discord_voice_client::read_ready() } voice_payload vp{0, // seq, populate later - 0, // timestamp, populate later - std::make_unique(nullptr, std::string(reinterpret_cast(buffer), packet_size))}; + 0, // timestamp, populate later + std::make_unique(nullptr, std::string(reinterpret_cast(buffer), packet_size))}; vp.vr->voice_client = this; @@ -86,88 +87,7 @@ void discord_voice_client::read_ready() std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t)); vp.timestamp = ntohl(vp.timestamp); - constexpr size_t nonce_size = sizeof(uint32_t); - /* Nonce is 4 byte at the end of payload with zero padding */ - uint8_t nonce[24] = { 0 }; - std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size); - - /* Get the number of CSRC in header */ - const size_t csrc_count = buffer[0] & 0b0000'1111; - /* Skip to the encrypted voice data */ - const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count; - size_t total_header_len = offset_to_data; - - uint8_t* ciphertext = buffer + offset_to_data; - size_t ciphertext_len = packet_size - offset_to_data - nonce_size; - - size_t ext_len = 0; - if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) { - /** - * Get the RTP Extensions size, we only get the size here because - * the extension itself is encrypted along with the opus packet - */ - { - uint16_t ext_len_in_words; - memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t)); - ext_len_in_words = ntohs(ext_len_in_words); - ext_len = sizeof(uint32_t) * ext_len_in_words; - } - constexpr size_t ext_header_len = sizeof(uint16_t) * 2; - ciphertext += ext_header_len; - ciphertext_len -= ext_header_len; - total_header_len += ext_header_len; - } - - uint8_t decrypted[65535] = { 0 }; - unsigned long long opus_packet_len = 0; - if (ssl_crypto_aead_xchacha20poly1305_ietf_decrypt( - decrypted, &opus_packet_len, - nullptr, - ciphertext, ciphertext_len, - buffer, - /** - * Additional Data: - * The whole header (including csrc list) + - * 4 byte extension header (magic 0xBEDE + 16-bit denoting extension length) - */ - total_header_len, - nonce, secret_key.data()) != 0) { - /* Invalid Discord RTP payload. */ - return; - } - - uint8_t *opus_packet = decrypted; - if (ext_len > 0) { - /* Skip previously encrypted RTP Header Extension */ - opus_packet += ext_len; - opus_packet_len -= ext_len; - } - - /** - * If DAVE is enabled, use the user's ratchet to decrypt the OPUS audio data - */ - std::vector frame; - if (is_end_to_end_encrypted()) { - auto decryptor = mls_state->decryptors.find(vp.vr->user_id); - if (decryptor != mls_state->decryptors.end()) { - frame.resize(decryptor->second->get_max_plaintext_byte_size(dave::media_type::media_audio, opus_packet_len)); - size_t enc_len = decryptor->second->decrypt( - dave::media_type::media_audio, - dave::make_array_view(opus_packet, opus_packet_len), - dave::make_array_view(frame) - ); - if (enc_len > 0) { - opus_packet = frame.data(); - opus_packet_len = enc_len; - } - } - } - - /* - * We're left with the decrypted, opus-encoded data. - * Park the payload and decode on the voice courier thread. - */ - vp.vr->audio_data.assign(opus_packet, opus_packet + opus_packet_len); + vp.vr->audio_data.assign(buffer, buffer + packet_size); { std::lock_guard lk(voice_courier_shared_state.mtx); @@ -183,7 +103,7 @@ void discord_voice_client::read_ready() int opus_error = 0; decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error), - &opus_decoder_destroy); + &opus_decoder_destroy); if (opus_error) { /** * NOTE: The -10 here makes the opus_error match up with values of exception_error_code, @@ -207,8 +127,8 @@ void discord_voice_client::read_ready() if (!voice_courier.joinable()) { /* Courier thread is not running, start it */ voice_courier = std::thread(&voice_courier_loop, - std::ref(*this), - std::ref(voice_courier_shared_state)); + std::ref(*this), + std::ref(voice_courier_shared_state)); } } diff --git a/src/dpp/voice/enabled/read_write.cpp b/src/dpp/voice/enabled/read_write.cpp index c24200b49b..52a09d5a39 100644 --- a/src/dpp/voice/enabled/read_write.cpp +++ b/src/dpp/voice/enabled/read_write.cpp @@ -30,7 +30,7 @@ namespace dpp { dpp::socket discord_voice_client::want_write() { std::lock_guard lock(this->stream_mutex); - if (!this->paused && !outbuf.empty()) { + if (!this->sent_stop_frames && !outbuf.empty()) { return fd; } return INVALID_SOCKET; @@ -42,13 +42,16 @@ dpp::socket discord_voice_client::want_read() { } -void discord_voice_client::send(const char* packet, size_t len, uint64_t duration) { - voice_out_packet frame; - frame.packet.assign(packet, packet + len); - frame.duration = duration; - { +void discord_voice_client::send(const char* packet, size_t len, uint64_t duration, bool send_now) { + if (!send_now) [[likely]] { + voice_out_packet frame; + frame.packet.assign(packet, packet + len); + frame.duration = duration; + std::lock_guard lock(this->stream_mutex); outbuf.emplace_back(frame); + } else [[unlikely]] { + this->udp_send(packet, len); } } @@ -68,4 +71,4 @@ int discord_voice_client::udp_recv(char* data, size_t max_length) return static_cast(recv(this->fd, data, static_cast(max_length), 0)); } -} \ No newline at end of file +} diff --git a/src/dpp/voice/enabled/write_ready.cpp b/src/dpp/voice/enabled/write_ready.cpp index 46c0307055..8287dea2a3 100644 --- a/src/dpp/voice/enabled/write_ready.cpp +++ b/src/dpp/voice/enabled/write_ready.cpp @@ -37,7 +37,14 @@ void discord_voice_client::write_ready() { send_audio_type_t type = satype_recorded_audio; { std::lock_guard lock(this->stream_mutex); - if (!this->paused && outbuf.size()) { + if (this->paused) { + if (!this->sent_stop_frames) { + this->send_stop_frames(true); + this->sent_stop_frames = true; + } + + /* Fallthrough if paused */ + } else if (!outbuf.empty()) { type = send_audio_type; if (outbuf[0].packet.size() == sizeof(uint16_t) && (*(reinterpret_cast(outbuf[0].packet.data()))) == AUDIO_TRACK_MARKER) { outbuf.erase(outbuf.begin()); @@ -46,7 +53,7 @@ void discord_voice_client::write_ready() { tracks--; } } - if (outbuf.size()) { + if (!outbuf.empty()) { if (this->udp_send(outbuf[0].packet.data(), outbuf[0].packet.length()) == (int)outbuf[0].packet.length()) { duration = outbuf[0].duration * timescale; bufsize = outbuf[0].packet.length(); diff --git a/src/dpp/voice/stub/stubs.cpp b/src/dpp/voice/stub/stubs.cpp index b184367d98..c027aa8b43 100644 --- a/src/dpp/voice/stub/stubs.cpp +++ b/src/dpp/voice/stub/stubs.cpp @@ -63,11 +63,11 @@ namespace dpp { return *this; } - discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length) { + discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now) { return *this; } - discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) { + discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length) { return *this; } @@ -80,7 +80,7 @@ namespace dpp { } - void discord_voice_client::send(const char* packet, size_t len, uint64_t duration) { + void discord_voice_client::send(const char* packet, size_t len, uint64_t duration, bool send_now) { } int discord_voice_client::udp_send(const char* data, size_t length) { diff --git a/src/unittest/test.cpp b/src/unittest/test.cpp index 7c61570ab7..da96a28096 100644 --- a/src/unittest/test.cpp +++ b/src/unittest/test.cpp @@ -641,7 +641,6 @@ Markdown lol \\|\\|spoiler\\|\\| \\~\\~strikethrough\\~\\~ \\`small \\*code\\* b DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::thread_member_update_t, success); DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::thread_members_update_t, success); DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_buffer_send_t, success); - DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_user_talking_t, success); DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_ready_t, success); DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_receive_t, success); DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_client_speaking_t, success);