From 8bedaa454718627805b1f6c14ed06600cc0633e0 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Thu, 17 Oct 2024 18:24:43 +0700
Subject: [PATCH 01/10] fix: ready anyway when no user in vs

---
 src/dpp/voice/enabled/handle_frame.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/dpp/voice/enabled/handle_frame.cpp b/src/dpp/voice/enabled/handle_frame.cpp
index 07423d3377..0b84cc1ca4 100644
--- a/src/dpp/voice/enabled/handle_frame.cpp
+++ b/src/dpp/voice/enabled/handle_frame.cpp
@@ -411,6 +411,11 @@ bool discord_voice_client::handle_frame(const std::string &data, ws_opcode opcod
 								});
 						}
 						this->reinit_dave_mls_group();
+
+						/* Ready now if there's no DAVE user waiting in the vc */
+						if (dave_mls_user_list.empty()) {
+							ready_now = true;
+						}
 					}
 				} else {
 					/* Non-DAVE ready immediately */

From d1db33625ced8b0c6c15c2101db801fe02c4ab51 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Thu, 17 Oct 2024 18:59:53 +0700
Subject: [PATCH 02/10] [BREAKING CHANGE] fix: remove unused event with unknown
 opcode it supposed to handle

---
 include/dpp/cluster.h    | 11 -----------
 include/dpp/dispatcher.h | 25 +------------------------
 src/unittest/test.cpp    |  1 -
 3 files changed, 1 insertion(+), 36 deletions(-)

diff --git a/include/dpp/cluster.h b/include/dpp/cluster.h
index 2aa1e78129..4825fe497f 100644
--- a/include/dpp/cluster.h
+++ b/include/dpp/cluster.h
@@ -1334,17 +1334,6 @@ class DPP_EXPORT cluster {
 	event_router_t<voice_buffer_send_t> on_voice_buffer_send;
 
 	
-	/**
-	 * @brief Called when a user is talking on a voice channel.
-	 *
-	 * @warning If the cache policy has disabled guild caching, the pointer to the guild in this event may be nullptr.
-	 *
-	 * @note Use operator() to attach a lambda to this event, and the detach method to detach the listener using the returned ID.
-	 * The function signature for this event takes a single `const` reference of type voice_user_talking_t&, and returns void.
-	 */
-	event_router_t<voice_user_talking_t> on_voice_user_talking;
-
-	
 	/**
 	 * @brief Called when a voice channel is connected and ready to send audio.
 	 * Note that this is not directly attached to the READY event of the websocket,
diff --git a/include/dpp/dispatcher.h b/include/dpp/dispatcher.h
index 151c2e8880..749f3a0454 100644
--- a/include/dpp/dispatcher.h
+++ b/include/dpp/dispatcher.h
@@ -1989,30 +1989,7 @@ struct DPP_EXPORT voice_buffer_send_t : public event_dispatch_t {
 };
 
 /**
- * @brief voice user talking
- */
-struct DPP_EXPORT voice_user_talking_t : public event_dispatch_t {
-	using event_dispatch_t::event_dispatch_t;
-	using event_dispatch_t::operator=;
-
-	/**
-	 * @brief voice client where user is talking
-	 */
-	class discord_voice_client* voice_client = nullptr;
-
-	/**
-	 * @brief talking user id
-	 */
-	snowflake user_id = {};
-
-	/**
-	 * @brief flags for talking user
-	 */
-	uint8_t talking_flags = 0;
-};
-
-/**
- * @brief voice user talking
+ * @brief voice ready
  */
 struct DPP_EXPORT voice_ready_t : public event_dispatch_t {
 	using event_dispatch_t::event_dispatch_t;
diff --git a/src/unittest/test.cpp b/src/unittest/test.cpp
index 7c61570ab7..da96a28096 100644
--- a/src/unittest/test.cpp
+++ b/src/unittest/test.cpp
@@ -641,7 +641,6 @@ Markdown lol \\|\\|spoiler\\|\\| \\~\\~strikethrough\\~\\~ \\`small \\*code\\* b
 		DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::thread_member_update_t, success);
 		DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::thread_members_update_t, success);
 		DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_buffer_send_t, success);
-		DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_user_talking_t, success);
 		DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_ready_t, success);
 		DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_receive_t, success);
 		DPP_CHECK_CONSTRUCT_ASSIGN(EVENT_CLASS, dpp::voice_client_speaking_t, success);

From 43b8f6aa4133ba3d86bfd7401b0997295bd94050 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Fri, 18 Oct 2024 00:20:58 +0700
Subject: [PATCH 03/10] feat: implement sending stop frames on pause/stop,
 TODO: make voice receive smooth while sending audio

---
 include/dpp/discordvoiceclient.h       | 35 +++++++++++++++++++++++---
 src/dpp/discordvoiceclient.cpp         | 18 ++++++++++++-
 src/dpp/voice/enabled/courier_loop.cpp | 16 +++++++++++-
 src/dpp/voice/enabled/opus.cpp         |  8 +++---
 src/dpp/voice/enabled/read_write.cpp   | 17 +++++++------
 src/dpp/voice/enabled/write_ready.cpp  |  9 ++++++-
 6 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h
index 658ba6422a..3a5f457938 100644
--- a/include/dpp/discordvoiceclient.h
+++ b/include/dpp/discordvoiceclient.h
@@ -240,6 +240,11 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 */
 	void cleanup();
 
+	/**
+	 * @brief A frame of silence packet
+	 */
+	static constexpr uint8_t silence_packet[3] = { 0xf8, 0xff, 0xfe };
+
 	/**
 	 * @brief Mutex for outbound packet stream
 	 */
@@ -434,6 +439,13 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 */
 	bool paused;
 
+	/**
+	 * @brief Whether has sent 5 frame of silence before stopping on pause/stop.
+	 *
+	 * This is to avoid unintended Opus interpolation with subsequent transmissions.
+	 */
+	bool sent_stop_frames;
+
 #ifdef HAVE_VOICE
 	/**
 	 * @brief libopus encoder
@@ -650,8 +662,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 * @param packet packet data
 	 * @param len length of packet
 	 * @param duration duration of opus packet
+	 * @param send_now send this packet right away without buffering.
+	 * Do NOT set send_now to true outside write_ready.
 	 */
-	void send(const char* packet, size_t len, uint64_t duration);
+	void send(const char* packet, size_t len, uint64_t duration, bool send_now = false);
 
 	/**
 	 * @brief Queue a message to be sent via the websocket
@@ -962,6 +976,10 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 * @param duration Generally duration is 2.5, 5, 10, 20, 40 or 60
 	 * if the timescale is 1000000 (1ms) 
 	 * 
+	 * @param send_now Send this packet right away without buffering,
+	 * this will skip duration calculation for the packet being sent
+	 * and only safe to be set to true in write_ready.
+	 *
 	 * @return discord_voice_client& Reference to self
 	 * 
 	 * @note It is your responsibility to ensure that packets of data 
@@ -972,7 +990,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 * 
 	 * @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++
 	 */
-	discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration);
+	discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now = false);
 
 	/**
 	 * @brief Send opus packets to the voice channel
@@ -999,7 +1017,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 * 
 	 * @throw dpp::voice_exception If data length is invalid or voice support not compiled into D++
 	 */
-	discord_voice_client& send_audio_opus(uint8_t* opus_packet, const size_t length);
+	discord_voice_client& send_audio_opus(const uint8_t* opus_packet, const size_t length);
 
 	/**
 	 * @brief Send silence to the voice channel
@@ -1012,6 +1030,17 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 */
 	discord_voice_client& send_silence(const uint64_t duration);
 
+	/**
+	 * @brief Send stop frames to the voice channel.
+	 *
+	 * @param send_now send this packet right away without buffering.
+	 * Do NOT set send_now to true outside write_ready.
+	 * 
+	 * @return discord_voice_client& Reference to self
+	 * @throw dpp::voice_exception if voice support is not compiled into D++
+	 */
+	discord_voice_client& send_stop_frames(bool send_now = false);
+
 	/**
 	 * @brief Sets the audio type that will be sent with send_audio_* methods.
 	 *
diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 1b8fe7b95f..d63cccc6a6 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -20,6 +20,7 @@
  *
  ************************************************************************************/
 
+#include <cstdint>
 #ifdef _WIN32
 	#include <WinSock2.h>
 	#include <WS2tcpip.h>
@@ -148,6 +149,9 @@ bool discord_voice_client::is_end_to_end_encrypted() const {
 
 discord_voice_client& discord_voice_client::pause_audio(bool pause) {
 	this->paused = pause;
+	if (!this->paused) {
+		this->sent_stop_frames = false;
+	}
 	return *this;
 }
 
@@ -176,6 +180,7 @@ discord_voice_client& discord_voice_client::stop_audio() {
 	outbuf.clear();
 	track_meta.clear();
 	tracks = 0;
+	this->send_stop_frames();
 	return *this;
 }
 
@@ -398,7 +403,6 @@ discord_voice_client& discord_voice_client::skip_to_next_marker() {
 }
 
 discord_voice_client& discord_voice_client::send_silence(const uint64_t duration) {
-	uint8_t silence_packet[3] = { 0xf8, 0xff, 0xfe };
 	send_audio_opus(silence_packet, 3, duration);
 	return *this;
 }
@@ -412,6 +416,7 @@ discord_voice_client& discord_voice_client::set_send_audio_type(send_audio_type_
 
 discord_voice_client& discord_voice_client::speak() {
 	if (!this->sending) {
+		std::cout << "Sending voice_opcode_client_speaking\n";
 		this->queue_message(json({
 		{"op", voice_opcode_client_speaking},
 		{"d", {
@@ -443,4 +448,15 @@ uint16_t discord_voice_client::get_iteration_interval() {
 	return this->iteration_interval;
 }
 
+discord_voice_client& discord_voice_client::send_stop_frames(bool send_now) {
+	uint8_t silence_frames[sizeof(silence_packet) / sizeof(*silence_packet) * 5];
+	for (size_t i = 0; i < sizeof(silence_frames) / sizeof(*silence_frames); i++) {
+		silence_frames[i] = silence_packet[i % 3];
+	}
+
+	this->send_audio_opus(silence_frames, sizeof(silence_frames) / sizeof(*silence_frames), 20, send_now);
+
+	return *this;
+}
+
 } // namespace dpp
diff --git a/src/dpp/voice/enabled/courier_loop.cpp b/src/dpp/voice/enabled/courier_loop.cpp
index 6526c7f8ba..57b5e99652 100644
--- a/src/dpp/voice/enabled/courier_loop.cpp
+++ b/src/dpp/voice/enabled/courier_loop.cpp
@@ -76,7 +76,21 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 					break;
 				}
 
-				shared_state.signal_iteration.wait(lk);
+                shared_state.signal_iteration.wait(lk, [&shared_state](){
+                    /* 
+                     * Actually check the state we're looking for instead of waking up
+                     * everytime read_ready was called.
+                     */
+                    for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) {
+                        if (parking_lot.parked_payloads.empty()) {
+                            continue;
+                        }
+
+                        return true;
+                    }
+                    return false;
+                });
+
 				/*
 				 * More data came or about to terminate, or just a spurious wake.
 				 * We need to collect the payloads again to determine what to do next.
diff --git a/src/dpp/voice/enabled/opus.cpp b/src/dpp/voice/enabled/opus.cpp
index d99a3776d0..a92886ac9c 100644
--- a/src/dpp/voice/enabled/opus.cpp
+++ b/src/dpp/voice/enabled/opus.cpp
@@ -71,14 +71,14 @@ discord_voice_client& discord_voice_client::send_audio_raw(uint16_t* audio_data,
 	return *this;
 }
 
-discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length) {
+discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length) {
 	int samples = opus_packet_get_nb_samples(opus_packet, (opus_int32)length, opus_sample_rate_hz);
 	uint64_t duration = (samples / 48) / (timescale / 1000000);
-	send_audio_opus(opus_packet, length, duration);
+	send_audio_opus(opus_packet, length, duration, false);
 	return *this;
 }
 
-discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) {
+discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now) {
 	int frame_size = (int)(48 * duration * (timescale / 1000000));
 	opus_int32 encoded_audio_max_length = (opus_int32)length;
 	std::vector<uint8_t> encoded_audio(encoded_audio_max_length);
@@ -147,7 +147,7 @@ discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet
 	/* Append the 4 byte nonce to the resulting payload */
 	std::memcpy(payload.data() + payload.size() - sizeof(noncel), &noncel, sizeof(noncel));
 
-	this->send(reinterpret_cast<const char *>(payload.data()), payload.size(), duration);
+	this->send(reinterpret_cast<const char *>(payload.data()), payload.size(), duration, send_now);
 
 	timestamp += frame_size;
 
diff --git a/src/dpp/voice/enabled/read_write.cpp b/src/dpp/voice/enabled/read_write.cpp
index c24200b49b..52a09d5a39 100644
--- a/src/dpp/voice/enabled/read_write.cpp
+++ b/src/dpp/voice/enabled/read_write.cpp
@@ -30,7 +30,7 @@ namespace dpp {
 
 dpp::socket discord_voice_client::want_write() {
 	std::lock_guard<std::mutex> lock(this->stream_mutex);
-	if (!this->paused && !outbuf.empty()) {
+	if (!this->sent_stop_frames && !outbuf.empty()) {
 		return fd;
 	}
 	return INVALID_SOCKET;
@@ -42,13 +42,16 @@ dpp::socket discord_voice_client::want_read() {
 }
 
 
-void discord_voice_client::send(const char* packet, size_t len, uint64_t duration) {
-	voice_out_packet frame;
-	frame.packet.assign(packet, packet + len);
-	frame.duration = duration;
-	{
+void discord_voice_client::send(const char* packet, size_t len, uint64_t duration, bool send_now) {
+	if (!send_now) [[likely]] {
+		voice_out_packet frame;
+		frame.packet.assign(packet, packet + len);
+		frame.duration = duration;
+
 		std::lock_guard<std::mutex> lock(this->stream_mutex);
 		outbuf.emplace_back(frame);
+	} else [[unlikely]] {
+		this->udp_send(packet, len);
 	}
 }
 
@@ -68,4 +71,4 @@ int discord_voice_client::udp_recv(char* data, size_t max_length)
 	return static_cast<int>(recv(this->fd, data, static_cast<int>(max_length), 0));
 }
 
-}
\ No newline at end of file
+}
diff --git a/src/dpp/voice/enabled/write_ready.cpp b/src/dpp/voice/enabled/write_ready.cpp
index 46c0307055..3fda474f45 100644
--- a/src/dpp/voice/enabled/write_ready.cpp
+++ b/src/dpp/voice/enabled/write_ready.cpp
@@ -37,7 +37,14 @@ void discord_voice_client::write_ready() {
 	send_audio_type_t type = satype_recorded_audio;
 	{
 		std::lock_guard<std::mutex> lock(this->stream_mutex);
-		if (!this->paused && outbuf.size()) {
+		if (this->paused) {
+			if (!this->sent_stop_frames) {
+				this->send_stop_frames(true);
+				this->sent_stop_frames = true;
+			}
+
+			/* Fallthrough if paused */
+		} else if (outbuf.size()) {
 			type = send_audio_type;
 			if (outbuf[0].packet.size() == sizeof(uint16_t) && (*(reinterpret_cast<uint16_t*>(outbuf[0].packet.data()))) == AUDIO_TRACK_MARKER) {
 				outbuf.erase(outbuf.begin());

From a84758d593db78a2969b4a8f8055f281c1f0e905 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Fri, 18 Oct 2024 00:24:19 +0700
Subject: [PATCH 04/10] refactor: use vector::empty instead

---
 src/dpp/voice/enabled/write_ready.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/dpp/voice/enabled/write_ready.cpp b/src/dpp/voice/enabled/write_ready.cpp
index 3fda474f45..8287dea2a3 100644
--- a/src/dpp/voice/enabled/write_ready.cpp
+++ b/src/dpp/voice/enabled/write_ready.cpp
@@ -44,7 +44,7 @@ void discord_voice_client::write_ready() {
 			}
 
 			/* Fallthrough if paused */
-		} else if (outbuf.size()) {
+		} else if (!outbuf.empty()) {
 			type = send_audio_type;
 			if (outbuf[0].packet.size() == sizeof(uint16_t) && (*(reinterpret_cast<uint16_t*>(outbuf[0].packet.data()))) == AUDIO_TRACK_MARKER) {
 				outbuf.erase(outbuf.begin());
@@ -53,7 +53,7 @@ void discord_voice_client::write_ready() {
 					tracks--;
 				}
 			}
-			if (outbuf.size()) {
+			if (!outbuf.empty()) {
 				if (this->udp_send(outbuf[0].packet.data(), outbuf[0].packet.length()) == (int)outbuf[0].packet.length()) {
 					duration = outbuf[0].duration * timescale;
 					bufsize = outbuf[0].packet.length();

From 45fc7ca9ec36c8820fd1e604537147c2cca95434 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sun, 20 Oct 2024 06:34:48 +0700
Subject: [PATCH 05/10] fix: stubs.cpp mismatch signatures

---
 src/dpp/voice/stub/stubs.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/dpp/voice/stub/stubs.cpp b/src/dpp/voice/stub/stubs.cpp
index b184367d98..c027aa8b43 100644
--- a/src/dpp/voice/stub/stubs.cpp
+++ b/src/dpp/voice/stub/stubs.cpp
@@ -63,11 +63,11 @@ namespace dpp {
 		return *this;
 	}
 
-	discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length) {
+	discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length, uint64_t duration, bool send_now) {
 		return *this;
 	}
 
-	discord_voice_client& discord_voice_client::send_audio_opus(uint8_t* opus_packet, const size_t length, uint64_t duration) {
+	discord_voice_client& discord_voice_client::send_audio_opus(const uint8_t* opus_packet, const size_t length) {
 		return *this;
 	}
 
@@ -80,7 +80,7 @@ namespace dpp {
 	}
 
 
-	void discord_voice_client::send(const char* packet, size_t len, uint64_t duration) {
+	void discord_voice_client::send(const char* packet, size_t len, uint64_t duration, bool send_now) {
 	}
 
 	int discord_voice_client::udp_send(const char* data, size_t length) {

From c1a2f0d27d4f1cea6f3b099858171b6de67e2ace Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sun, 20 Oct 2024 08:46:43 +0700
Subject: [PATCH 06/10] feat: move decryption to courier thread where it's
 actually needed

---
 src/dpp/voice/enabled/courier_loop.cpp | 187 ++++++++++++++++++++-----
 src/dpp/voice/enabled/read_ready.cpp   | 111 ++++-----------
 2 files changed, 179 insertions(+), 119 deletions(-)

diff --git a/src/dpp/voice/enabled/courier_loop.cpp b/src/dpp/voice/enabled/courier_loop.cpp
index 57b5e99652..f9dab53561 100644
--- a/src/dpp/voice/enabled/courier_loop.cpp
+++ b/src/dpp/voice/enabled/courier_loop.cpp
@@ -21,6 +21,7 @@
  ************************************************************************************/
 
 #include <string_view>
+#include <utility>
 #include <dpp/exception.h>
 #include <dpp/isa_detection.h>
 #include <dpp/discordvoiceclient.h>
@@ -32,11 +33,32 @@
 
 namespace dpp {
 
+struct iter_bench {
+	std::chrono::time_point<std::chrono::steady_clock> start;
+	std::string n;
+
+	iter_bench(std::string _n) : n(std::move(_n)) {
+		start = std::chrono::steady_clock::now();
+		std::cout << n << "START: " << std::chrono::duration_cast<std::chrono::milliseconds>(start.time_since_epoch()).count() << "\n";
+	}
+
+	~iter_bench() {
+		auto end = std::chrono::steady_clock::now();
+		std::cout << n << "END: " << std::chrono::duration_cast<std::chrono::milliseconds>(end.time_since_epoch()).count() << "\n";
+		std::cout << n << "BENCH: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "\n";
+	}
+};
+
 void discord_voice_client::voice_courier_loop(discord_voice_client& client, courier_shared_state_t& shared_state) {
 	utility::set_thread_name(std::string("vcourier/") + std::to_string(client.server_id));
+	size_t iter = 0;
 	while (true) {
+		std::cout << "voice_courier_loop ITER: " << ++iter << "\n";
+
 		std::this_thread::sleep_for(std::chrono::milliseconds{client.iteration_interval});
 
+		iter_bench b("voice_courier_loop ITER_");
+
 		struct flush_data_t {
 			snowflake user_id;
 			rtp_seq_t min_seq;
@@ -51,6 +73,9 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 		 * release the lock as soon as possible.
 		 */
 		{
+			std::cout << "shared_state.mtx LOCK\n";
+			iter_bench c("voice_courier_loop FLUSH_DATA_");
+
 			std::unique_lock lk(shared_state.mtx);
 
 			/* mitigates vector resizing while holding the mutex */
@@ -60,12 +85,12 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 			for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) {
 				has_payload_to_deliver = has_payload_to_deliver || !parking_lot.parked_payloads.empty();
 				flush_data.push_back(flush_data_t{user_id,
-								  parking_lot.range.min_seq,
-								  std::move(parking_lot.parked_payloads),
+					parking_lot.range.min_seq,
+					std::move(parking_lot.parked_payloads),
 					/* Quickly check if we already have a decoder and only take the pending ctls if so. */
-								  parking_lot.decoder ? std::move(parking_lot.pending_decoder_ctls)
-										      : decltype(parking_lot.pending_decoder_ctls){},
-								  parking_lot.decoder});
+					parking_lot.decoder ? std::move(parking_lot.pending_decoder_ctls)
+					: decltype(parking_lot.pending_decoder_ctls){},
+					parking_lot.decoder});
 				parking_lot.range.min_seq = parking_lot.range.max_seq + 1;
 				parking_lot.range.min_timestamp = parking_lot.range.max_timestamp + 1;
 			}
@@ -73,23 +98,32 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 			if (!has_payload_to_deliver) {
 				if (shared_state.terminating) {
 					/* We have delivered all data to handlers. Terminate now. */
+					std::cout << "shared_state.mtx RELEASE\n";
 					break;
 				}
 
-                shared_state.signal_iteration.wait(lk, [&shared_state](){
-                    /* 
-                     * Actually check the state we're looking for instead of waking up
-                     * everytime read_ready was called.
-                     */
-                    for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) {
-                        if (parking_lot.parked_payloads.empty()) {
-                            continue;
-                        }
-
-                        return true;
-                    }
-                    return false;
-                });
+				shared_state.signal_iteration.wait(lk, [&shared_state](){
+					std::cout << "shared_state.mtx LOCKED\n";
+					if (shared_state.terminating) {
+						return true;
+					}
+
+					/* 
+					 * Actually check the state we're looking for instead of waking up
+					 * everytime read_ready was called.
+					 */
+					for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) {
+						if (parking_lot.parked_payloads.empty()) {
+							continue;
+						}
+
+						std::cout << "shared_state.mtx RELEASE\n";
+						return true;
+					}
+					std::cout << "shared_state.mtx RELEASE\n";
+					return false;
+				});
+				std::cout << "shared_state.mtx RELEASE\n";
 
 				/*
 				 * More data came or about to terminate, or just a spurious wake.
@@ -97,6 +131,7 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 				 */
 				continue;
 			}
+			std::cout << "shared_state.mtx RELEASE\n";
 		}
 
 		if (client.creator->on_voice_receive.empty() && client.creator->on_voice_receive_combined.empty()) {
@@ -116,44 +151,133 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 		int max_samples = 0;
 		int samples = 0;
 
+		opus_int16 flush_data_pcm[23040];
 		for (auto &d: flush_data) {
+			iter_bench c("voice_courier_loop FLUSH_LOOP_");
 			if (!d.decoder) {
 				continue;
 			}
 			for (const auto &decoder_ctl: d.pending_decoder_ctls) {
 				decoder_ctl(*d.decoder);
 			}
+
 			for (rtp_seq_t seq = d.min_seq; !d.parked_payloads.empty(); ++seq) {
-				opus_int16 pcm[23040];
+				iter_bench e("voice_courier_loop SEQ_LOOP_");
+				std::cout << "TOP_SEQ: " << d.parked_payloads.top().seq << "\nCURRENT_SEQ: "<< seq << "\n";
+
 				if (d.parked_payloads.top().seq != seq) {
 					/*
 					 * Lost a packet with sequence number "seq",
 					 * But Opus decoder might be able to guess something.
 					 */
-					if (int samples = opus_decode(d.decoder.get(), nullptr, 0, pcm, 5760, 0);
+					if (int samples = opus_decode(d.decoder.get(), nullptr, 0, flush_data_pcm, 5760, 0);
 						samples >= 0) {
 						/*
 						 * Since this sample comes from a lost packet,
 						 * we can only pretend there is an event, without any raw payload byte.
 						 */
-						voice_receive_t vr(nullptr, "", &client, d.user_id, reinterpret_cast<uint8_t *>(pcm),
-								   samples * opus_channel_count * sizeof(opus_int16));
+						voice_receive_t vr(nullptr, "", &client, d.user_id, reinterpret_cast<uint8_t *>(flush_data_pcm),
+						 samples * opus_channel_count * sizeof(opus_int16));
 
-						park_count = audio_mix(client, *client.mixer, pcm_mix, pcm, park_count, samples, max_samples);
+						park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, samples, max_samples);
 						client.creator->on_voice_receive.call(vr);
 					}
 				} else {
 					voice_receive_t &vr = *d.parked_payloads.top().vr;
-					if (vr.audio_data.size() > 0x7FFFFFFF) {
+
+					/* We do decryption here to avoid blocking ssl_client from receiving more audio data */
+					constexpr size_t header_size = 12;
+
+					uint8_t *buffer = vr.audio_data.data();
+					int packet_size = vr.audio_data.size();
+
+					constexpr size_t nonce_size = sizeof(uint32_t);
+					/* Nonce is 4 byte at the end of payload with zero padding */
+					uint8_t nonce[24] = { 0 };
+					std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size);
+
+					/* Get the number of CSRC in header */
+					const size_t csrc_count = buffer[0] & 0b0000'1111;
+					/* Skip to the encrypted voice data */
+					const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
+					size_t total_header_len = offset_to_data;
+
+					uint8_t* ciphertext = buffer + offset_to_data;
+					size_t ciphertext_len = packet_size - offset_to_data - nonce_size;
+
+					size_t ext_len = 0;
+					if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
+						/**
+						 * Get the RTP Extensions size, we only get the size here because
+						 * the extension itself is encrypted along with the opus packet
+						 */
+						{
+							uint16_t ext_len_in_words;
+							memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t));
+							ext_len_in_words = ntohs(ext_len_in_words);
+							ext_len = sizeof(uint32_t) * ext_len_in_words;
+						}
+						constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
+						ciphertext += ext_header_len;
+						ciphertext_len -= ext_header_len;
+						total_header_len += ext_header_len;
+					}
+
+					uint8_t decrypted[65535] = { 0 };
+					unsigned long long opus_packet_len  = 0;
+					if (ssl_crypto_aead_xchacha20poly1305_ietf_decrypt(
+						decrypted, &opus_packet_len,
+						nullptr,
+						ciphertext, ciphertext_len,
+						buffer,
+						/**
+						 * Additional Data:
+						 * The whole header (including csrc list) +
+						 * 4 byte extension header (magic 0xBEDE + 16-bit denoting extension length)
+						 */
+						total_header_len,
+						nonce, vr.voice_client->secret_key.data()) != 0) {
+						/* Invalid Discord RTP payload. */
+						return;
+					}
+
+					uint8_t *opus_packet = decrypted;
+					if (ext_len > 0) {
+						/* Skip previously encrypted RTP Header Extension */
+						opus_packet += ext_len;
+						opus_packet_len -= ext_len;
+					}
+
+					/**
+					 * If DAVE is enabled, use the user's ratchet to decrypt the OPUS audio data
+					 */
+					std::vector<uint8_t> frame;
+					if (vr.voice_client->is_end_to_end_encrypted()) {
+						auto decryptor = vr.voice_client->mls_state->decryptors.find(vr.user_id);
+						if (decryptor != vr.voice_client->mls_state->decryptors.end()) {
+							frame.resize(decryptor->second->get_max_plaintext_byte_size(dave::media_type::media_audio, opus_packet_len));
+							size_t enc_len = decryptor->second->decrypt(
+								dave::media_type::media_audio,
+								dave::make_array_view<const uint8_t>(opus_packet, opus_packet_len),
+								dave::make_array_view(frame)
+							);
+							if (enc_len > 0) {
+								opus_packet = frame.data();
+								opus_packet_len = enc_len;
+							}
+						}
+					}
+
+					if (opus_packet_len > 0x7FFFFFFF) {
 						throw dpp::length_exception(err_massive_audio, "audio_data > 2GB! This should never happen!");
 					}
-					if (samples = opus_decode(d.decoder.get(), vr.audio_data.data(),
-								  static_cast<opus_int32>(vr.audio_data.size() & 0x7FFFFFFF), pcm, 5760, 0);
+					if (samples = opus_decode(d.decoder.get(), opus_packet,
+							   static_cast<opus_int32>(opus_packet_len & 0x7FFFFFFF), flush_data_pcm, 5760, 0);
 						samples >= 0) {
-						vr.reassign(&client, d.user_id, reinterpret_cast<uint8_t *>(pcm),
-							    samples * opus_channel_count * sizeof(opus_int16));
+						vr.reassign(&client, d.user_id, reinterpret_cast<uint8_t *>(flush_data_pcm),
+									samples * opus_channel_count * sizeof(opus_int16));
 						client.end_gain = 1.0f / client.moving_average;
-						park_count = audio_mix(client, *client.mixer, pcm_mix, pcm, park_count, samples, max_samples);
+						park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, samples, max_samples);
 						client.creator->on_voice_receive.call(vr);
 					}
 
@@ -161,7 +285,6 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 				}
 			}
 		}
-
 		/* If combined receive is bound, dispatch it */
 		if (park_count) {
 
@@ -178,7 +301,7 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 			}
 
 			voice_receive_t vr(nullptr, "", &client, 0, reinterpret_cast<uint8_t *>(pcm_downsample),
-					   max_samples * opus_channel_count * sizeof(opus_int16));
+					  max_samples * opus_channel_count * sizeof(opus_int16));
 
 			client.creator->on_voice_receive_combined.call(vr);
 		}
diff --git a/src/dpp/voice/enabled/read_ready.cpp b/src/dpp/voice/enabled/read_ready.cpp
index 95ac598160..d90ad00587 100644
--- a/src/dpp/voice/enabled/read_ready.cpp
+++ b/src/dpp/voice/enabled/read_ready.cpp
@@ -20,6 +20,7 @@
  *
  ************************************************************************************/
 
+#include <chrono>
 #include <string_view>
 #include <dpp/exception.h>
 #include <dpp/isa_detection.h>
@@ -65,9 +66,12 @@ void discord_voice_client::read_ready()
 		return;
 	}
 
+	auto start = std::chrono::steady_clock::now();
+	std::cout << "read_ready START: " << std::chrono::duration_cast<std::chrono::milliseconds>(start.time_since_epoch()) .count() << "\n";
+
 	voice_payload vp{0, // seq, populate later
-			 0, // timestamp, populate later
-			 std::make_unique<voice_receive_t>(nullptr, std::string(reinterpret_cast<char*>(buffer), packet_size))};
+		0, // timestamp, populate later
+		std::make_unique<voice_receive_t>(nullptr, std::string(reinterpret_cast<char*>(buffer), packet_size))};
 
 	vp.vr->voice_client = this;
 
@@ -86,90 +90,11 @@ void discord_voice_client::read_ready()
 	std::memcpy(&vp.timestamp, &buffer[4], sizeof(rtp_timestamp_t));
 	vp.timestamp = ntohl(vp.timestamp);
 
-	constexpr size_t nonce_size = sizeof(uint32_t);
-	/* Nonce is 4 byte at the end of payload with zero padding */
-	uint8_t nonce[24] = { 0 };
-	std::memcpy(nonce, buffer + packet_size - nonce_size, nonce_size);
-
-	/* Get the number of CSRC in header */
-	const size_t csrc_count = buffer[0] & 0b0000'1111;
-	/* Skip to the encrypted voice data */
-	const ptrdiff_t offset_to_data = header_size + sizeof(uint32_t) * csrc_count;
-	size_t total_header_len = offset_to_data;
-
-	uint8_t* ciphertext = buffer + offset_to_data;
-	size_t ciphertext_len = packet_size - offset_to_data - nonce_size;
-
-	size_t ext_len = 0;
-	if ([[maybe_unused]] const bool uses_extension = (buffer[0] >> 4) & 0b0001) {
-		/**
-		 * Get the RTP Extensions size, we only get the size here because
-		 * the extension itself is encrypted along with the opus packet
-		 */
-		{
-			uint16_t ext_len_in_words;
-			memcpy(&ext_len_in_words, &ciphertext[2], sizeof(uint16_t));
-			ext_len_in_words = ntohs(ext_len_in_words);
-			ext_len = sizeof(uint32_t) * ext_len_in_words;
-		}
-		constexpr size_t ext_header_len = sizeof(uint16_t) * 2;
-		ciphertext += ext_header_len;
-		ciphertext_len -= ext_header_len;
-		total_header_len += ext_header_len;
-	}
-
-	uint8_t decrypted[65535] = { 0 };
-	unsigned long long opus_packet_len  = 0;
-	if (ssl_crypto_aead_xchacha20poly1305_ietf_decrypt(
-		decrypted, &opus_packet_len,
-		nullptr,
-		ciphertext, ciphertext_len,
-		buffer,
-		/**
-		 * Additional Data:
-		 * The whole header (including csrc list) +
-		 * 4 byte extension header (magic 0xBEDE + 16-bit denoting extension length)
-		 */
-		total_header_len,
-		nonce, secret_key.data()) != 0) {
-		/* Invalid Discord RTP payload. */
-		return;
-	}
-
-	uint8_t *opus_packet = decrypted;
-	if (ext_len > 0) {
-		/* Skip previously encrypted RTP Header Extension */
-		opus_packet += ext_len;
-		opus_packet_len -= ext_len;
-	}
-
-	/**
-	 * If DAVE is enabled, use the user's ratchet to decrypt the OPUS audio data
-	 */
-	std::vector<uint8_t> frame;
-	if (is_end_to_end_encrypted()) {
-		auto decryptor = mls_state->decryptors.find(vp.vr->user_id);
-		if (decryptor != mls_state->decryptors.end()) {
-			frame.resize(decryptor->second->get_max_plaintext_byte_size(dave::media_type::media_audio, opus_packet_len));
-			size_t enc_len = decryptor->second->decrypt(
-				dave::media_type::media_audio,
-				dave::make_array_view<const uint8_t>(opus_packet, opus_packet_len),
-				dave::make_array_view(frame)
-			);
-			if (enc_len > 0) {
-				opus_packet = frame.data();
-				opus_packet_len = enc_len;
-			}
-		}
-	}
-
-	/*
-	 * We're left with the decrypted, opus-encoded data.
-	 * Park the payload and decode on the voice courier thread.
-	 */
-	vp.vr->audio_data.assign(opus_packet, opus_packet + opus_packet_len);
+	vp.vr->audio_data.assign(buffer, buffer + packet_size);
 
 	{
+		std::cout << "voice_courier_shared_state.mtx LOCK\n";
+
 		std::lock_guard lk(voice_courier_shared_state.mtx);
 		auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id];
 
@@ -183,7 +108,7 @@ void discord_voice_client::read_ready()
 
 			int opus_error = 0;
 			decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error),
-				      &opus_decoder_destroy);
+				 &opus_decoder_destroy);
 			if (opus_error) {
 				/**
 				 * NOTE: The -10 here makes the opus_error match up with values of exception_error_code,
@@ -195,20 +120,32 @@ void discord_voice_client::read_ready()
 
 		if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) {
 			/* This packet arrived too late. We can only discard it. */
+
+			std::cout << "voice_courier_shared_state.mtx RELEASE\n";
+
 			return;
 		}
 		range.max_seq = vp.seq;
 		range.max_timestamp = vp.timestamp;
 		payload_queue.push(std::move(vp));
+
+		std::cout << "voice_courier_shared_state.mtx RELEASE\n";
 	}
 
+	std::cout << "read_ready NOTIFY_ONE\n";
+
 	voice_courier_shared_state.signal_iteration.notify_one();
 
+	auto end = std::chrono::steady_clock::now();
+	std::cout << "read_ready END: " << std::chrono::duration_cast<std::chrono::milliseconds>(start.time_since_epoch()).count() << "\n";
+
+	std::cout << "read_ready TIME: " << std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count() << "\n";
+
 	if (!voice_courier.joinable()) {
 		/* Courier thread is not running, start it */
 		voice_courier = std::thread(&voice_courier_loop,
-					    std::ref(*this),
-					    std::ref(voice_courier_shared_state));
+							  std::ref(*this),
+							  std::ref(voice_courier_shared_state));
 	}
 }
 

From 72801c3cfb69726c2762a30a41723f12779e6ef7 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sun, 20 Oct 2024 10:11:01 +0700
Subject: [PATCH 07/10] chore: code cleanups

---
 src/dpp/voice/enabled/courier_loop.cpp | 92 +++++++++++---------------
 src/dpp/voice/enabled/read_ready.cpp   | 17 -----
 2 files changed, 37 insertions(+), 72 deletions(-)

diff --git a/src/dpp/voice/enabled/courier_loop.cpp b/src/dpp/voice/enabled/courier_loop.cpp
index f9dab53561..8ba0c0d187 100644
--- a/src/dpp/voice/enabled/courier_loop.cpp
+++ b/src/dpp/voice/enabled/courier_loop.cpp
@@ -33,32 +33,12 @@
 
 namespace dpp {
 
-struct iter_bench {
-	std::chrono::time_point<std::chrono::steady_clock> start;
-	std::string n;
-
-	iter_bench(std::string _n) : n(std::move(_n)) {
-		start = std::chrono::steady_clock::now();
-		std::cout << n << "START: " << std::chrono::duration_cast<std::chrono::milliseconds>(start.time_since_epoch()).count() << "\n";
-	}
-
-	~iter_bench() {
-		auto end = std::chrono::steady_clock::now();
-		std::cout << n << "END: " << std::chrono::duration_cast<std::chrono::milliseconds>(end.time_since_epoch()).count() << "\n";
-		std::cout << n << "BENCH: " << std::chrono::duration_cast<std::chrono::milliseconds>(end - start).count() << "\n";
-	}
-};
-
 void discord_voice_client::voice_courier_loop(discord_voice_client& client, courier_shared_state_t& shared_state) {
 	utility::set_thread_name(std::string("vcourier/") + std::to_string(client.server_id));
-	size_t iter = 0;
-	while (true) {
-		std::cout << "voice_courier_loop ITER: " << ++iter << "\n";
 
+	while (true) {
 		std::this_thread::sleep_for(std::chrono::milliseconds{client.iteration_interval});
 
-		iter_bench b("voice_courier_loop ITER_");
-
 		struct flush_data_t {
 			snowflake user_id;
 			rtp_seq_t min_seq;
@@ -73,9 +53,6 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 		 * release the lock as soon as possible.
 		 */
 		{
-			std::cout << "shared_state.mtx LOCK\n";
-			iter_bench c("voice_courier_loop FLUSH_DATA_");
-
 			std::unique_lock lk(shared_state.mtx);
 
 			/* mitigates vector resizing while holding the mutex */
@@ -84,13 +61,17 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 			bool has_payload_to_deliver = false;
 			for (auto &[user_id, parking_lot]: shared_state.parked_voice_payloads) {
 				has_payload_to_deliver = has_payload_to_deliver || !parking_lot.parked_payloads.empty();
-				flush_data.push_back(flush_data_t{user_id,
+
+				flush_data.push_back(flush_data_t{
+					user_id,
 					parking_lot.range.min_seq,
 					std::move(parking_lot.parked_payloads),
 					/* Quickly check if we already have a decoder and only take the pending ctls if so. */
 					parking_lot.decoder ? std::move(parking_lot.pending_decoder_ctls)
 					: decltype(parking_lot.pending_decoder_ctls){},
-					parking_lot.decoder});
+					parking_lot.decoder
+				});
+
 				parking_lot.range.min_seq = parking_lot.range.max_seq + 1;
 				parking_lot.range.min_timestamp = parking_lot.range.max_timestamp + 1;
 			}
@@ -98,12 +79,10 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 			if (!has_payload_to_deliver) {
 				if (shared_state.terminating) {
 					/* We have delivered all data to handlers. Terminate now. */
-					std::cout << "shared_state.mtx RELEASE\n";
 					break;
 				}
 
 				shared_state.signal_iteration.wait(lk, [&shared_state](){
-					std::cout << "shared_state.mtx LOCKED\n";
 					if (shared_state.terminating) {
 						return true;
 					}
@@ -116,14 +95,10 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 						if (parking_lot.parked_payloads.empty()) {
 							continue;
 						}
-
-						std::cout << "shared_state.mtx RELEASE\n";
 						return true;
 					}
-					std::cout << "shared_state.mtx RELEASE\n";
 					return false;
 				});
-				std::cout << "shared_state.mtx RELEASE\n";
 
 				/*
 				 * More data came or about to terminate, or just a spurious wake.
@@ -131,7 +106,6 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 				 */
 				continue;
 			}
-			std::cout << "shared_state.mtx RELEASE\n";
 		}
 
 		if (client.creator->on_voice_receive.empty() && client.creator->on_voice_receive_combined.empty()) {
@@ -144,7 +118,7 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 
 		/* This 32 bit PCM audio buffer is an upmixed version of the streams
 		 * combined for all users. This is a wider width audio buffer so that
-		  * there is no clipping when there are many loud audio sources at once.
+		 * there is no clipping when there are many loud audio sources at once.
 		 */
 		opus_int32 pcm_mix[23040] = {0};
 		size_t park_count = 0;
@@ -153,7 +127,6 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 
 		opus_int16 flush_data_pcm[23040];
 		for (auto &d: flush_data) {
-			iter_bench c("voice_courier_loop FLUSH_LOOP_");
 			if (!d.decoder) {
 				continue;
 			}
@@ -162,34 +135,38 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 			}
 
 			for (rtp_seq_t seq = d.min_seq; !d.parked_payloads.empty(); ++seq) {
-				iter_bench e("voice_courier_loop SEQ_LOOP_");
-				std::cout << "TOP_SEQ: " << d.parked_payloads.top().seq << "\nCURRENT_SEQ: "<< seq << "\n";
-
 				if (d.parked_payloads.top().seq != seq) {
 					/*
 					 * Lost a packet with sequence number "seq",
 					 * But Opus decoder might be able to guess something.
 					 */
-					if (int samples = opus_decode(d.decoder.get(), nullptr, 0, flush_data_pcm, 5760, 0);
-						samples >= 0) {
+					if (int lost_packet_samples = opus_decode(d.decoder.get(), nullptr, 0, flush_data_pcm, 5760, 0);
+						lost_packet_samples >= 0) {
 						/*
 						 * Since this sample comes from a lost packet,
 						 * we can only pretend there is an event, without any raw payload byte.
 						 */
-						voice_receive_t vr(nullptr, "", &client, d.user_id, reinterpret_cast<uint8_t *>(flush_data_pcm),
-						 samples * opus_channel_count * sizeof(opus_int16));
+						voice_receive_t vr(nullptr, "", &client, d.user_id,
+						 reinterpret_cast<uint8_t *>(flush_data_pcm),
+						 lost_packet_samples * opus_channel_count * sizeof(opus_int16));
 
-						park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, samples, max_samples);
+						park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, lost_packet_samples, max_samples);
 						client.creator->on_voice_receive.call(vr);
 					}
 				} else {
 					voice_receive_t &vr = *d.parked_payloads.top().vr;
 
-					/* We do decryption here to avoid blocking ssl_client from receiving more audio data */
+					/* 
+					 * We do decryption here to avoid blocking ssl_client and saving cpu time by doing it when needed only.
+					 *
+					 * NOTE: You do not want to send audio while also listening for on_voice_receive/on_voice_receive_combined.
+					 * It will cause gaps in your recording, I have no idea why exactly.
+					 */
+
 					constexpr size_t header_size = 12;
 
 					uint8_t *buffer = vr.audio_data.data();
-					int packet_size = vr.audio_data.size();
+					size_t packet_size = vr.audio_data.size();
 
 					constexpr size_t nonce_size = sizeof(uint32_t);
 					/* Nonce is 4 byte at the end of payload with zero padding */
@@ -251,18 +228,21 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 					/**
 					 * If DAVE is enabled, use the user's ratchet to decrypt the OPUS audio data
 					 */
-					std::vector<uint8_t> frame;
+					std::vector<uint8_t> decrypted_dave_frame;
 					if (vr.voice_client->is_end_to_end_encrypted()) {
 						auto decryptor = vr.voice_client->mls_state->decryptors.find(vr.user_id);
+
 						if (decryptor != vr.voice_client->mls_state->decryptors.end()) {
-							frame.resize(decryptor->second->get_max_plaintext_byte_size(dave::media_type::media_audio, opus_packet_len));
+							decrypted_dave_frame.resize(decryptor->second->get_max_plaintext_byte_size(dave::media_type::media_audio, opus_packet_len));
+
 							size_t enc_len = decryptor->second->decrypt(
 								dave::media_type::media_audio,
 								dave::make_array_view<const uint8_t>(opus_packet, opus_packet_len),
-								dave::make_array_view(frame)
+								dave::make_array_view(decrypted_dave_frame)
 							);
+
 							if (enc_len > 0) {
-								opus_packet = frame.data();
+								opus_packet = decrypted_dave_frame.data();
 								opus_packet_len = enc_len;
 							}
 						}
@@ -271,13 +251,15 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 					if (opus_packet_len > 0x7FFFFFFF) {
 						throw dpp::length_exception(err_massive_audio, "audio_data > 2GB! This should never happen!");
 					}
-					if (samples = opus_decode(d.decoder.get(), opus_packet,
-							   static_cast<opus_int32>(opus_packet_len & 0x7FFFFFFF), flush_data_pcm, 5760, 0);
-						samples >= 0) {
-						vr.reassign(&client, d.user_id, reinterpret_cast<uint8_t *>(flush_data_pcm),
-									samples * opus_channel_count * sizeof(opus_int16));
+
+					samples = opus_decode(d.decoder.get(), opus_packet, static_cast<opus_int32>(opus_packet_len & 0x7FFFFFFF), flush_data_pcm, 5760, 0);
+
+					if (samples >= 0) {
+						vr.reassign(&client, d.user_id, reinterpret_cast<uint8_t *>(flush_data_pcm), samples * opus_channel_count * sizeof(opus_int16));
+
 						client.end_gain = 1.0f / client.moving_average;
 						park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, samples, max_samples);
+
 						client.creator->on_voice_receive.call(vr);
 					}
 
@@ -287,12 +269,12 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 		}
 		/* If combined receive is bound, dispatch it */
 		if (park_count) {
-
 			/* Downsample the 32 bit samples back to 16 bit */
 			opus_int16 pcm_downsample[23040] = {0};
 			opus_int16 *pcm_downsample_ptr = pcm_downsample;
 			opus_int32 *pcm_mix_ptr = pcm_mix;
 			client.increment = (client.end_gain - client.current_gain) / static_cast<float>(samples);
+
 			for (int64_t x = 0; x < (samples * opus_channel_count) / client.mixer->byte_blocks_per_register; ++x) {
 				client.mixer->collect_single_register(pcm_mix_ptr, pcm_downsample_ptr, client.current_gain, client.increment);
 				client.current_gain += client.increment * static_cast<float>(client.mixer->byte_blocks_per_register);
diff --git a/src/dpp/voice/enabled/read_ready.cpp b/src/dpp/voice/enabled/read_ready.cpp
index d90ad00587..f6b49e2da0 100644
--- a/src/dpp/voice/enabled/read_ready.cpp
+++ b/src/dpp/voice/enabled/read_ready.cpp
@@ -66,9 +66,6 @@ void discord_voice_client::read_ready()
 		return;
 	}
 
-	auto start = std::chrono::steady_clock::now();
-	std::cout << "read_ready START: " << std::chrono::duration_cast<std::chrono::milliseconds>(start.time_since_epoch()) .count() << "\n";
-
 	voice_payload vp{0, // seq, populate later
 		0, // timestamp, populate later
 		std::make_unique<voice_receive_t>(nullptr, std::string(reinterpret_cast<char*>(buffer), packet_size))};
@@ -93,8 +90,6 @@ void discord_voice_client::read_ready()
 	vp.vr->audio_data.assign(buffer, buffer + packet_size);
 
 	{
-		std::cout << "voice_courier_shared_state.mtx LOCK\n";
-
 		std::lock_guard lk(voice_courier_shared_state.mtx);
 		auto& [range, payload_queue, pending_decoder_ctls, decoder] = voice_courier_shared_state.parked_voice_payloads[vp.vr->user_id];
 
@@ -120,27 +115,15 @@ void discord_voice_client::read_ready()
 
 		if (vp.seq < range.min_seq && vp.timestamp < range.min_timestamp) {
 			/* This packet arrived too late. We can only discard it. */
-
-			std::cout << "voice_courier_shared_state.mtx RELEASE\n";
-
 			return;
 		}
 		range.max_seq = vp.seq;
 		range.max_timestamp = vp.timestamp;
 		payload_queue.push(std::move(vp));
-
-		std::cout << "voice_courier_shared_state.mtx RELEASE\n";
 	}
 
-	std::cout << "read_ready NOTIFY_ONE\n";
-
 	voice_courier_shared_state.signal_iteration.notify_one();
 
-	auto end = std::chrono::steady_clock::now();
-	std::cout << "read_ready END: " << std::chrono::duration_cast<std::chrono::milliseconds>(start.time_since_epoch()).count() << "\n";
-
-	std::cout << "read_ready TIME: " << std::chrono::duration_cast<std::chrono::milliseconds>((end - start)).count() << "\n";
-
 	if (!voice_courier.joinable()) {
 		/* Courier thread is not running, start it */
 		voice_courier = std::thread(&voice_courier_loop,

From 22521c5c964081dc58c443657a291337b4624aee Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sun, 20 Oct 2024 10:27:05 +0700
Subject: [PATCH 08/10] fix: fucked indentation

---
 src/dpp/discordvoiceclient.cpp         |  1 -
 src/dpp/voice/enabled/courier_loop.cpp |  6 +++---
 src/dpp/voice/enabled/read_ready.cpp   | 10 +++++-----
 3 files changed, 8 insertions(+), 9 deletions(-)

diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index d63cccc6a6..66fe8d3550 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -416,7 +416,6 @@ discord_voice_client& discord_voice_client::set_send_audio_type(send_audio_type_
 
 discord_voice_client& discord_voice_client::speak() {
 	if (!this->sending) {
-		std::cout << "Sending voice_opcode_client_speaking\n";
 		this->queue_message(json({
 		{"op", voice_opcode_client_speaking},
 		{"d", {
diff --git a/src/dpp/voice/enabled/courier_loop.cpp b/src/dpp/voice/enabled/courier_loop.cpp
index 8ba0c0d187..72ccccdbfc 100644
--- a/src/dpp/voice/enabled/courier_loop.cpp
+++ b/src/dpp/voice/enabled/courier_loop.cpp
@@ -147,8 +147,8 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 						 * we can only pretend there is an event, without any raw payload byte.
 						 */
 						voice_receive_t vr(nullptr, "", &client, d.user_id,
-						 reinterpret_cast<uint8_t *>(flush_data_pcm),
-						 lost_packet_samples * opus_channel_count * sizeof(opus_int16));
+						                   reinterpret_cast<uint8_t *>(flush_data_pcm),
+						                   lost_packet_samples * opus_channel_count * sizeof(opus_int16));
 
 						park_count = audio_mix(client, *client.mixer, pcm_mix, flush_data_pcm, park_count, lost_packet_samples, max_samples);
 						client.creator->on_voice_receive.call(vr);
@@ -283,7 +283,7 @@ void discord_voice_client::voice_courier_loop(discord_voice_client& client, cour
 			}
 
 			voice_receive_t vr(nullptr, "", &client, 0, reinterpret_cast<uint8_t *>(pcm_downsample),
-					  max_samples * opus_channel_count * sizeof(opus_int16));
+			                   max_samples * opus_channel_count * sizeof(opus_int16));
 
 			client.creator->on_voice_receive_combined.call(vr);
 		}
diff --git a/src/dpp/voice/enabled/read_ready.cpp b/src/dpp/voice/enabled/read_ready.cpp
index f6b49e2da0..03c79b0486 100644
--- a/src/dpp/voice/enabled/read_ready.cpp
+++ b/src/dpp/voice/enabled/read_ready.cpp
@@ -67,8 +67,8 @@ void discord_voice_client::read_ready()
 	}
 
 	voice_payload vp{0, // seq, populate later
-		0, // timestamp, populate later
-		std::make_unique<voice_receive_t>(nullptr, std::string(reinterpret_cast<char*>(buffer), packet_size))};
+	                 0, // timestamp, populate later
+	                 std::make_unique<voice_receive_t>(nullptr, std::string(reinterpret_cast<char*>(buffer), packet_size))};
 
 	vp.vr->voice_client = this;
 
@@ -103,7 +103,7 @@ void discord_voice_client::read_ready()
 
 			int opus_error = 0;
 			decoder.reset(opus_decoder_create(opus_sample_rate_hz, opus_channel_count, &opus_error),
-				 &opus_decoder_destroy);
+			              &opus_decoder_destroy);
 			if (opus_error) {
 				/**
 				 * NOTE: The -10 here makes the opus_error match up with values of exception_error_code,
@@ -127,8 +127,8 @@ void discord_voice_client::read_ready()
 	if (!voice_courier.joinable()) {
 		/* Courier thread is not running, start it */
 		voice_courier = std::thread(&voice_courier_loop,
-							  std::ref(*this),
-							  std::ref(voice_courier_shared_state));
+		                            std::ref(*this),
+		                            std::ref(voice_courier_shared_state));
 	}
 }
 

From 07474789f4158afb09affbe01bc69f9aa1b0f689 Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sun, 20 Oct 2024 10:33:14 +0700
Subject: [PATCH 09/10] fix: only on pause

---
 include/dpp/discordvoiceclient.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h
index 3a5f457938..48401a598d 100644
--- a/include/dpp/discordvoiceclient.h
+++ b/include/dpp/discordvoiceclient.h
@@ -440,7 +440,7 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	bool paused;
 
 	/**
-	 * @brief Whether has sent 5 frame of silence before stopping on pause/stop.
+	 * @brief Whether has sent 5 frame of silence before stopping on pause.
 	 *
 	 * This is to avoid unintended Opus interpolation with subsequent transmissions.
 	 */

From d906a572d37bb8922b00d5e7db48b54f2074f0ed Mon Sep 17 00:00:00 2001
From: Neko-Life <nekolife123579@gmail.com>
Date: Sun, 20 Oct 2024 12:52:42 +0700
Subject: [PATCH 10/10] fix: deadlock

---
 include/dpp/discordvoiceclient.h |  2 ++
 src/dpp/discordvoiceclient.cpp   | 10 ++++++----
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/include/dpp/discordvoiceclient.h b/include/dpp/discordvoiceclient.h
index 48401a598d..1330b7e0b2 100644
--- a/include/dpp/discordvoiceclient.h
+++ b/include/dpp/discordvoiceclient.h
@@ -1035,6 +1035,8 @@ class DPP_EXPORT discord_voice_client : public websocket_client
 	 *
 	 * @param send_now send this packet right away without buffering.
 	 * Do NOT set send_now to true outside write_ready.
+	 * Also make sure you're not locking stream_mutex if you
+	 * don't set send_now to true.
 	 * 
 	 * @return discord_voice_client& Reference to self
 	 * @throw dpp::voice_exception if voice support is not compiled into D++
diff --git a/src/dpp/discordvoiceclient.cpp b/src/dpp/discordvoiceclient.cpp
index 66fe8d3550..4a4f484968 100644
--- a/src/dpp/discordvoiceclient.cpp
+++ b/src/dpp/discordvoiceclient.cpp
@@ -176,10 +176,12 @@ dpp::utility::uptime discord_voice_client::get_remaining() {
 }
 
 discord_voice_client& discord_voice_client::stop_audio() {
-	std::lock_guard<std::mutex> lock(this->stream_mutex);
-	outbuf.clear();
-	track_meta.clear();
-	tracks = 0;
+	{
+		std::lock_guard<std::mutex> lock(this->stream_mutex);
+		outbuf.clear();
+		track_meta.clear();
+		tracks = 0;
+	}
 	this->send_stop_frames();
 	return *this;
 }