From cc3f80aed25f0db8abdb0a7f957c84874d1acc27 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Sun, 27 Jul 2025 16:01:46 -0400 Subject: [PATCH 1/4] Check event queue on null close When calling _tcp_close_api, guarantee that no events for that client remain in the queue, even if the pcb was shut down. This fixes a race where an error callback is processed by the LwIP thread, invalidating the pcb*, after the async thread has dequeued an event that will lead to a close (such as a fin or final ack) but before close() is called. --- src/AsyncTCP.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/src/AsyncTCP.cpp b/src/AsyncTCP.cpp index e0408f6..ced5961 100644 --- a/src/AsyncTCP.cpp +++ b/src/AsyncTCP.cpp @@ -615,11 +615,17 @@ static esp_err_t _tcp_recved(tcp_pcb **pcb, size_t len) { } static err_t _tcp_close_api(struct tcpip_api_call_data *api_call_msg) { + // Unlike the other calls, this is not a direct wrapper of the LwIP function; + // we perform the AsyncClient teardown interlocked safely with the LwIP task. + + // As a postcondition, the queue must not have any events referencing + // this AsyncClient. This is because it is possible for an error event + // to have been queued, clearing the pcb*, but after the async thread + // has committed to closing/destructing the AsyncClient object. + tcp_api_call_t *msg = (tcp_api_call_t *)api_call_msg; msg->err = ERR_CONN; if (*msg->pcb) { - // Unlike the other calls, this is not a direct wrapper of the LwIP function; - // we perform the AsyncClient teardown interlocked safely with the LwIP task. tcp_pcb *pcb = *msg->pcb; _reset_tcp_callbacks(pcb, msg->close); msg->err = tcp_close(pcb); @@ -627,6 +633,9 @@ static err_t _tcp_close_api(struct tcpip_api_call_data *api_call_msg) { tcp_abort(pcb); } *msg->pcb = nullptr; // PCB is now the property of LwIP + } else { + // Ensure there is not an error event queued for this client + _remove_events_for_client(msg->close); } return msg->err; } From f2ec361091dd04a85df8b486490a33af39982288 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 29 Jul 2025 21:50:56 -0400 Subject: [PATCH 2/4] Additional close semantic fixes When close is invoked, make sure to interlock with the LwIP task and clear the queue; checks of pcb are not safe as it is owned by the LwIP task. Extend this checking to include abort(), as well. --- src/AsyncTCP.cpp | 40 ++++++++++++++++++++++------------------ 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/AsyncTCP.cpp b/src/AsyncTCP.cpp index ced5961..ac4194d 100644 --- a/src/AsyncTCP.cpp +++ b/src/AsyncTCP.cpp @@ -619,19 +619,20 @@ static err_t _tcp_close_api(struct tcpip_api_call_data *api_call_msg) { // we perform the AsyncClient teardown interlocked safely with the LwIP task. // As a postcondition, the queue must not have any events referencing - // this AsyncClient. This is because it is possible for an error event - // to have been queued, clearing the pcb*, but after the async thread - // has committed to closing/destructing the AsyncClient object. + // the AsyncClient in api_call_msg->close. This is because it is possible for + // an error event to have been queued, clearing the pcb*, but after the async + // thread has committed to closing/destructing the AsyncClient object. tcp_api_call_t *msg = (tcp_api_call_t *)api_call_msg; msg->err = ERR_CONN; if (*msg->pcb) { tcp_pcb *pcb = *msg->pcb; _reset_tcp_callbacks(pcb, msg->close); - msg->err = tcp_close(pcb); - if (msg->err != ERR_OK) { + if (tcp_close(pcb) != ERR_OK) { + // We do not permit failure here: abandon the pcb anyways. tcp_abort(pcb); } + msg->err = ERR_OK; *msg->pcb = nullptr; // PCB is now the property of LwIP } else { // Ensure there is not an error event queued for this client @@ -641,9 +642,6 @@ static err_t _tcp_close_api(struct tcpip_api_call_data *api_call_msg) { } static esp_err_t _tcp_close(tcp_pcb **pcb, AsyncClient *client) { - if (!pcb || !*pcb) { - return ERR_CONN; - } tcp_api_call_t msg; msg.pcb = pcb; msg.close = client; @@ -652,21 +650,29 @@ static esp_err_t _tcp_close(tcp_pcb **pcb, AsyncClient *client) { } static err_t _tcp_abort_api(struct tcpip_api_call_data *api_call_msg) { + // Like close(), we must ensure that the queue is cleared tcp_api_call_t *msg = (tcp_api_call_t *)api_call_msg; msg->err = ERR_CONN; if (*msg->pcb) { - tcp_abort(*msg->pcb); + tcp_pcb *pcb = *msg->pcb; + _reset_tcp_callbacks(pcb, msg->close); + tcp_abort(pcb); *msg->pcb = nullptr; // PCB is now the property of LwIP + msg->err = ERR_OK; + } else { + // Ensure there is not an error event queued for this client + _remove_events_for_client(msg->close); } return msg->err; } -static esp_err_t _tcp_abort(tcp_pcb **pcb) { +static esp_err_t _tcp_abort(tcp_pcb **pcb, AsyncClient *client) { if (!pcb || !*pcb) { return ERR_CONN; } tcp_api_call_t msg; msg.pcb = pcb; + msg.close = client; tcpip_api_call(_tcp_abort_api, (struct tcpip_api_call_data *)&msg); return msg.err; } @@ -912,7 +918,7 @@ void AsyncClient::close(bool now) { int8_t AsyncClient::abort() { if (_pcb) { - _tcp_abort(&_pcb); + _tcp_abort(&_pcb, this); // _pcb is now NULL } return ERR_ABRT; @@ -977,13 +983,11 @@ void AsyncClient::ackPacket(struct pbuf *pb) { int8_t AsyncClient::_close() { // ets_printf("X: 0x%08x\n", (uint32_t)this); - int8_t err = ERR_OK; - if (_pcb) { - _tcp_close(&_pcb, this); - // _pcb is now NULL - if (_discard_cb) { - _discard_cb(_discard_cb_arg, this); - } + int8_t err = _tcp_close(&_pcb, this); + // _pcb is now NULL + if ((err == ERR_OK) && _discard_cb) { + // _pcb was closed here + _discard_cb(_discard_cb_arg, this); } return err; } From 97c6772f6b5996d8f173ac2d2568d7400daf697c Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 29 Jul 2025 21:55:26 -0400 Subject: [PATCH 3/4] Fix use-after-free in tcp_error When tcp_error is called, the pcb has already been freed by LwIP. Remove the attempt to write through the invalidated pointer. --- src/AsyncTCP.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/AsyncTCP.cpp b/src/AsyncTCP.cpp index ac4194d..8d619bd 100644 --- a/src/AsyncTCP.cpp +++ b/src/AsyncTCP.cpp @@ -484,7 +484,8 @@ void AsyncTCP_detail::tcp_error(void *arg, int8_t err) { // ets_printf("+E: 0x%08x\n", arg); AsyncClient *client = reinterpret_cast(arg); if (client && client->_pcb) { - _reset_tcp_callbacks(client->_pcb, client); + // The pcb has already been freed by LwIP; do not attempt to clear the callbacks! + _remove_events_for_client(client); client->_pcb = nullptr; } From f73621987766f845a45c87ce0273e7c483667800 Mon Sep 17 00:00:00 2001 From: Will Miles Date: Tue, 29 Jul 2025 22:39:16 -0400 Subject: [PATCH 4/4] Ensure dispose is run in close if needed If close is being run when the pcb is nulled, and we find an event on the queue, run dispose. This catches cases where fin or error is pending. --- src/AsyncTCP.cpp | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/AsyncTCP.cpp b/src/AsyncTCP.cpp index 8d619bd..e373927 100644 --- a/src/AsyncTCP.cpp +++ b/src/AsyncTCP.cpp @@ -253,7 +253,7 @@ static inline lwip_tcp_event_packet_t *_get_async_event() { } } -static void _remove_events_for_client(AsyncClient *client) { +static size_t _remove_events_for_client(AsyncClient *client) { lwip_tcp_event_packet_t *removed_event_chain; { queue_mutex_guard guard; @@ -269,6 +269,7 @@ static void _remove_events_for_client(AsyncClient *client) { removed_event_chain = t->next; _free_event(t); } + return count; }; void AsyncTCP_detail::handle_async_event(lwip_tcp_event_packet_t *e) { @@ -637,7 +638,9 @@ static err_t _tcp_close_api(struct tcpip_api_call_data *api_call_msg) { *msg->pcb = nullptr; // PCB is now the property of LwIP } else { // Ensure there is not an error event queued for this client - _remove_events_for_client(msg->close); + if (_remove_events_for_client(msg->close)) { + msg->err = ERR_OK; // dispose needs to be run + } } return msg->err; }