From aba57bbc07442dd366607487715f4e3659fa3f2f Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 18 Nov 2024 15:41:52 +0000 Subject: [PATCH 1/8] Make `test_durable_preimages_on_closed_channel` more robust Makes `test_durable_preimages_on_closed_channel` more robust against changes to the order in which transactions are broadcast. --- lightning/src/ln/chanmon_update_fail_tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lightning/src/ln/chanmon_update_fail_tests.rs b/lightning/src/ln/chanmon_update_fail_tests.rs index 5b276b1c2ae..dc67b198149 100644 --- a/lightning/src/ln/chanmon_update_fail_tests.rs +++ b/lightning/src/ln/chanmon_update_fail_tests.rs @@ -3288,7 +3288,7 @@ fn do_test_durable_preimages_on_closed_channel(close_chans_before_reload: bool, // Finally, check that B created a payment preimage transaction and close out the payment. let bs_txn = nodes[1].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); assert_eq!(bs_txn.len(), if close_chans_before_reload && !close_only_a { 2 } else { 1 }); - let bs_preimage_tx = &bs_txn[0]; + let bs_preimage_tx = bs_txn.iter().find(|tx| tx.input[0].previous_output.txid == as_closing_tx[0].compute_txid()).unwrap(); check_spends!(bs_preimage_tx, as_closing_tx[0]); if !close_chans_before_reload { From f9374b8377cb880e21357ee6914a2354c82d6156 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 14 Nov 2024 14:55:45 +0000 Subject: [PATCH 2/8] When removing `PeerState` check for in-flight mon updates deeply When deciding if we should remove a `PeerState` entry we want to ensure we don't remove if there are pending updates in `in_flight_monitor_updates`. Previously this was done with a simple `in_flight_monitor_updates.is_empty()`, however this can prevent removal of `PeerState` entries if a channel had an update at some point (leaving an entry in the map) but the update was ultimately completed. Instead, we need to iterate over the entries in `in_flight_monitor_updates` and decline to remove `PeerState`s only if there is an entry for a pending update still in-flight. --- lightning/src/ln/channelmanager.rs | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 86faeac9a68..25539218c0f 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1332,6 +1332,11 @@ impl PeerState where SP::Target: SignerProvider { if require_disconnected && self.is_connected { return false } + for (_, updates) in self.in_flight_monitor_updates.iter() { + if !updates.is_empty() { + return false; + } + } !self.channel_by_id.iter().any(|(_, phase)| match phase { ChannelPhase::Funded(_) | ChannelPhase::UnfundedOutboundV1(_) => true, @@ -1341,7 +1346,6 @@ impl PeerState where SP::Target: SignerProvider { } ) && self.monitor_update_blocked_actions.is_empty() - && self.in_flight_monitor_updates.is_empty() && self.closed_channel_monitor_update_ids.is_empty() } From d33687dbef761fb3f0e7d85110218e2ad30f3727 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 18 Nov 2024 15:03:11 +0000 Subject: [PATCH 3/8] Don't generate dup force-close `ChannelMonitorUpdate`s on startup On startup, if we have a channel which was closed immediately before shutdown such that the `ChannelMonitorUpdate` marking the channel as closed is still in-flight, it doesn't make sense to generate a fresh `ChannelMonitorUpdate` marking the channel as closed immediately after the existing in-flight one. 
Here we detect this case and drop the extra update, though its not all that harmful it does avoid some test changes in the coming commits. --- lightning/src/ln/channelmanager.rs | 32 ++++++++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 25539218c0f..a60a12c4b11 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -13599,8 +13599,36 @@ where } } - // Note that we have to do the above replays before we push new monitor updates. - pending_background_events.append(&mut close_background_events); + // The newly generated `close_background_events` have to be added after any updates that + // were already in-flight on shutdown, so we append them here. + pending_background_events.reserve(close_background_events.len()); + 'each_bg_event: for mut new_event in close_background_events { + if let BackgroundEvent::MonitorUpdateRegeneratedOnStartup { + counterparty_node_id, funding_txo, channel_id, update, + } = &mut new_event { + debug_assert_eq!(update.updates.len(), 1); + debug_assert!(matches!(update.updates[0], ChannelMonitorUpdateStep::ChannelForceClosed { .. })); + for pending_event in pending_background_events.iter() { + if let BackgroundEvent::MonitorUpdateRegeneratedOnStartup { + counterparty_node_id: pending_cp, funding_txo: pending_funding, + channel_id: pending_chan_id, update: pending_update, + } = pending_event { + let for_same_channel = counterparty_node_id == pending_cp + && funding_txo == pending_funding + && channel_id == pending_chan_id; + if for_same_channel { + if pending_update.updates.iter().any(|upd| matches!(upd, ChannelMonitorUpdateStep::ChannelForceClosed { .. })) { + // If the background event we're looking at is just + // force-closing the channel which already has a pending + // force-close update, no need to duplicate it. + continue 'each_bg_event; + } + } + } + } + } + pending_background_events.push(new_event); + } // If there's any preimages for forwarded HTLCs hanging around in ChannelMonitors we // should ensure we try them again on the inbound edge. We put them here and do so after we From 385799f6041a0031febbe1c16d554224635e329c Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 18 Nov 2024 01:09:26 +0000 Subject: [PATCH 4/8] Handle events immediately if we are running during block connection During block connection, we cannot apply `ChannelMonitorUpdate`s if we're running during the startup sequence (i.e. before the user has called any methods outside of block connection). We previously handled this by simply always pushing any `ChannelMonitorUpdate`s generated during block connection into the `pending_background_events` queue. However, this results in `ChannelMonitorUpdate`s going through the queue when we could just push them immediately. Here we explicitly check `background_events_processed_since_startup` and use that to decide whether to push updates through the background queue instead. 
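
The shape of the new check, as a standalone sketch (the names `UpdateGate`, `startup_complete`, `queued_updates`, and `apply_now` are invented for illustration and are not the actual `ChannelManager` fields; the real change is in the diff below):

    use std::sync::atomic::{AtomicBool, Ordering};
    use std::sync::Mutex;

    struct UpdateGate {
        startup_complete: AtomicBool,
        queued_updates: Mutex<Vec<u64>>,
    }

    impl UpdateGate {
        fn handle_update(&self, update_id: u64) {
            if self.startup_complete.load(Ordering::Acquire) {
                // Normal operation: hand the update straight to the monitor.
                self.apply_now(update_id);
            } else {
                // Still in the startup sequence: queue it to be replayed later.
                self.queued_updates.lock().unwrap().push(update_id);
            }
        }

        fn apply_now(&self, update_id: u64) {
            println!("applying update {}", update_id);
        }
    }
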
--- lightning/src/ln/channelmanager.rs | 18 +++++++++++------ lightning/src/ln/reorg_tests.rs | 31 ++++++++++++++++++++++-------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index a60a12c4b11..c0713c5e1ae 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -9624,8 +9624,8 @@ where } /// Handle a list of channel failures during a block_connected or block_disconnected call, - /// pushing the channel monitor update (if any) to the background events queue and removing the - /// Channel object. + /// pushing the channel monitor update (if any) to the background events queue (if we're + /// currently in the startup phase) and calling [`Self::finish_close_channel`]. fn handle_init_event_channel_failures(&self, mut failed_channels: Vec) { for mut failure in failed_channels.drain(..) { // Either a commitment transactions has been confirmed on-chain or @@ -9640,10 +9640,16 @@ where if let ChannelMonitorUpdateStep::ChannelForceClosed { should_broadcast } = update.updates[0] { assert!(should_broadcast); } else { unreachable!(); } - self.pending_background_events.lock().unwrap().push( - BackgroundEvent::MonitorUpdateRegeneratedOnStartup { - counterparty_node_id, funding_txo, update, channel_id, - }); + if self.background_events_processed_since_startup.load(Ordering::Acquire) { + let res = self.chain_monitor.update_channel(funding_txo, &update); + debug_assert_eq!(res, ChannelMonitorUpdateStatus::Completed, + "TODO: We don't currently handle failures here, this logic is removed in the next commit"); + } else { + self.pending_background_events.lock().unwrap().push( + BackgroundEvent::MonitorUpdateRegeneratedOnStartup { + counterparty_node_id, funding_txo, update, channel_id, + }); + } } self.finish_close_channel(failure); } diff --git a/lightning/src/ln/reorg_tests.rs b/lightning/src/ln/reorg_tests.rs index c629c5bbce6..b1b4f77c590 100644 --- a/lightning/src/ln/reorg_tests.rs +++ b/lightning/src/ln/reorg_tests.rs @@ -268,6 +268,9 @@ fn do_test_unconf_chan(reload_node: bool, reorg_after_reload: bool, use_funding_ assert_eq!(nodes[1].node.list_channels()[0].confirmations, Some(10)); if !reorg_after_reload { + // With expect_channel_force_closed set the TestChainMonitor will enforce that the next update + // is a ChannelForceClosed on the right channel with should_broadcast set. 
+ *nodes[0].chain_monitor.expect_channel_force_closed.lock().unwrap() = Some((chan.2, true)); if use_funding_unconfirmed { let relevant_txids = nodes[0].node.get_relevant_txids(); assert_eq!(relevant_txids.len(), 1); @@ -293,12 +296,17 @@ fn do_test_unconf_chan(reload_node: bool, reorg_after_reload: bool, use_funding_ let relevant_txids = nodes[0].node.get_relevant_txids(); assert_eq!(relevant_txids.len(), 0); + let txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(txn.len(), 1); + { let per_peer_state = nodes[0].node.per_peer_state.read().unwrap(); let peer_state = per_peer_state.get(&nodes[1].node.get_our_node_id()).unwrap().lock().unwrap(); assert_eq!(peer_state.channel_by_id.len(), 0); assert_eq!(nodes[0].node.short_to_chan_info.read().unwrap().len(), 0); } + + check_added_monitors!(nodes[0], 1); } if reload_node { @@ -310,10 +318,13 @@ fn do_test_unconf_chan(reload_node: bool, reorg_after_reload: bool, use_funding_ let chan_0_monitor_serialized = get_monitor!(nodes[0], chan.2).encode(); reload_node!(nodes[0], *nodes[0].node.get_current_default_configuration(), &nodes_0_serialized, &[&chan_0_monitor_serialized], persister, new_chain_monitor, nodes_0_deserialized); - assert!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().is_empty()); } if reorg_after_reload { + // With expect_channel_force_closed set the TestChainMonitor will enforce that the next update + // is a ChannelForceClosed on the right channel with should_broadcast set. + *nodes[0].chain_monitor.expect_channel_force_closed.lock().unwrap() = Some((chan.2, true)); + if use_funding_unconfirmed { let relevant_txids = nodes[0].node.get_relevant_txids(); assert_eq!(relevant_txids.len(), 1); @@ -345,12 +356,18 @@ fn do_test_unconf_chan(reload_node: bool, reorg_after_reload: bool, use_funding_ assert_eq!(peer_state.channel_by_id.len(), 0); assert_eq!(nodes[0].node.short_to_chan_info.read().unwrap().len(), 0); } + + if reload_node { + // The update may come when we free background events if we just restarted, or in-line if + // we were already running. + nodes[0].node.test_process_background_events(); + } + check_added_monitors!(nodes[0], 1); + + let txn = nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().split_off(0); + assert_eq!(txn.len(), 1); } - // With expect_channel_force_closed set the TestChainMonitor will enforce that the next update - // is a ChannelForcClosed on the right channel with should_broadcast set. - *nodes[0].chain_monitor.expect_channel_force_closed.lock().unwrap() = Some((chan.2, true)); - nodes[0].node.test_process_background_events(); // Required to free the pending background monitor update - check_added_monitors!(nodes[0], 1); + let expected_err = "Funding transaction was un-confirmed. Locked at 6 confs, now have 0 confs."; if reorg_after_reload || !reload_node { handle_announce_close_broadcast_events(&nodes, 0, 1, true, "Channel closed because of an exception: Funding transaction was un-confirmed. 
Locked at 6 confs, now have 0 confs."); @@ -361,8 +378,6 @@ fn do_test_unconf_chan(reload_node: bool, reorg_after_reload: bool, use_funding_ check_closed_event!(nodes[0], 1, ClosureReason::ProcessingError { err: expected_err.to_owned() }, [nodes[1].node.get_our_node_id()], 100000); - assert_eq!(nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().len(), 1); - nodes[0].tx_broadcaster.txn_broadcasted.lock().unwrap().clear(); // Now check that we can create a new channel if reload_node && nodes[0].node.per_peer_state.read().unwrap().len() == 0 { From d1c340a0e1f988e0414aa5425f7c76e515ada6dd Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Mon, 18 Nov 2024 19:46:33 +0000 Subject: [PATCH 5/8] Add additional variants to `handle_new_monitor_update!` In the coming commits we'll start handling `ChannelMonitorUpdate`s during channel closure in-line rather than after dropping locks via `finish_close_channel`. In order to make that easy, here we add a new `REMAIN_LOCKED_UPDATE_ACTIONS_PROCESSED_LATER` variant to `handle_new_monitor_update!` which can attempt to apply an update without dropping the locks and processing `MonitorUpdateCompletionAction`s immediately. --- lightning/src/ln/channelmanager.rs | 55 +++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 13 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index c0713c5e1ae..72612f8928a 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -3245,18 +3245,17 @@ macro_rules! handle_monitor_update_completion { } macro_rules! handle_new_monitor_update { - ($self: ident, $update_res: expr, $chan: expr, _internal, $completed: expr) => { { + ($self: ident, $update_res: expr, $logger: expr, $channel_id: expr, _internal, $completed: expr) => { { debug_assert!($self.background_events_processed_since_startup.load(Ordering::Acquire)); - let logger = WithChannelContext::from(&$self.logger, &$chan.context, None); match $update_res { ChannelMonitorUpdateStatus::UnrecoverableError => { let err_str = "ChannelMonitor[Update] persistence failed unrecoverably. This indicates we cannot continue normal operation and must shut down."; - log_error!(logger, "{}", err_str); + log_error!($logger, "{}", err_str); panic!("{}", err_str); }, ChannelMonitorUpdateStatus::InProgress => { - log_debug!(logger, "ChannelMonitor update for {} in flight, holding messages until the update completes.", - &$chan.context.channel_id()); + log_debug!($logger, "ChannelMonitor update for {} in flight, holding messages until the update completes.", + $channel_id); false }, ChannelMonitorUpdateStatus::Completed => { @@ -3266,22 +3265,52 @@ macro_rules! 
handle_new_monitor_update { } } }; ($self: ident, $update_res: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr, INITIAL_MONITOR) => { - handle_new_monitor_update!($self, $update_res, $chan, _internal, + let logger = WithChannelContext::from(&$self.logger, &$chan.context, None); + handle_new_monitor_update!($self, $update_res, logger, $chan.context.channel_id(), _internal, handle_monitor_update_completion!($self, $peer_state_lock, $peer_state, $per_peer_state_lock, $chan)) }; - ($self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, $per_peer_state_lock: expr, $chan: expr) => { { - let in_flight_updates = $peer_state.in_flight_monitor_updates.entry($funding_txo) + ( + $self: ident, $funding_txo: expr, $update: expr, $peer_state: expr, $logger: expr, + $chan_id: expr, $in_flight_updates: ident, $update_idx: ident, _internal_outer, + $completed: expr + ) => { { + $in_flight_updates = $peer_state.in_flight_monitor_updates.entry($funding_txo) .or_insert_with(Vec::new); // During startup, we push monitor updates as background events through to here in // order to replay updates that were in-flight when we shut down. Thus, we have to // filter for uniqueness here. - let idx = in_flight_updates.iter().position(|upd| upd == &$update) + $update_idx = $in_flight_updates.iter().position(|upd| upd == &$update) .unwrap_or_else(|| { - in_flight_updates.push($update); - in_flight_updates.len() - 1 + $in_flight_updates.push($update); + $in_flight_updates.len() - 1 }); - let update_res = $self.chain_monitor.update_channel($funding_txo, &in_flight_updates[idx]); - handle_new_monitor_update!($self, update_res, $chan, _internal, + let update_res = $self.chain_monitor.update_channel($funding_txo, &$in_flight_updates[$update_idx]); + handle_new_monitor_update!($self, update_res, $logger, $chan_id, _internal, $completed) + } }; + ( + $self: ident, $funding_txo: expr, $update: expr, $peer_state: expr, $chan_context: expr, + REMAIN_LOCKED_UPDATE_ACTIONS_PROCESSED_LATER + ) => { { + let logger = WithChannelContext::from(&$self.logger, &$chan_context, None); + let chan_id = $chan_context.channel_id(); + let in_flight_updates; + let idx; + handle_new_monitor_update!($self, $funding_txo, $update, $peer_state, logger, chan_id, + in_flight_updates, idx, _internal_outer, + { + let _ = in_flight_updates.remove(idx); + }) + } }; + ( + $self: ident, $funding_txo: expr, $update: expr, $peer_state_lock: expr, $peer_state: expr, + $per_peer_state_lock: expr, $chan: expr + ) => { { + let logger = WithChannelContext::from(&$self.logger, &$chan.context, None); + let chan_id = $chan.context.channel_id(); + let in_flight_updates; + let idx; + handle_new_monitor_update!($self, $funding_txo, $update, $peer_state, logger, chan_id, + in_flight_updates, idx, _internal_outer, { let _ = in_flight_updates.remove(idx); if in_flight_updates.is_empty() && $chan.blocked_monitor_updates_pending() == 0 { From 70a375151ee9dac264099a85c326d1fc1becd834 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Thu, 5 Dec 2024 20:53:21 +0000 Subject: [PATCH 6/8] Add monitor update handling to `update_maps_on_chan_removal` Closing channels requires a two step process - first `update_maps_on_chan_removal` is called while holding the same per-peer lock under which the channel reached the terminal state, then after dropping the same lock(s), `finish_close_channel` is called. 
Because the channel is closed and thus no further `ChannelMonitorUpdate`s are generated for the off-chain state, we'd previously applied the `ChannelMonitorUpdate` in `finish_close_channel`. This was tweaked somewhat in c99d3d785dd78f594837d283636b82222c3b9ef1 when we stopped using `u64::MAX` for any updates after closure. However, we worked around the races that implied by setting the `update_id` only when we go to apply the `ChannelMonitorUpdate`, rather than when we create it. In a coming commit, we'll need to have an `update_id` immediately upon creation (to track in-flight updates that haven't reached application yet). This implies that we can no longer apply closure `ChannelMonitorUpdate`s after dropping the per-peer lock(s), as the updates must be well-ordered with any later updates to the same channel, even after it has been closed. Thus, here, we add `ChannelMonitorUpdate` handling to `update_maps_on_chan_removal`, renaming it `locked_close_channel` to better capture its new purpose. --- lightning/src/ln/channelmanager.rs | 229 +++++++++++++++++------------ 1 file changed, 138 insertions(+), 91 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index 72612f8928a..be97d96b627 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -1000,6 +1000,9 @@ enum BackgroundEvent { /// `counterparty_node_id` is not available as the channel has closed from a [`ChannelMonitor`] /// error the other variant is acceptable. /// + /// Any such events that exist in [`ChannelManager::pending_background_events`] will *also* be + /// tracked in [`PeerState::in_flight_monitor_updates`]. + /// /// Note that any such events are lost on shutdown, so in general they must be updates which /// are regenerated on startup. MonitorUpdateRegeneratedOnStartup { @@ -1290,6 +1293,13 @@ pub(super) struct PeerState where SP::Target: SignerProvider { /// Note that the channel may no longer exist. For example if the channel was closed but we /// later needed to claim an HTLC which is pending on-chain, we may generate a monitor update /// for a missing channel. + /// + /// Note that any pending [`BackgroundEvent::MonitorUpdateRegeneratedOnStartup`]s which are + /// sitting in [`ChannelManager::pending_background_events`] will *also* be tracked here. This + /// avoids a race condition during [`ChannelManager::pending_background_events`] processing + /// where we complete one [`ChannelMonitorUpdate`] (but there are more pending as background + /// events) but we conclude all pending [`ChannelMonitorUpdate`]s have completed and its safe + /// to run post-completion actions. in_flight_monitor_updates: BTreeMap>, /// Map from a specific channel to some action(s) that should be taken when all pending /// [`ChannelMonitorUpdate`]s for the channel complete updating. @@ -2942,8 +2952,34 @@ macro_rules! handle_error { /// /// Note that this step can be skipped if the channel was never opened (through the creation of a /// [`ChannelMonitor`]/channel funding transaction) to begin with. -macro_rules! update_maps_on_chan_removal { - ($self: expr, $peer_state: expr, $channel_context: expr) => {{ +macro_rules! 
locked_close_channel { + ($self: ident, $peer_state: expr, $channel_context: expr, $shutdown_res_mut: expr) => {{ + if let Some((counterparty_node_id, funding_txo, channel_id, update)) = $shutdown_res_mut.monitor_update.take() { + if $self.background_events_processed_since_startup.load(Ordering::Acquire) { + handle_new_monitor_update!($self, funding_txo, update, $peer_state, + $channel_context, REMAIN_LOCKED_UPDATE_ACTIONS_PROCESSED_LATER); + } else { + // We want to track the in-flight update both in `in_flight_monitor_updates` and in + // `pending_background_events` to avoid a race condition during + // `pending_background_events` processing where we complete one + // `ChannelMonitorUpdate` (but there are more pending as background events) but we + // conclude that all pending `ChannelMonitorUpdate`s have completed and its safe to + // run post-completion actions. We could work around that with some effort, but its + // simpler to just track updates twice. + let in_flight_updates = $peer_state.in_flight_monitor_updates.entry(funding_txo) + .or_insert_with(Vec::new); + if !in_flight_updates.contains(&update) { + in_flight_updates.push(update.clone()); + } + let event = BackgroundEvent::MonitorUpdateRegeneratedOnStartup { + counterparty_node_id, + funding_txo, + channel_id, + update, + }; + $self.pending_background_events.lock().unwrap().push(event); + } + } // If there's a possibility that we need to generate further monitor updates for this // channel, we need to store the last update_id of it. However, we don't want to insert // into the map (which prevents the `PeerState` from being cleaned up) for channels that @@ -3003,8 +3039,8 @@ macro_rules! convert_chan_phase_err { ChannelError::Close((msg, reason)) => { let logger = WithChannelContext::from(&$self.logger, &$channel.context, None); log_error!(logger, "Closing channel {} due to close-required error: {}", $channel_id, msg); - update_maps_on_chan_removal!($self, $peer_state, $channel.context); - let shutdown_res = $channel.context.force_shutdown(true, reason); + let mut shutdown_res = $channel.context.force_shutdown(true, reason); + locked_close_channel!($self, $peer_state, &$channel.context, &mut shutdown_res); let err = MsgHandleErrInternal::from_finish_shutdown(msg, *$channel_id, shutdown_res, $channel_update); (true, err) @@ -3071,10 +3107,10 @@ macro_rules! try_chan_phase_entry { } macro_rules! remove_channel_phase { - ($self: expr, $peer_state: expr, $entry: expr) => { + ($self: ident, $peer_state: expr, $entry: expr, $shutdown_res_mut: expr) => { { let channel = $entry.remove_entry().1; - update_maps_on_chan_removal!($self, $peer_state, &channel.context()); + locked_close_channel!($self, $peer_state, &channel.context(), $shutdown_res_mut); channel } } @@ -3793,8 +3829,10 @@ where peer_state_lock, peer_state, per_peer_state, chan); } } else { - let mut chan_phase = remove_channel_phase!(self, peer_state, chan_phase_entry); - shutdown_result = Some(chan_phase.context_mut().force_shutdown(false, ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(false) })); + let mut shutdown_res = chan_phase_entry.get_mut().context_mut() + .force_shutdown(false, ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(false) }); + remove_channel_phase!(self, peer_state, chan_phase_entry, shutdown_res); + shutdown_result = Some(shutdown_res); } }, hash_map::Entry::Vacant(_) => { @@ -3915,8 +3953,8 @@ where && matches!(&monitor_update.updates[0], ChannelMonitorUpdateStep::ChannelForceClosed { .. 
}); // If the ChannelMonitorUpdate is closing a channel that never got past initial // funding (to have any commitment updates), we'll skip inserting in - // `update_maps_on_chan_removal`, allowing us to avoid keeping around the PeerState - // for that peer. In that specific case we expect no entry in the map here. In any + // `locked_close_channel`, allowing us to avoid keeping around the PeerState for + // that peer. In that specific case we expect no entry in the map here. In any // other cases, this is a bug, but in production we go ahead and recover by // inserting the update_id and hoping its right. debug_assert!(is_closing_unupdated_monitor, "Expected closing monitor against an unused channel, got {:?}", monitor_update); @@ -3940,7 +3978,7 @@ where } /// When a channel is removed, two things need to happen: - /// (a) [`update_maps_on_chan_removal`] must be called in the same `per_peer_state` lock as + /// (a) [`locked_close_channel`] must be called in the same `per_peer_state` lock as /// the channel-closing action, /// (b) this needs to be called without holding any locks (except /// [`ChannelManager::total_consistency_lock`]. @@ -3964,12 +4002,31 @@ where self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, receiver); } if let Some((_, funding_txo, _channel_id, monitor_update)) = shutdown_res.monitor_update { - // There isn't anything we can do if we get an update failure - we're already - // force-closing. The monitor update on the required in-memory copy should broadcast - // the latest local state, which is the best we can do anyway. Thus, it is safe to - // ignore the result here. + debug_assert!(false, "This should have been handled in `locked_close_channel`"); let _ = self.apply_post_close_monitor_update(shutdown_res.counterparty_node_id, shutdown_res.channel_id, funding_txo, monitor_update); } + if self.background_events_processed_since_startup.load(Ordering::Acquire) { + // If a `ChannelMonitorUpdate` was applied (i.e. any time we have a funding txo and are + // not in the startup sequence) check if we need to handle any + // `MonitorUpdateCompletionAction`s. + // TODO: If we do the `in_flight_monitor_updates.is_empty()` check in + // `locked_close_channel` we can skip the locks here. 
+ if let Some(funding_txo) = shutdown_res.channel_funding_txo { + let per_peer_state = self.per_peer_state.read().unwrap(); + if let Some(peer_state_mtx) = per_peer_state.get(&shutdown_res.counterparty_node_id) { + let mut peer_state = peer_state_mtx.lock().unwrap(); + if peer_state.in_flight_monitor_updates.get(&funding_txo).map(|l| l.is_empty()).unwrap_or(true) { + let update_actions = peer_state.monitor_update_blocked_actions + .remove(&shutdown_res.channel_id).unwrap_or(Vec::new()); + + mem::drop(peer_state); + mem::drop(per_peer_state); + + self.handle_monitor_update_completion_actions(update_actions); + } + } + } + } let mut shutdown_results = Vec::new(); if let Some(txid) = shutdown_res.unbroadcasted_batch_funding_txid { let mut funding_batch_states = self.funding_batch_states.lock().unwrap(); @@ -3980,8 +4037,9 @@ where if let Some(peer_state_mutex) = per_peer_state.get(&counterparty_node_id) { let mut peer_state = peer_state_mutex.lock().unwrap(); if let Some(mut chan) = peer_state.channel_by_id.remove(&channel_id) { - update_maps_on_chan_removal!(self, peer_state, &chan.context()); - shutdown_results.push(chan.context_mut().force_shutdown(false, ClosureReason::FundingBatchClosure)); + let mut close_res = chan.context_mut().force_shutdown(false, ClosureReason::FundingBatchClosure); + locked_close_channel!(self, &mut *peer_state, chan.context(), close_res); + shutdown_results.push(close_res); } } has_uncompleted_channel = Some(has_uncompleted_channel.map_or(!state, |v| v || !state)); @@ -4038,23 +4096,26 @@ where ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(broadcast) } }; let logger = WithContext::from(&self.logger, Some(*peer_node_id), Some(*channel_id), None); - if let hash_map::Entry::Occupied(chan_phase_entry) = peer_state.channel_by_id.entry(channel_id.clone()) { + if let hash_map::Entry::Occupied(mut chan_phase_entry) = peer_state.channel_by_id.entry(channel_id.clone()) { log_error!(logger, "Force-closing channel {}", channel_id); - let mut chan_phase = remove_channel_phase!(self, peer_state, chan_phase_entry); - mem::drop(peer_state); - mem::drop(per_peer_state); - match chan_phase { - ChannelPhase::Funded(mut chan) => { - self.finish_close_channel(chan.context.force_shutdown(broadcast, closure_reason)); - (self.get_channel_update_for_broadcast(&chan).ok(), chan.context.get_counterparty_node_id()) + let (mut shutdown_res, update_opt) = match chan_phase_entry.get_mut() { + ChannelPhase::Funded(ref mut chan) => { + ( + chan.context.force_shutdown(broadcast, closure_reason), + self.get_channel_update_for_broadcast(&chan).ok(), + ) }, ChannelPhase::UnfundedOutboundV1(_) | ChannelPhase::UnfundedInboundV1(_) | ChannelPhase::UnfundedOutboundV2(_) | ChannelPhase::UnfundedInboundV2(_) => { - self.finish_close_channel(chan_phase.context_mut().force_shutdown(false, closure_reason)); // Unfunded channel has no update - (None, chan_phase.context().get_counterparty_node_id()) + (chan_phase_entry.get_mut().context_mut().force_shutdown(false, closure_reason), None) }, - } + }; + let chan_phase = remove_channel_phase!(self, peer_state, chan_phase_entry, shutdown_res); + mem::drop(peer_state); + mem::drop(per_peer_state); + self.finish_close_channel(shutdown_res); + (update_opt, chan_phase.context().get_counterparty_node_id()) } else if peer_state.inbound_channel_request_by_id.remove(channel_id).is_some() { log_error!(logger, "Force-closing channel {}", &channel_id); // N.B. 
that we don't send any channel close event here: we @@ -5318,9 +5379,10 @@ where .map(|peer_state_mutex| peer_state_mutex.lock().unwrap()) .and_then(|mut peer_state| peer_state.channel_by_id.remove(&channel_id).map(|chan| (chan, peer_state))) .map(|(mut chan, mut peer_state)| { - update_maps_on_chan_removal!(self, peer_state, &chan.context()); let closure_reason = ClosureReason::ProcessingError { err: e.clone() }; - shutdown_results.push(chan.context_mut().force_shutdown(false, closure_reason)); + let mut close_res = chan.context_mut().force_shutdown(false, closure_reason); + locked_close_channel!(self, peer_state, chan.context(), close_res); + shutdown_results.push(close_res); peer_state.pending_msg_events.push(events::MessageSendEvent::HandleError { node_id: counterparty_node_id, action: msgs::ErrorAction::SendErrorMessage { @@ -6420,8 +6482,9 @@ where log_error!(logger, "Force-closing pending channel with ID {} for not establishing in a timely manner", context.channel_id()); - update_maps_on_chan_removal!(self, $peer_state, context); - shutdown_channels.push(context.force_shutdown(false, ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(false) })); + let mut close_res = context.force_shutdown(false, ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(false) }); + locked_close_channel!(self, $peer_state, context, close_res); + shutdown_channels.push(close_res); $pending_msg_events.push(MessageSendEvent::HandleError { node_id: context.get_counterparty_node_id(), action: msgs::ErrorAction::SendErrorMessage { @@ -8105,9 +8168,8 @@ where // Note that at this point we've filled in the funding outpoint on our // channel, but its actually in conflict with another channel. Thus, if // we call `convert_chan_phase_err` immediately (thus calling - // `update_maps_on_chan_removal`), we'll remove the existing channel - // from `outpoint_to_peer`. Thus, we must first unset the funding outpoint - // on the channel. + // `locked_close_channel`), we'll remove the existing channel from `outpoint_to_peer`. + // Thus, we must first unset the funding outpoint on the channel. 
let err = ChannelError::close($err.to_owned()); chan.unset_funding_info(msg.temporary_channel_id); return Err(convert_chan_phase_err!(self, peer_state, err, chan, &funded_channel_id, UNFUNDED_CHANNEL).1); @@ -8597,9 +8659,12 @@ where }, ChannelPhase::UnfundedInboundV1(_) | ChannelPhase::UnfundedOutboundV1(_) | ChannelPhase::UnfundedInboundV2(_) | ChannelPhase::UnfundedOutboundV2(_) => { - log_error!(self.logger, "Immediately closing unfunded channel {} as peer asked to cooperatively shut it down (which is unnecessary)", &msg.channel_id); - let mut chan = remove_channel_phase!(self, peer_state, chan_phase_entry); - finish_shutdown = Some(chan.context_mut().force_shutdown(false, ClosureReason::CounterpartyCoopClosedUnfundedChannel)); + let context = phase.context_mut(); + let logger = WithChannelContext::from(&self.logger, context, None); + log_error!(logger, "Immediately closing unfunded channel {} as peer asked to cooperatively shut it down (which is unnecessary)", &msg.channel_id); + let mut close_res = phase.context_mut().force_shutdown(false, ClosureReason::CounterpartyCoopClosedUnfundedChannel); + remove_channel_phase!(self, peer_state, chan_phase_entry, close_res); + finish_shutdown = Some(close_res); }, } } else { @@ -8640,14 +8705,19 @@ where msg, }); } - if tx.is_some() { + if let Some(mut close_res) = shutdown_result { // We're done with this channel, we've got a signed closing transaction and // will send the closing_signed back to the remote peer upon return. This // also implies there are no pending HTLCs left on the channel, so we can // fully delete it from tracking (the channel monitor is still around to // watch for old state broadcasts)! - (tx, Some(remove_channel_phase!(self, peer_state, chan_phase_entry)), shutdown_result) - } else { (tx, None, shutdown_result) } + debug_assert!(tx.is_some()); + let channel_phase = remove_channel_phase!(self, peer_state, chan_phase_entry, close_res); + (tx, Some(channel_phase), Some(close_res)) + } else { + debug_assert!(tx.is_none()); + (tx, None, None) + } } else { return try_chan_phase_entry!(self, peer_state, Err(ChannelError::close( "Got a closing_signed message for an unfunded channel!".into())), chan_phase_entry); @@ -9370,14 +9440,16 @@ where let mut peer_state_lock = peer_state_mutex.lock().unwrap(); let peer_state = &mut *peer_state_lock; let pending_msg_events = &mut peer_state.pending_msg_events; - if let hash_map::Entry::Occupied(chan_phase_entry) = peer_state.channel_by_id.entry(channel_id) { - if let ChannelPhase::Funded(mut chan) = remove_channel_phase!(self, peer_state, chan_phase_entry) { - let reason = if let MonitorEvent::HolderForceClosedWithInfo { reason, .. } = monitor_event { - reason - } else { - ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) } - }; - failed_channels.push(chan.context.force_shutdown(false, reason.clone())); + if let hash_map::Entry::Occupied(mut chan_phase_entry) = peer_state.channel_by_id.entry(channel_id) { + let reason = if let MonitorEvent::HolderForceClosedWithInfo { reason, .. 
} = monitor_event { + reason + } else { + ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) } + }; + let mut shutdown_res = chan_phase_entry.get_mut().context_mut().force_shutdown(false, reason.clone()); + let chan_phase = remove_channel_phase!(self, peer_state, chan_phase_entry, shutdown_res); + failed_channels.push(shutdown_res); + if let ChannelPhase::Funded(chan) = chan_phase { if let Ok(update) = self.get_channel_update_for_broadcast(&chan) { let mut pending_broadcast_messages = self.pending_broadcast_messages.lock().unwrap(); pending_broadcast_messages.push(events::MessageSendEvent::BroadcastChannelUpdate { @@ -9387,7 +9459,10 @@ where pending_msg_events.push(events::MessageSendEvent::HandleError { node_id: chan.context.get_counterparty_node_id(), action: msgs::ErrorAction::DisconnectPeer { - msg: Some(msgs::ErrorMessage { channel_id: chan.context.channel_id(), data: reason.to_string() }) + msg: Some(msgs::ErrorMessage { + channel_id: chan.context.channel_id(), + data: reason.to_string() + }) }, }); } @@ -9563,11 +9638,11 @@ where Some((_, channel_id)) if chan.context().channel_id() != channel_id => None, _ => unblock_chan(chan, &mut peer_state.pending_msg_events), }; - if let Some(shutdown_result) = shutdown_result { + if let Some(mut shutdown_result) = shutdown_result { let context = &chan.context(); let logger = WithChannelContext::from(&self.logger, context, None); log_trace!(logger, "Removing channel {} now that the signer is unblocked", context.channel_id()); - update_maps_on_chan_removal!(self, peer_state, context); + locked_close_channel!(self, peer_state, context, shutdown_result); shutdown_results.push(shutdown_result); false } else { @@ -9608,7 +9683,8 @@ where }); } debug_assert_eq!(shutdown_result_opt.is_some(), chan.is_shutdown()); - if let Some(shutdown_result) = shutdown_result_opt { + if let Some(mut shutdown_result) = shutdown_result_opt { + locked_close_channel!(self, peer_state, &chan.context, shutdown_result); shutdown_results.push(shutdown_result); } if let Some(tx) = tx_opt { @@ -9623,7 +9699,6 @@ where log_info!(logger, "Broadcasting {}", log_tx!(tx)); self.tx_broadcaster.broadcast_transactions(&[&tx]); - update_maps_on_chan_removal!(self, peer_state, &chan.context); false } else { true } }, @@ -9652,38 +9727,6 @@ where has_update } - /// Handle a list of channel failures during a block_connected or block_disconnected call, - /// pushing the channel monitor update (if any) to the background events queue (if we're - /// currently in the startup phase) and calling [`Self::finish_close_channel`]. - fn handle_init_event_channel_failures(&self, mut failed_channels: Vec) { - for mut failure in failed_channels.drain(..) { - // Either a commitment transactions has been confirmed on-chain or - // Channel::block_disconnected detected that the funding transaction has been - // reorganized out of the main chain. - // We cannot broadcast our latest local state via monitor update (as - // Channel::force_shutdown tries to make us do) as we may still be in initialization, - // so we track the update internally and handle it when the user next calls - // timer_tick_occurred, guaranteeing we're running normally. 
- if let Some((counterparty_node_id, funding_txo, channel_id, update)) = failure.monitor_update.take() { - assert_eq!(update.updates.len(), 1); - if let ChannelMonitorUpdateStep::ChannelForceClosed { should_broadcast } = update.updates[0] { - assert!(should_broadcast); - } else { unreachable!(); } - if self.background_events_processed_since_startup.load(Ordering::Acquire) { - let res = self.chain_monitor.update_channel(funding_txo, &update); - debug_assert_eq!(res, ChannelMonitorUpdateStatus::Completed, - "TODO: We don't currently handle failures here, this logic is removed in the next commit"); - } else { - self.pending_background_events.lock().unwrap().push( - BackgroundEvent::MonitorUpdateRegeneratedOnStartup { - counterparty_node_id, funding_txo, update, channel_id, - }); - } - } - self.finish_close_channel(failure); - } - } - /// Utility for creating a BOLT11 invoice that can be verified by [`ChannelManager`] without /// storing any additional state. It achieves this by including a [`PaymentSecret`] in the /// invoice which it uses to verify that the invoice has not expired and the payment amount is @@ -11095,11 +11138,12 @@ where } } } else if let Err(reason) = res { - update_maps_on_chan_removal!(self, peer_state, &channel.context); // It looks like our counterparty went on-chain or funding transaction was // reorged out of the main chain. Close the channel. let reason_message = format!("{}", reason); - failed_channels.push(channel.context.force_shutdown(true, reason)); + let mut close_res = channel.context.force_shutdown(true, reason); + locked_close_channel!(self, peer_state, &channel.context, close_res); + failed_channels.push(close_res); if let Ok(update) = self.get_channel_update_for_broadcast(&channel) { let mut pending_broadcast_messages = self.pending_broadcast_messages.lock().unwrap(); pending_broadcast_messages.push(events::MessageSendEvent::BroadcastChannelUpdate { @@ -11175,7 +11219,9 @@ where }); } - self.handle_init_event_channel_failures(failed_channels); + for failure in failed_channels { + self.finish_close_channel(failure); + } for (source, payment_hash, reason, destination) in timed_out_htlcs.drain(..) { self.fail_htlc_backwards_internal(&source, &payment_hash, &reason, destination); @@ -11534,8 +11580,9 @@ where }, }; // Clean up for removal. - update_maps_on_chan_removal!(self, peer_state, &context); - failed_channels.push(context.force_shutdown(false, ClosureReason::DisconnectedPeer)); + let mut close_res = context.force_shutdown(false, ClosureReason::DisconnectedPeer); + locked_close_channel!(self, peer_state, &context, close_res); + failed_channels.push(close_res); false }); // Note that we don't bother generating any events for pre-accept channels - From 4766e99e6f332f1c7fb51589333968156922ac17 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Sat, 16 Nov 2024 14:03:08 +0000 Subject: [PATCH 7/8] Properly enforce that all `ChannelMonitorUpdate`s are ordered c99d3d785dd78f594837d283636b82222c3b9ef1 updated `ChannelMonitorUpdate::update_id` to continue counting up even after the channel is closed. It, however, accidentally updated the `ChannelMonitorUpdate` application logic to skip testing that `ChannelMonitorUpdate`s are well-ordered after the channel has been closed (in an attempt to ensure other checks in the same conditional block were applied). This fixes that oversight. 
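
The restored invariant, as a standalone sketch (the `Monitor` type and `CLOSED_CHANNEL_UPDATE_ID` constant are illustrative stand-ins, not the real `ChannelMonitorImpl` internals; the actual check is in the diff below):

    const CLOSED_CHANNEL_UPDATE_ID: u64 = u64::MAX;

    struct Monitor {
        latest_update_id: u64,
    }

    impl Monitor {
        fn apply(&mut self, update_id: u64) {
            // Every update must directly follow the last one applied, even after
            // the channel has been closed; only the legacy sentinel is exempt.
            if update_id != CLOSED_CHANNEL_UPDATE_ID {
                assert_eq!(self.latest_update_id + 1, update_id,
                    "ChannelMonitorUpdates must be applied in order");
                self.latest_update_id = update_id;
            }
        }
    }
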
--- lightning/src/chain/channelmonitor.rs | 7 ++- lightning/src/ln/monitor_tests.rs | 72 ++++++++++++++++++++++++++- lightning/src/sync/nostd_sync.rs | 3 ++ 3 files changed, 79 insertions(+), 3 deletions(-) diff --git a/lightning/src/chain/channelmonitor.rs b/lightning/src/chain/channelmonitor.rs index 95d0b5f8a7b..9f12ff3dba3 100644 --- a/lightning/src/chain/channelmonitor.rs +++ b/lightning/src/chain/channelmonitor.rs @@ -3150,8 +3150,11 @@ impl ChannelMonitorImpl { panic!("Attempted to apply post-force-close ChannelMonitorUpdate that wasn't providing a payment preimage"); }, } - } else if self.latest_update_id + 1 != updates.update_id { - panic!("Attempted to apply ChannelMonitorUpdates out of order, check the update_id before passing an update to update_monitor!"); + } + if updates.update_id != LEGACY_CLOSED_CHANNEL_UPDATE_ID { + if self.latest_update_id + 1 != updates.update_id { + panic!("Attempted to apply ChannelMonitorUpdates out of order, check the update_id before passing an update to update_monitor!"); + } } let mut ret = Ok(()); let bounded_fee_estimator = LowerBoundedFeeEstimator::new(&**fee_estimator); diff --git a/lightning/src/ln/monitor_tests.rs b/lightning/src/ln/monitor_tests.rs index 99ad31d3583..358b1e8ff7f 100644 --- a/lightning/src/ln/monitor_tests.rs +++ b/lightning/src/ln/monitor_tests.rs @@ -10,7 +10,7 @@ //! Further functional tests which test blockchain reorganizations. use crate::sign::{ecdsa::EcdsaChannelSigner, OutputSpender, SpendableOutputDescriptor}; -use crate::chain::channelmonitor::{ANTI_REORG_DELAY, LATENCY_GRACE_PERIOD_BLOCKS, Balance, BalanceSource}; +use crate::chain::channelmonitor::{ANTI_REORG_DELAY, LATENCY_GRACE_PERIOD_BLOCKS, Balance, BalanceSource, ChannelMonitorUpdateStep}; use crate::chain::transaction::OutPoint; use crate::chain::chaininterface::{ConfirmationTarget, LowerBoundedFeeEstimator, compute_feerate_sat_per_1000_weight}; use crate::events::bump_transaction::{BumpTransactionEvent, WalletSource}; @@ -3175,3 +3175,73 @@ fn test_event_replay_causing_monitor_replay() { // Expect the `PaymentSent` to get replayed, this time without the duplicate monitor update expect_payment_sent(&nodes[0], payment_preimage, None, false, false /* expected post-event monitor update*/); } + +#[test] +fn test_update_replay_panics() { + // Tests that replaying a `ChannelMonitorUpdate` or applying them out-of-order causes a panic. + let chanmon_cfgs = create_chanmon_cfgs(2); + let node_cfgs = create_node_cfgs(2, &chanmon_cfgs); + let node_chanmgrs = create_node_chanmgrs(2, &node_cfgs, &[None, None]); + let nodes = create_network(2, &node_cfgs, &node_chanmgrs); + + let chan = create_announced_chan_between_nodes(&nodes, 0, 1); + let monitor = get_monitor!(nodes[1], chan.2).clone(); + + // Create some updates to apply + let (payment_preimage_1, payment_hash_1, ..) = route_payment(&nodes[0], &[&nodes[1]], 1_000_000); + let (payment_preimage_2, payment_hash_2, ..) 
= route_payment(&nodes[0], &[&nodes[1]], 1_000_000); + nodes[1].node.force_close_broadcasting_latest_txn(&chan.2, &nodes[0].node.get_our_node_id(), "".to_owned()).unwrap(); + let reason = ClosureReason::HolderForceClosed { broadcasted_latest_txn: Some(true) }; + check_closed_event(&nodes[1], 1, reason, false, &[nodes[0].node.get_our_node_id()], 100_000); + check_closed_broadcast(&nodes[1], 1, true); + check_added_monitors(&nodes[1], 1); + + nodes[1].node.claim_funds(payment_preimage_1); + check_added_monitors(&nodes[1], 1); + expect_payment_claimed!(nodes[1], payment_hash_1, 1_000_000); + + nodes[1].node.claim_funds(payment_preimage_2); + check_added_monitors(&nodes[1], 1); + expect_payment_claimed!(nodes[1], payment_hash_2, 1_000_000); + + let mut updates = nodes[1].chain_monitor.monitor_updates.lock().unwrap().get_mut(&chan.2).unwrap().split_off(0); + + // Update `monitor` until there's just one normal updates, an FC update, and a post-FC claim + // update pending + for update in updates.drain(..updates.len() - 4) { + monitor.update_monitor(&update, &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap(); + } + assert_eq!(updates.len(), 4); + assert!(matches!(updates[1].updates[0], ChannelMonitorUpdateStep::ChannelForceClosed { .. })); + assert!(matches!(updates[2].updates[0], ChannelMonitorUpdateStep::PaymentPreimage { .. })); + assert!(matches!(updates[3].updates[0], ChannelMonitorUpdateStep::PaymentPreimage { .. })); + + // Ensure applying the force-close update skipping the last normal update fails + let poisoned_monitor = monitor.clone(); + std::panic::catch_unwind(|| { + let _ = poisoned_monitor.update_monitor(&updates[1], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger); + // We should panic, rather than returning an error here. + }).unwrap_err(); + + // Then apply the last normal and force-close update and make sure applying the preimage + // updates out-of-order fails. + monitor.update_monitor(&updates[0], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap(); + monitor.update_monitor(&updates[1], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap(); + + let poisoned_monitor = monitor.clone(); + std::panic::catch_unwind(|| { + let _ = poisoned_monitor.update_monitor(&updates[3], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger); + // We should panic, rather than returning an error here. + }).unwrap_err(); + + // Make sure re-applying the force-close update fails + let poisoned_monitor = monitor.clone(); + std::panic::catch_unwind(|| { + let _ = poisoned_monitor.update_monitor(&updates[1], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger); + // We should panic, rather than returning an error here. + }).unwrap_err(); + + // ...and finally ensure that applying all the updates succeeds. 
+ monitor.update_monitor(&updates[2], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap(); + monitor.update_monitor(&updates[3], &nodes[1].tx_broadcaster, &nodes[1].fee_estimator, &nodes[1].logger).unwrap(); +} diff --git a/lightning/src/sync/nostd_sync.rs b/lightning/src/sync/nostd_sync.rs index b3963da762e..19faa1b5e57 100644 --- a/lightning/src/sync/nostd_sync.rs +++ b/lightning/src/sync/nostd_sync.rs @@ -9,6 +9,9 @@ pub struct Mutex { inner: RefCell, } +#[cfg(test)] +impl core::panic::RefUnwindSafe for Mutex {} + #[must_use = "if unused the Mutex will immediately unlock"] pub struct MutexGuard<'a, T: ?Sized + 'a> { lock: RefMut<'a, T>, From b50354dc4808baabeb523f622ed6cd2302688c98 Mon Sep 17 00:00:00 2001 From: Matt Corallo Date: Wed, 13 Nov 2024 01:23:03 +0000 Subject: [PATCH 8/8] Check in-flight updates before completing events on closed chans When we handle a `ChannelMonitorUpdate` completion we always complete everything that was waiting on any updates to the same channel all at once. Thus, we need to skip all updates if there's pending updates besides the one that was just completed. We handled this correctly for open channels, but the shortcut for closed channels ignored any other pending updates entirely. Here we fix this, which is ultimately required for tests which are added in a few commits to pass. --- lightning/src/ln/channelmanager.rs | 40 +++++++++++++++++------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/lightning/src/ln/channelmanager.rs b/lightning/src/ln/channelmanager.rs index be97d96b627..9ca99056e6e 100644 --- a/lightning/src/ln/channelmanager.rs +++ b/lightning/src/ln/channelmanager.rs @@ -7675,30 +7675,36 @@ where if peer_state_mutex_opt.is_none() { return } peer_state_lock = peer_state_mutex_opt.unwrap().lock().unwrap(); let peer_state = &mut *peer_state_lock; - let channel = - if let Some(ChannelPhase::Funded(chan)) = peer_state.channel_by_id.get_mut(channel_id) { - chan - } else { - let update_actions = peer_state.monitor_update_blocked_actions - .remove(channel_id).unwrap_or(Vec::new()); - mem::drop(peer_state_lock); - mem::drop(per_peer_state); - self.handle_monitor_update_completion_actions(update_actions); - return; - }; + let remaining_in_flight = if let Some(pending) = peer_state.in_flight_monitor_updates.get_mut(funding_txo) { pending.retain(|upd| upd.update_id > highest_applied_update_id); pending.len() } else { 0 }; - let logger = WithChannelContext::from(&self.logger, &channel.context, None); - log_trace!(logger, "ChannelMonitor updated to {}. Current highest is {}. {} pending in-flight updates.", - highest_applied_update_id, channel.context.get_latest_monitor_update_id(), - remaining_in_flight); - if !channel.is_awaiting_monitor_update() || remaining_in_flight != 0 { + + let logger = WithContext::from(&self.logger, Some(counterparty_node_id), Some(*channel_id), None); + log_trace!(logger, "ChannelMonitor updated to {}. 
{} pending in-flight updates.", + highest_applied_update_id, remaining_in_flight); + + if remaining_in_flight != 0 { return; } - handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, channel); + + if let Some(ChannelPhase::Funded(chan)) = peer_state.channel_by_id.get_mut(channel_id) { + if chan.is_awaiting_monitor_update() { + log_trace!(logger, "Channel is open and awaiting update, resuming it"); + handle_monitor_update_completion!(self, peer_state_lock, peer_state, per_peer_state, chan); + } else { + log_trace!(logger, "Channel is open but not awaiting update"); + } + } else { + let update_actions = peer_state.monitor_update_blocked_actions + .remove(channel_id).unwrap_or(Vec::new()); + log_trace!(logger, "Channel is closed, applying {} post-update actions", update_actions.len()); + mem::drop(peer_state_lock); + mem::drop(per_peer_state); + self.handle_monitor_update_completion_actions(update_actions); + } } /// Accepts a request to open a channel after a [`Event::OpenChannelRequest`].
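
Schematically, the rule this patch now applies to closed channels as well as open ones is the following (standalone sketch with invented field names, not the `PeerState` used above):

    struct PendingUpdates {
        in_flight_update_ids: Vec<u64>,
        blocked_actions: Vec<&'static str>,
    }

    impl PendingUpdates {
        fn on_update_completed(&mut self, highest_applied_update_id: u64) -> Vec<&'static str> {
            // Drop everything at or below the completed update id...
            self.in_flight_update_ids.retain(|id| *id > highest_applied_update_id);
            // ...but only release blocked actions once *nothing* remains in
            // flight, whether or not the channel is still open.
            if self.in_flight_update_ids.is_empty() {
                std::mem::take(&mut self.blocked_actions)
            } else {
                Vec::new()
            }
        }
    }
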