Make event handling fallible
Previously, we would require our users to handle all events
successfully inline or panic while trying to do so. If they exited the
`EventHandler` in any other way, we'd forget about the events and
wouldn't replay them after restart.

Here, we implement fallible event handling, allowing the user to return
`Err(())`, which signals to our event providers that they should abort
event processing and replay any unhandled events later (i.e., in the
next invocation).
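
For illustration, a minimal sketch of what a handler can now do under the
new API (the `persist_to_disk` helper is hypothetical user code):

```rust
use lightning::events::Event;

// Hypothetical stand-in for the user's persistence logic.
fn persist_to_disk(_event: &Event) -> std::io::Result<()> {
	Ok(())
}

// A fallible handler: on persistence failure it returns `Err(())`, telling
// the event provider to abort processing and replay this event (and any
// later ones) on the next invocation instead of dropping them.
fn handle_event(event: Event) -> Result<(), ()> {
	persist_to_disk(&event).map_err(|_| ())
}
```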
tnull committed May 27, 2024
1 parent 1d421d3 commit 34d7a9b
Showing 8 changed files with 238 additions and 140 deletions.
78 changes: 45 additions & 33 deletions lightning-background-processor/src/lib.rs
@@ -552,7 +552,7 @@ use core::task;
/// # }
/// # struct EventHandler {}
/// # impl EventHandler {
-/// # async fn handle_event(&self, _: lightning::events::Event) {}
+/// # async fn handle_event(&self, _: lightning::events::Event) -> Result<(), ()> { Ok(()) }
/// # }
/// # #[derive(Eq, PartialEq, Clone, Hash)]
/// # struct SocketDescriptor {}
@@ -646,7 +646,7 @@ pub async fn process_events_async<
G: 'static + Deref<Target = NetworkGraph<L>> + Send + Sync,
L: 'static + Deref + Send + Sync,
P: 'static + Deref + Send + Sync,
-EventHandlerFuture: core::future::Future<Output = ()>,
+EventHandlerFuture: core::future::Future<Output = Result<(), ()>>,
EventHandler: Fn(Event) -> EventHandlerFuture,
PS: 'static + Deref + Send,
M: 'static + Deref<Target = ChainMonitor<<CM::Target as AChannelManager>::Signer, CF, T, F, L, P>> + Send + Sync,
@@ -692,12 +692,13 @@ where
if update_scorer(scorer, &event, duration_since_epoch) {
log_trace!(logger, "Persisting scorer after update");
if let Err(e) = persister.persist_scorer(&scorer) {
-log_error!(logger, "Error: Failed to persist scorer, check your disk and permissions {}", e)
+log_error!(logger, "Error: Failed to persist scorer, check your disk and permissions {}", e);
+return Err(());
}
}
}
}
-event_handler(event).await;
+event_handler(event).await
}
};
define_run_body!(
@@ -731,7 +732,7 @@ where

#[cfg(feature = "futures")]
async fn process_onion_message_handler_events_async<
-EventHandlerFuture: core::future::Future<Output = ()>,
+EventHandlerFuture: core::future::Future<Output = Result<(), ()>>,
EventHandler: Fn(Event) -> EventHandlerFuture,
PM: 'static + Deref + Send + Sync,
>(
@@ -741,10 +742,11 @@ where
PM::Target: APeerManager + Send + Sync,
{
let events = core::cell::RefCell::new(Vec::new());
-peer_manager.onion_message_handler().process_pending_events(&|e| events.borrow_mut().push(e));
+peer_manager.onion_message_handler().process_pending_events(&|e| Ok(events.borrow_mut().push(e)));

for event in events.into_inner() {
-handler(event).await
+// Ignore any errors as onion messages are best effort anyways.
+let _ = handler(event).await;
}
}

@@ -846,7 +848,7 @@ impl BackgroundProcessor {
}
}
}
-event_handler.handle_event(event);
+event_handler.handle_event(event)
};
define_run_body!(
persister, chain_monitor, chain_monitor.process_pending_events(&event_handler),
@@ -1424,7 +1426,7 @@ mod tests {
// Initiate the background processors to watch each node.
let data_dir = nodes[0].kv_store.get_data_dir();
let persister = Arc::new(Persister::new(data_dir));
-let event_handler = |_: _| {};
+let event_handler = |_: _| { Ok(()) };
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].p2p_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));

macro_rules! check_persisted_data {
@@ -1491,7 +1493,7 @@ mod tests {
let (_, nodes) = create_nodes(1, "test_timer_tick_called");
let data_dir = nodes[0].kv_store.get_data_dir();
let persister = Arc::new(Persister::new(data_dir));
-let event_handler = |_: _| {};
+let event_handler = |_: _| { Ok(()) };
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].no_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));
loop {
let log_entries = nodes[0].logger.lines.lock().unwrap();
@@ -1520,7 +1522,7 @@ mod tests {

let data_dir = nodes[0].kv_store.get_data_dir();
let persister = Arc::new(Persister::new(data_dir).with_manager_error(std::io::ErrorKind::Other, "test"));
-let event_handler = |_: _| {};
+let event_handler = |_: _| { Ok(()) };
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].no_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));
match bg_processor.join() {
Ok(_) => panic!("Expected error persisting manager"),
@@ -1542,7 +1544,7 @@ mod tests {
let persister = Arc::new(Persister::new(data_dir).with_manager_error(std::io::ErrorKind::Other, "test"));

let bp_future = super::process_events_async(
-persister, |_: _| {async {}}, nodes[0].chain_monitor.clone(), nodes[0].node.clone(),
+persister, |_: _| {async { Ok(()) }}, nodes[0].chain_monitor.clone(), nodes[0].node.clone(),
nodes[0].rapid_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(),
Some(nodes[0].scorer.clone()), move |dur: Duration| {
Box::pin(async move {
@@ -1566,7 +1568,7 @@ mod tests {
let (_, nodes) = create_nodes(2, "test_persist_network_graph_error");
let data_dir = nodes[0].kv_store.get_data_dir();
let persister = Arc::new(Persister::new(data_dir).with_graph_error(std::io::ErrorKind::Other, "test"));
-let event_handler = |_: _| {};
+let event_handler = |_: _| { Ok(()) };
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].p2p_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));

match bg_processor.stop() {
@@ -1584,7 +1586,7 @@ mod tests {
let (_, nodes) = create_nodes(2, "test_persist_scorer_error");
let data_dir = nodes[0].kv_store.get_data_dir();
let persister = Arc::new(Persister::new(data_dir).with_scorer_error(std::io::ErrorKind::Other, "test"));
-let event_handler = |_: _| {};
+let event_handler = |_: _| { Ok(()) };
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].no_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));

match bg_processor.stop() {
@@ -1606,11 +1608,14 @@ mod tests {
// Set up a background event handler for FundingGenerationReady events.
let (funding_generation_send, funding_generation_recv) = std::sync::mpsc::sync_channel(1);
let (channel_pending_send, channel_pending_recv) = std::sync::mpsc::sync_channel(1);
-let event_handler = move |event: Event| match event {
-Event::FundingGenerationReady { .. } => funding_generation_send.send(handle_funding_generation_ready!(event, channel_value)).unwrap(),
-Event::ChannelPending { .. } => channel_pending_send.send(()).unwrap(),
-Event::ChannelReady { .. } => {},
-_ => panic!("Unexpected event: {:?}", event),
+let event_handler = move |event: Event| {
+match event {
+Event::FundingGenerationReady { .. } => funding_generation_send.send(handle_funding_generation_ready!(event, channel_value)).unwrap(),
+Event::ChannelPending { .. } => channel_pending_send.send(()).unwrap(),
+Event::ChannelReady { .. } => {},
+_ => panic!("Unexpected event: {:?}", event),
+}
+Ok(())
};

let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].no_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));
@@ -1646,11 +1651,14 @@ mod tests {

// Set up a background event handler for SpendableOutputs events.
let (sender, receiver) = std::sync::mpsc::sync_channel(1);
-let event_handler = move |event: Event| match event {
-Event::SpendableOutputs { .. } => sender.send(event).unwrap(),
-Event::ChannelReady { .. } => {},
-Event::ChannelClosed { .. } => {},
-_ => panic!("Unexpected event: {:?}", event),
+let event_handler = move |event: Event| {
+match event {
+Event::SpendableOutputs { .. } => sender.send(event).unwrap(),
+Event::ChannelReady { .. } => {},
+Event::ChannelClosed { .. } => {},
+_ => panic!("Unexpected event: {:?}", event),
+}
+Ok(())
};
let persister = Arc::new(Persister::new(data_dir));
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].no_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));
@@ -1763,7 +1771,7 @@ mod tests {
let (_, nodes) = create_nodes(2, "test_scorer_persistence");
let data_dir = nodes[0].kv_store.get_data_dir();
let persister = Arc::new(Persister::new(data_dir));
-let event_handler = |_: _| {};
+let event_handler = |_: _| { Ok(()) };
let bg_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].no_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));

loop {
@@ -1836,7 +1844,7 @@ mod tests {
let data_dir = nodes[0].kv_store.get_data_dir();
let persister = Arc::new(Persister::new(data_dir).with_graph_persistence_notifier(sender));

-let event_handler = |_: _| {};
+let event_handler = |_: _| { Ok(()) };
let background_processor = BackgroundProcessor::start(persister, event_handler, nodes[0].chain_monitor.clone(), nodes[0].node.clone(), nodes[0].rapid_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(), Some(nodes[0].scorer.clone()));

do_test_not_pruning_network_graph_until_graph_sync_completion!(nodes,
@@ -1857,7 +1865,7 @@ mod tests {

let (exit_sender, exit_receiver) = tokio::sync::watch::channel(());
let bp_future = super::process_events_async(
-persister, |_: _| {async {}}, nodes[0].chain_monitor.clone(), nodes[0].node.clone(),
+persister, |_: _| {async { Ok(()) }}, nodes[0].chain_monitor.clone(), nodes[0].node.clone(),
nodes[0].rapid_gossip_sync(), nodes[0].peer_manager.clone(), nodes[0].logger.clone(),
Some(nodes[0].scorer.clone()), move |dur: Duration| {
let mut exit_receiver = exit_receiver.clone();
@@ -1984,12 +1992,15 @@ mod tests {
#[test]
fn test_payment_path_scoring() {
let (sender, receiver) = std::sync::mpsc::sync_channel(1);
-let event_handler = move |event: Event| match event {
-Event::PaymentPathFailed { .. } => sender.send(event).unwrap(),
-Event::PaymentPathSuccessful { .. } => sender.send(event).unwrap(),
-Event::ProbeSuccessful { .. } => sender.send(event).unwrap(),
-Event::ProbeFailed { .. } => sender.send(event).unwrap(),
-_ => panic!("Unexpected event: {:?}", event),
+let event_handler = move |event: Event| {
+match event {
+Event::PaymentPathFailed { .. } => sender.send(event).unwrap(),
+Event::PaymentPathSuccessful { .. } => sender.send(event).unwrap(),
+Event::ProbeSuccessful { .. } => sender.send(event).unwrap(),
+Event::ProbeFailed { .. } => sender.send(event).unwrap(),
+_ => panic!("Unexpected event: {:?}", event),
+}
+Ok(())
};

let (_, nodes) = create_nodes(1, "test_payment_path_scoring");
@@ -2022,6 +2033,7 @@ mod tests {
Event::ProbeFailed { .. } => { sender_ref.send(event).await.unwrap() },
_ => panic!("Unexpected event: {:?}", event),
}
+Ok(())
}
};

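With the new `Future<Output = Result<(), ()>>` bound above, an async handler
must now yield `Result<(), ()>`. A minimal sketch of one possible shape,
assuming a tokio channel hand-off to an application task (the hand-off itself
is illustrative, not part of this API):

```rust
use std::future::Future;
use std::pin::Pin;

use lightning::events::Event;

// Sketch: build a handler satisfying `Fn(Event) -> Future<Output = Result<(), ()>>`.
// A failed send maps to `Err(())`, so the background processor stops processing
// and replays the event on its next invocation.
fn make_handler(
	tx: tokio::sync::mpsc::Sender<Event>,
) -> impl Fn(Event) -> Pin<Box<dyn Future<Output = Result<(), ()>> + Send>> {
	move |event: Event| {
		let tx = tx.clone();
		Box::pin(async move { tx.send(event).await.map_err(|_| ()) })
	}
}
```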
1 change: 1 addition & 0 deletions lightning-invoice/src/utils.rs
@@ -1390,6 +1390,7 @@ mod test {
} else {
other_events.borrow_mut().push(event);
}
+Ok(())
};
nodes[fwd_idx].node.process_pending_events(&forward_event_handler);
nodes[fwd_idx].node.process_pending_events(&forward_event_handler);
4 changes: 2 additions & 2 deletions lightning/src/chain/chainmonitor.rs
@@ -516,7 +516,7 @@ where C::Target: chain::Filter,
pub fn get_and_clear_pending_events(&self) -> Vec<events::Event> {
use crate::events::EventsProvider;
let events = core::cell::RefCell::new(Vec::new());
-let event_handler = |event: events::Event| events.borrow_mut().push(event);
+let event_handler = |event: events::Event| Ok(events.borrow_mut().push(event));
self.process_pending_events(&event_handler);
events.into_inner()
}
@@ -527,7 +527,7 @@ where C::Target: chain::Filter,
/// See the trait-level documentation of [`EventsProvider`] for requirements.
///
/// [`EventsProvider`]: crate::events::EventsProvider
-pub async fn process_pending_events_async<Future: core::future::Future, H: Fn(Event) -> Future>(
+pub async fn process_pending_events_async<Future: core::future::Future<Output = Result<(), ()>>, H: Fn(Event) -> Future>(
&self, handler: H
) {
// Sadly we can't hold the monitors read lock through an async call. Thus we have to do a
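The collector closure above adapts the infallible `Vec::push` (which returns
`()`) to the fallible handler shape by wrapping it in `Ok(..)`. A standalone
sketch of the same pattern, with a plain integer standing in for `Event`:

```rust
use core::cell::RefCell;

fn main() {
	// `push` returns `()`, so `Ok(events.borrow_mut().push(event))` is `Ok(())`:
	// the collector can never fail, but still fits the fallible handler shape.
	let events = RefCell::new(Vec::new());
	let event_handler = |event: u32| -> Result<(), ()> { Ok(events.borrow_mut().push(event)) };

	event_handler(1).unwrap();
	event_handler(2).unwrap();
	assert_eq!(events.into_inner(), vec![1, 2]);
}
```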
31 changes: 24 additions & 7 deletions lightning/src/chain/channelmonitor.rs
@@ -1169,19 +1169,36 @@ macro_rules! _process_events_body {
pending_events = inner.pending_events.clone();
repeated_events = inner.get_repeated_events();
} else { break; }
-let num_events = pending_events.len();

-for event in pending_events.into_iter().chain(repeated_events.into_iter()) {
+let mut num_handled_events = 0;
+let mut handling_failed = false;
+for event in pending_events.into_iter() {
$event_to_handle = event;
-$handle_event;
+match $handle_event {
+Ok(()) => num_handled_events += 1,
+Err(()) => {
+// If we encounter an error we stop handling events and make sure to replay
+// any unhandled events on the next invocation.
+handling_failed = true;
+break;
+}
+}
}

+for event in repeated_events.into_iter() {
+// For repeated events we ignore any errors as they will be replayed eventually
+// anyways.
+$event_to_handle = event;
+$handle_event.ok();
+}

if let Some(us) = $self_opt {
let mut inner = us.inner.lock().unwrap();
-inner.pending_events.drain(..num_events);
+inner.pending_events.drain(..num_handled_events);
inner.is_processing_pending_events = false;
-if !inner.pending_events.is_empty() {
-// If there's more events to process, go ahead and do so.
+if !handling_failed && !inner.pending_events.is_empty() {
+// If there's more events to process and we didn't fail so far, go ahead and do
+// so.
continue;
}
}
@@ -1507,7 +1524,7 @@ impl<Signer: EcdsaChannelSigner> ChannelMonitor<Signer> {
/// Processes any events asynchronously.
///
/// See [`Self::process_pending_events`] for more information.
-pub async fn process_pending_events_async<Future: core::future::Future, H: Fn(Event) -> Future>(
+pub async fn process_pending_events_async<Future: core::future::Future<Output = Result<(), ()>>, H: Fn(Event) -> Future>(
&self, handler: &H
) {
let mut ev;
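The macro change above implements handle-until-first-error semantics: only
successfully handled events are drained, so anything after a failure is
replayed on the next invocation. A standalone sketch of that logic, with
integers standing in for events:

```rust
// Handle queued events in order, stop at the first `Err(())`, and drain only
// the handled prefix so the remainder is replayed on the next invocation.
fn process_pending<E>(pending: &mut Vec<E>, mut handle: impl FnMut(&E) -> Result<(), ()>) {
	let mut num_handled = 0;
	for event in pending.iter() {
		match handle(event) {
			Ok(()) => num_handled += 1,
			Err(()) => break,
		}
	}
	pending.drain(..num_handled);
}

fn main() {
	let mut queue = vec![1, 2, 3];
	// Fail on event `2`: only `1` is drained; `2` and `3` stay queued.
	process_pending(&mut queue, |e| if *e == 2 { Err(()) } else { Ok(()) });
	assert_eq!(queue, vec![2, 3]);
}
```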
16 changes: 9 additions & 7 deletions lightning/src/events/mod.rs
@@ -2193,8 +2193,10 @@ pub trait MessageSendEventsProvider {
///
/// In order to ensure no [`Event`]s are lost, implementors of this trait will persist [`Event`]s
/// and replay any unhandled events on startup. An [`Event`] is considered handled when
-/// [`process_pending_events`] returns, thus handlers MUST fully handle [`Event`]s and persist any
-/// relevant changes to disk *before* returning.
+/// [`process_pending_events`] returns `Ok(())`, thus handlers MUST fully handle [`Event`]s and
+/// persist any relevant changes to disk *before* returning `Ok(())`. In case of an error (e.g., a
+/// persistence failure) implementors should return `Err(())`, signalling to the [`EventsProvider`]
+/// to replay unhandled events on the next invocation.
///
/// Further, because an application may crash between an [`Event`] being handled and the
/// implementor of this trait being re-serialized, [`Event`] handling must be idempotent - in
@@ -2225,22 +2227,22 @@ pub trait EventsProvider {
///
/// An async variation also exists for implementations of [`EventsProvider`] that support async
/// event handling. The async event handler should satisfy the generic bounds: `F:
-/// core::future::Future, H: Fn(Event) -> F`.
+/// core::future::Future<Output = Result<(), ()>>, H: Fn(Event) -> F`.
pub trait EventHandler {
/// Handles the given [`Event`].
///
/// See [`EventsProvider`] for details that must be considered when implementing this method.
-fn handle_event(&self, event: Event);
+fn handle_event(&self, event: Event) -> Result<(), ()>;
}

-impl<F> EventHandler for F where F: Fn(Event) {
-fn handle_event(&self, event: Event) {
+impl<F> EventHandler for F where F: Fn(Event) -> Result<(), ()> {
+fn handle_event(&self, event: Event) -> Result<(), ()> {
self(event)
}
}

impl<T: EventHandler> EventHandler for Arc<T> {
-fn handle_event(&self, event: Event) {
+fn handle_event(&self, event: Event) -> Result<(), ()> {
self.deref().handle_event(event)
}
}
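
A sketch of what implementing the updated trait might look like for a user
type (`PersistingHandler` and its `persist` method are hypothetical):

```rust
use lightning::events::{Event, EventHandler};

struct PersistingHandler;

impl PersistingHandler {
	// Hypothetical stand-in for the user's persistence logic.
	fn persist(&self, _event: &Event) -> std::io::Result<()> {
		Ok(())
	}
}

impl EventHandler for PersistingHandler {
	fn handle_event(&self, event: Event) -> Result<(), ()> {
		// Fully handle and persist *before* returning `Ok(())`; returning
		// `Err(())` makes the provider replay the event on the next invocation.
		self.persist(&event).map_err(|_| ())
	}
}
```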