Skip to content

Commit

Permalink
collator-protocol: Handle unknown validator heads (#5538)
Browse files Browse the repository at this point in the history
There is a race condition when a validator sends its heads to the
collator, but the collator doesn't yet know these heads. Before it is
aware of these heads by importing the block(s), any collation registered
on the collator is not announced to the validators. The collations
aren't advertised, because the collator doesn't know yet that these
heads of the validator are descendants of the collations relay parent.

The solution is to store these unknown heads of the validators and to
handle them when the collator updates its own view.
  • Loading branch information
bkchr committed Sep 2, 2024
1 parent da65410 commit f58e2b8
Show file tree
Hide file tree
Showing 8 changed files with 297 additions and 171 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,7 @@ sc-transaction-pool-api = { path = "substrate/client/transaction-pool/api", defa
sc-utils = { path = "substrate/client/utils", default-features = false }
scale-info = { version = "2.11.1", default-features = false }
schemars = { version = "0.8.13", default-features = false }
schnellru = { version = "0.2.1" }
schnellru = { version = "0.2.3" }
schnorrkel = { version = "0.11.4", default-features = false }
seccompiler = { version = "0.4.0" }
secp256k1 = { version = "0.28.0", default-features = false }
Expand Down
1 change: 1 addition & 0 deletions polkadot/node/network/collator-protocol/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ bitvec = { features = ["alloc"], workspace = true }
futures = { workspace = true }
futures-timer = { workspace = true }
gum = { workspace = true, default-features = true }
schnellru.workspace = true

sp-core = { workspace = true, default-features = true }
sp-runtime = { workspace = true, default-features = true }
Expand Down
78 changes: 57 additions & 21 deletions polkadot/node/network/collator-protocol/src/collator_side/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ use bitvec::{bitvec, vec::BitVec};
use futures::{
channel::oneshot, future::Fuse, pin_mut, select, stream::FuturesUnordered, FutureExt, StreamExt,
};
use schnellru::{ByLength, LruMap};
use sp_core::Pair;

use polkadot_node_network_protocol::{
Expand All @@ -42,7 +43,7 @@ use polkadot_node_subsystem::{
CollatorProtocolMessage, NetworkBridgeEvent, NetworkBridgeTxMessage, ParentHeadData,
RuntimeApiMessage,
},
overseer, CollatorProtocolSenderTrait, FromOrchestra, OverseerSignal, PerLeafSpan,
overseer, FromOrchestra, OverseerSignal, PerLeafSpan,
};
use polkadot_node_subsystem_util::{
backing_implicit_view::View as ImplicitView,
Expand Down Expand Up @@ -200,6 +201,11 @@ struct PeerData {
view: View,
/// Network protocol version.
version: CollationVersion,
/// Unknown heads in the view.
///
/// This can happen when the validator is faster at importing a block and sending out its
/// `View` than the collator is able to import a block.
unknown_heads: LruMap<Hash, (), ByLength>,
}

/// A type wrapping a collation and it's designated core index.
Expand Down Expand Up @@ -1198,9 +1204,10 @@ async fn handle_peer_view_change<Context>(
peer_id: PeerId,
view: View,
) {
let PeerData { view: current, version } = match state.peer_data.get_mut(&peer_id) {
Some(peer_data) => peer_data,
None => return,
let Some(PeerData { view: current, version, unknown_heads }) =
state.peer_data.get_mut(&peer_id)
else {
return
};

let added: Vec<Hash> = view.difference(&*current).cloned().collect();
Expand Down Expand Up @@ -1228,15 +1235,18 @@ async fn handle_peer_view_change<Context>(
new_leaf = ?added,
"New leaf in peer's view is unknown",
);

unknown_heads.insert(added, ());

continue
},
};

for block_hash in block_hashes {
let per_relay_parent = match state.per_relay_parent.get_mut(block_hash) {
Some(per_relay_parent) => per_relay_parent,
None => continue,
let Some(per_relay_parent) = state.per_relay_parent.get_mut(block_hash) else {
continue
};

advertise_collation(
ctx,
*block_hash,
Expand Down Expand Up @@ -1282,10 +1292,13 @@ async fn handle_network_msg<Context>(
return Ok(())
},
};
state
.peer_data
.entry(peer_id)
.or_insert_with(|| PeerData { view: View::default(), version });
state.peer_data.entry(peer_id).or_insert_with(|| PeerData {
view: View::default(),
version,
// Unlikely that the collator is falling 10 blocks behind and if so, it probably is
// not able to keep up any way.
unknown_heads: LruMap::new(ByLength::new(10)),
});

if let Some(authority_ids) = maybe_authority {
gum::trace!(
Expand All @@ -1310,7 +1323,7 @@ async fn handle_network_msg<Context>(
},
OurViewChange(view) => {
gum::trace!(target: LOG_TARGET, ?view, "Own view change");
handle_our_view_change(ctx.sender(), state, view).await?;
handle_our_view_change(ctx, state, view).await?;
},
PeerMessage(remote, msg) => {
handle_incoming_peer_message(ctx, runtime, state, remote, msg).await?;
Expand All @@ -1332,21 +1345,19 @@ async fn handle_network_msg<Context>(
}

/// Handles our view changes.
async fn handle_our_view_change<Sender>(
sender: &mut Sender,
#[overseer::contextbounds(CollatorProtocol, prefix = crate::overseer)]
async fn handle_our_view_change<Context>(
ctx: &mut Context,
state: &mut State,
view: OurView,
) -> Result<()>
where
Sender: CollatorProtocolSenderTrait,
{
) -> Result<()> {
let current_leaves = state.active_leaves.clone();

let removed = current_leaves.iter().filter(|(h, _)| !view.contains(h));
let added = view.iter().filter(|h| !current_leaves.contains_key(h));

for leaf in added {
let mode = prospective_parachains_mode(sender, *leaf).await?;
let mode = prospective_parachains_mode(ctx.sender(), *leaf).await?;

if let Some(span) = view.span_per_head().get(leaf).cloned() {
let per_leaf_span = PerLeafSpan::new(span, "collator-side");
Expand All @@ -1359,19 +1370,44 @@ where
if mode.is_enabled() {
if let Some(ref mut implicit_view) = state.implicit_view {
implicit_view
.activate_leaf(sender, *leaf)
.activate_leaf(ctx.sender(), *leaf)
.await
.map_err(Error::ImplicitViewFetchError)?;

let allowed_ancestry = implicit_view
.known_allowed_relay_parents_under(leaf, state.collating_on)
.unwrap_or_default();

// Get the peers that already reported us this head, but we didn't knew it at this
// point.
let peers = state
.peer_data
.iter_mut()
.filter_map(|(id, data)| {
data.unknown_heads.remove(leaf).map(|_| (id, data.version))
})
.collect::<Vec<_>>();

for block_hash in allowed_ancestry {
state
let per_relay_parent = state
.per_relay_parent
.entry(*block_hash)
.or_insert_with(|| PerRelayParent::new(mode));

// Announce relevant collations to these peers.
for (peer_id, peer_version) in &peers {
advertise_collation(
ctx,
*block_hash,
per_relay_parent,
&peer_id,
*peer_version,
&state.peer_ids,
&mut state.advertisement_timeouts,
&state.metrics,
)
.await;
}
}
}
}
Expand Down
Loading

0 comments on commit f58e2b8

Please sign in to comment.