From 8f5d2599599e95a1cc8cde828ec63619d0a2cd25 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 19 Mar 2024 14:15:16 +0200 Subject: [PATCH 01/28] DistributeCollation includes CoreIndex Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-types/src/messages.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/polkadot/node/subsystem-types/src/messages.rs b/polkadot/node/subsystem-types/src/messages.rs index 23773f7e3252e..41dab9e026c65 100644 --- a/polkadot/node/subsystem-types/src/messages.rs +++ b/polkadot/node/subsystem-types/src/messages.rs @@ -45,7 +45,7 @@ use polkadot_primitives::{ async_backing, slashing, vstaging::{ApprovalVotingParams, NodeFeatures}, AuthorityDiscoveryId, BackedCandidate, BlockNumber, CandidateEvent, CandidateHash, - CandidateIndex, CandidateReceipt, CollatorId, CommittedCandidateReceipt, CoreState, + CandidateIndex, CandidateReceipt, CollatorId, CommittedCandidateReceipt, CoreIndex, CoreState, DisputeState, ExecutorParams, GroupIndex, GroupRotationInfo, Hash, HeadData, Header as BlockHeader, Id as ParaId, InboundDownwardMessage, InboundHrmpMessage, MultiDisputeStatementSet, OccupiedCoreAssumption, PersistedValidationData, PvfCheckStatement, @@ -221,6 +221,8 @@ pub enum CollatorProtocolMessage { /// The result sender should be informed when at least one parachain validator seconded the /// collation. It is also completely okay to just drop the sender. result_sender: Option>, + /// The core index where the candidate should be backed. + core_index: CoreIndex, }, /// Report a collator as having provided an invalid collation. This should lead to disconnect /// and blacklist of the collator. From dc84796a8b75341c5a5463b15bee207a0d3a2e06 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 19 Mar 2024 14:15:50 +0200 Subject: [PATCH 02/28] SubmitCollationParams includes CoreIndex Signed-off-by: Andrei Sandu --- polkadot/node/primitives/src/lib.rs | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/polkadot/node/primitives/src/lib.rs b/polkadot/node/primitives/src/lib.rs index d295c21cce1dc..d96a61e069ad1 100644 --- a/polkadot/node/primitives/src/lib.rs +++ b/polkadot/node/primitives/src/lib.rs @@ -31,8 +31,8 @@ use serde::{de, Deserialize, Deserializer, Serialize, Serializer}; use polkadot_primitives::{ BlakeTwo256, BlockNumber, CandidateCommitments, CandidateHash, CollatorPair, - CommittedCandidateReceipt, CompactStatement, EncodeAs, Hash, HashT, HeadData, Id as ParaId, - PersistedValidationData, SessionIndex, Signed, UncheckedSigned, ValidationCode, + CommittedCandidateReceipt, CompactStatement, CoreIndex, EncodeAs, Hash, HashT, HeadData, + Id as ParaId, PersistedValidationData, SessionIndex, Signed, UncheckedSigned, ValidationCode, ValidationCodeHash, ValidatorIndex, MAX_CODE_SIZE, MAX_POV_SIZE, }; pub use sp_consensus_babe::{ @@ -524,6 +524,8 @@ pub struct SubmitCollationParams { /// okay to just drop it. However, if it is called, it should be called with the signed /// statement of a parachain validator seconding the collation. pub result_sender: Option>, + /// The core index on which the resulting candidate should be backed + pub core_index: CoreIndex, } /// This is the data we keep available for each candidate included in the relay chain. From 665ef06903ed12b3b60a06ab727194aa0c168718 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 19 Mar 2024 14:16:18 +0200 Subject: [PATCH 03/28] collator-protocol updates Signed-off-by: Andrei Sandu --- .../node/network/collator-protocol/src/collator_side/mod.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs index 9f306f288a162..4c140635d7450 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs @@ -350,6 +350,7 @@ async fn distribute_collation( pov: PoV, parent_head_data: HeadData, result_sender: Option>, + core_index: CoreIndex, ) -> Result<()> { let candidate_relay_parent = receipt.descriptor.relay_parent; let candidate_hash = receipt.hash(); @@ -790,6 +791,7 @@ async fn process_msg( pov, parent_head_data, result_sender, + core_index, } => { let _span1 = state .span_per_relay_parent @@ -820,6 +822,7 @@ async fn process_msg( pov, parent_head_data, result_sender, + core_index, ) .await?; }, From 78ee8b2f68ea235e056318f6154292815fa91502 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Tue, 19 Mar 2024 14:16:37 +0200 Subject: [PATCH 04/28] collation-generation pull support for chained collations Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 75 +++++++++++++++---- 1 file changed, 60 insertions(+), 15 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index a89351628a08f..590f99eb7e1d0 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -48,8 +48,8 @@ use polkadot_node_subsystem_util::{ }; use polkadot_primitives::{ collator_signature_payload, CandidateCommitments, CandidateDescriptor, CandidateReceipt, - CollatorPair, CoreState, Hash, Id as ParaId, OccupiedCoreAssumption, PersistedValidationData, - ValidationCodeHash, + CollatorPair, CoreIndex, CoreState, Hash, Id as ParaId, OccupiedCoreAssumption, + PersistedValidationData, ValidationCodeHash, }; use sp_core::crypto::Pair; use std::sync::Arc; @@ -224,6 +224,13 @@ async fn handle_new_activations( let n_validators = validators??.len(); let async_backing_params = async_backing_params?.ok(); + // The loop bellow will fill in cores that the para is assigned to. + let mut cores_to_build_on = Vec::new(); + // The latest on chain PVD. + let mut pvd = None; + // The validation code hash. + let mut validation_code_hash = None; + for (core_idx, core) in availability_cores.into_iter().enumerate() { let _availability_core_timer = metrics.time_new_activations_availability_core(); @@ -304,7 +311,7 @@ async fn handle_new_activations( }, }; - let validation_code_hash = match obtain_validation_code_hash_with_assumption( + let code_hash = match obtain_validation_code_hash_with_assumption( relay_parent, scheduled_core.para_id, assumption, @@ -326,12 +333,39 @@ async fn handle_new_activations( }, }; - let task_config = config.clone(); - let metrics = metrics.clone(); - let mut task_sender = ctx.sender().clone(); - ctx.spawn( - "collation-builder", - Box::pin(async move { + // Prepare data for building collation(s) outside the loop. + cores_to_build_on.push(CoreIndex(core_idx as u32)); + + // We only need the validation data only for the first collation we build, + // we will construct the other ones based on the outputs of prev collation. + // This should be fine as we always enact the chain candidates of the para at once + // in the runtime. + if pvd.is_none() { + pvd = Some(validation_data); + validation_code_hash = Some(code_hash); + } + } + + let task_config = config.clone(); + let metrics = metrics.clone(); + let mut task_sender = ctx.sender().clone(); + + let (mut validation_data, validation_code_hash) = if !cores_to_build_on.is_empty() { + ( + pvd.expect("If cores is not empty, `pvd` is always Some; qed"), + validation_code_hash + .expect("If cores is not empty, `validation_code_hash` is always Some; qed"), + ) + } else { + // Bail out if no cores to build on. + return Ok(()) + }; + + let para_id = config.para_id; + ctx.spawn( + "chained-collation-builder", + Box::pin(async move { + for core_index in cores_to_build_on { let collator_fn = match task_config.collator.as_ref() { Some(x) => x, None => return, @@ -343,21 +377,23 @@ async fn handle_new_activations( None => { gum::debug!( target: LOG_TARGET, - para_id = %scheduled_core.para_id, + ?para_id, "collator returned no collation on collate", ); return }, }; + let parent_head = collation.head_data.clone(); construct_and_distribute_receipt( PreparedCollation { collation, - para_id: scheduled_core.para_id, + para_id, relay_parent, - validation_data, + validation_data: validation_data.clone(), validation_code_hash, n_validators, + core_index, }, task_config.key.clone(), &mut task_sender, @@ -365,9 +401,13 @@ async fn handle_new_activations( &metrics, ) .await; - }), - )?; - } + + // Chain the collations. All else stays the same as we build the chained + // collation on same relay parent. + validation_data.parent_head = parent_head; + } + }), + )?; } Ok(()) @@ -388,6 +428,7 @@ async fn handle_submit_collation( parent_head, validation_code_hash, result_sender, + core_index, } = params; let validators = request_validators(relay_parent, ctx.sender()).await.await??; @@ -424,6 +465,7 @@ async fn handle_submit_collation( validation_data, validation_code_hash, n_validators, + core_index, }; construct_and_distribute_receipt( @@ -445,6 +487,7 @@ struct PreparedCollation { validation_data: PersistedValidationData, validation_code_hash: ValidationCodeHash, n_validators: usize, + core_index: CoreIndex, } /// Takes a prepared collation, along with its context, and produces a candidate receipt @@ -463,6 +506,7 @@ async fn construct_and_distribute_receipt( validation_data, validation_code_hash, n_validators, + core_index, } = collation; let persisted_validation_data_hash = validation_data.hash(); @@ -558,6 +602,7 @@ async fn construct_and_distribute_receipt( pov, parent_head_data, result_sender, + core_index, }) .await; } From a61d64ac8ee6615ca75bbaa7af5da07e7327d102 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 20 Mar 2024 10:49:17 +0200 Subject: [PATCH 05/28] adjust validator buffer capacity for elastic scaling Signed-off-by: Andrei Sandu --- .../src/collator_side/validators_buffer.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs b/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs index 1533f2eda5a57..f1a59fc24b063 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs @@ -45,11 +45,18 @@ use futures::FutureExt; use polkadot_node_network_protocol::PeerId; use polkadot_primitives::{AuthorityDiscoveryId, CandidateHash, GroupIndex, SessionIndex}; +/// Elastic scaling: how many candidates per relay chain block the collator supports building. +pub const MAX_CHAINED_CANDIDATES_PER_RCB: NonZeroUsize = match NonZeroUsize::new(3) { + Some(cap) => cap, + None => panic!("max candidates per rcb cannot be zero"), +}; + /// The ring buffer stores at most this many unique validator groups. /// /// This value should be chosen in way that all groups assigned to our para -/// in the view can fit into the buffer. -pub const VALIDATORS_BUFFER_CAPACITY: NonZeroUsize = match NonZeroUsize::new(3) { +/// in the view can fit into the buffer multiplied by amount of candidates we support per relay chain block +/// in the case of elastic scaling. +pub const VALIDATORS_BUFFER_CAPACITY: NonZeroUsize = match NonZeroUsize::new(3 * MAX_CHAINED_CANDIDATES_PER_RCB.get()) { Some(cap) => cap, None => panic!("buffer capacity must be non-zero"), }; From 1cbb090d5f53fcc56d0cd2e9962f458f2286d625 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 20 Mar 2024 11:22:18 +0200 Subject: [PATCH 06/28] fmt Signed-off-by: Andrei Sandu --- .../src/collator_side/validators_buffer.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs b/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs index f1a59fc24b063..fbb3ff4328a51 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/validators_buffer.rs @@ -54,12 +54,13 @@ pub const MAX_CHAINED_CANDIDATES_PER_RCB: NonZeroUsize = match NonZeroUsize::new /// The ring buffer stores at most this many unique validator groups. /// /// This value should be chosen in way that all groups assigned to our para -/// in the view can fit into the buffer multiplied by amount of candidates we support per relay chain block -/// in the case of elastic scaling. -pub const VALIDATORS_BUFFER_CAPACITY: NonZeroUsize = match NonZeroUsize::new(3 * MAX_CHAINED_CANDIDATES_PER_RCB.get()) { - Some(cap) => cap, - None => panic!("buffer capacity must be non-zero"), -}; +/// in the view can fit into the buffer multiplied by amount of candidates we support per relay +/// chain block in the case of elastic scaling. +pub const VALIDATORS_BUFFER_CAPACITY: NonZeroUsize = + match NonZeroUsize::new(3 * MAX_CHAINED_CANDIDATES_PER_RCB.get()) { + Some(cap) => cap, + None => panic!("buffer capacity must be non-zero"), + }; /// Unique identifier of a validators group. #[derive(Debug)] From 493a7a2062525b6ddc70ff98121e82d29372ceea Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 20 Mar 2024 11:22:37 +0200 Subject: [PATCH 07/28] Per core index tracking of multiple collations Signed-off-by: Andrei Sandu --- .../src/collator_side/mod.rs | 120 +++++++++++++----- 1 file changed, 86 insertions(+), 34 deletions(-) diff --git a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs index 4c140635d7450..06cee3ec05e15 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs @@ -203,20 +203,40 @@ struct PeerData { version: CollationVersion, } +/// A type wrapping a collation and it's designated core index. +struct CollationWithCoreIndex(Collation, CoreIndex); + +impl CollationWithCoreIndex { + /// Returns inner collation ref. + pub fn collation(&self) -> &Collation { + &self.0 + } + + /// Returns inner collation mut ref. + pub fn collation_mut(&mut self) -> &mut Collation { + &mut self.0 + } + + /// Returns inner core index. + pub fn core_index(&self) -> &CoreIndex { + &self.1 + } +} + struct PerRelayParent { prospective_parachains_mode: ProspectiveParachainsMode, - /// Validators group responsible for backing candidates built + /// Per core index validators group responsible for backing candidates built /// on top of this relay parent. - validator_group: ValidatorGroup, + validator_group: HashMap, /// Distributed collations. - collations: HashMap, + collations: HashMap, } impl PerRelayParent { fn new(mode: ProspectiveParachainsMode) -> Self { Self { prospective_parachains_mode: mode, - validator_group: ValidatorGroup::default(), + validator_group: HashMap::default(), collations: HashMap::new(), } } @@ -423,7 +443,22 @@ async fn distribute_collation( ); } - let our_core = our_cores[0]; + // Double check that the specified `core_index` is among the ones our para has assignments for. + if !our_cores.iter().any(|assigned_core| assigned_core == &core_index) { + gum::warn!( + target: LOG_TARGET, + para_id = %id, + relay_parent = ?candidate_relay_parent, + cores = ?our_cores, + ?core_index, + "Attempting to distribute collation for a core we are not assigned to ", + ); + + return Ok(()) + } + + let our_core = core_index; + // Determine the group on that core. // // When prospective parachains are disabled, candidate relay parent here is @@ -465,10 +500,12 @@ async fn distribute_collation( "Accepted collation, connecting to validators." ); - let validators_at_relay_parent = &mut per_relay_parent.validator_group.validators; - if validators_at_relay_parent.is_empty() { - *validators_at_relay_parent = validators; - } + // Insert validator group for the `core_index` at relay parent. + per_relay_parent.validator_group.entry(core_index).or_insert_with(|| { + let mut group = ValidatorGroup::default(); + group.validators = validators; + group + }); // Update a set of connected validators if necessary. connect_to_validators(ctx, &state.validator_groups_buf).await; @@ -485,7 +522,10 @@ async fn distribute_collation( per_relay_parent.collations.insert( candidate_hash, - Collation { receipt, pov, parent_head_data, status: CollationStatus::Created }, + CollationWithCoreIndex( + Collation { receipt, pov, parent_head_data, status: CollationStatus::Created }, + core_index, + ), ); // If prospective parachains are disabled, a leaf should be known to peer. @@ -691,7 +731,10 @@ async fn advertise_collation( advertisement_timeouts: &mut FuturesUnordered, metrics: &Metrics, ) { - for (candidate_hash, collation) in per_relay_parent.collations.iter_mut() { + for (candidate_hash, collation_and_core) in per_relay_parent.collations.iter_mut() { + let core_index = collation_and_core.core_index().clone(); + let collation = collation_and_core.collation_mut(); + // Check that peer will be able to request the collation. if let CollationVersion::V1 = protocol_version { if per_relay_parent.prospective_parachains_mode.is_enabled() { @@ -705,10 +748,12 @@ async fn advertise_collation( } } - let should_advertise = - per_relay_parent - .validator_group - .should_advertise_to(candidate_hash, peer_ids, &peer); + let should_advertise = per_relay_parent + .validator_group + .get(&core_index) + .map_or(ShouldAdvertiseTo::NotAuthority, |group| { + group.should_advertise_to(candidate_hash, peer_ids, &peer) + }); match should_advertise { ShouldAdvertiseTo::Yes => {}, @@ -759,6 +804,8 @@ async fn advertise_collation( per_relay_parent .validator_group + .get_mut(&core_index) + .expect("should_advertise returned `ShouldAdvertiseTo::Yes` above; qed") .advertised_to_peer(candidate_hash, &peer_ids, peer); advertisement_timeouts.push(ResetInterestTimeout::new( @@ -1056,7 +1103,7 @@ async fn handle_incoming_request( }; let mode = per_relay_parent.prospective_parachains_mode; - let collation = match &req { + let collation_with_core = match &req { VersionedCollationRequest::V1(_) if !mode.is_enabled() => per_relay_parent.collations.values_mut().next(), VersionedCollationRequest::V2(req) => @@ -1073,22 +1120,24 @@ async fn handle_incoming_request( return Ok(()) }, }; - let (receipt, pov, parent_head_data) = if let Some(collation) = collation { - collation.status.advance_to_requested(); - ( - collation.receipt.clone(), - collation.pov.clone(), - collation.parent_head_data.clone(), - ) - } else { - gum::warn!( - target: LOG_TARGET, - relay_parent = %relay_parent, - "received a `RequestCollation` for a relay parent we don't have collation stored.", - ); + let (receipt, pov, parent_head_data) = + if let Some(collation_with_core) = collation_with_core { + let collation = collation_with_core.collation_mut(); + collation.status.advance_to_requested(); + ( + collation.receipt.clone(), + collation.pov.clone(), + collation.parent_head_data.clone(), + ) + } else { + gum::warn!( + target: LOG_TARGET, + relay_parent = %relay_parent, + "received a `RequestCollation` for a relay parent we don't have collation stored.", + ); - return Ok(()) - }; + return Ok(()) + }; state.metrics.on_collation_sent_requested(); @@ -1343,7 +1392,9 @@ where .remove(removed) .map(|per_relay_parent| per_relay_parent.collations) .unwrap_or_default(); - for collation in collations.into_values() { + for collation_with_core in collations.into_values() { + let collation = collation_with_core.collation(); + let candidate_hash = collation.receipt.hash(); state.collation_result_senders.remove(&candidate_hash); state.validator_groups_buf.remove_candidate(&candidate_hash); @@ -1480,7 +1531,7 @@ async fn run_inner( continue }; - let next_collation = { + let next_collation_with_core = { let per_relay_parent = match state.per_relay_parent.get(&relay_parent) { Some(per_relay_parent) => per_relay_parent, None => continue, @@ -1500,7 +1551,8 @@ async fn run_inner( } }; - if let Some(collation) = next_collation { + if let Some(collation_with_core) = next_collation_with_core { + let collation = collation_with_core.collation(); let receipt = collation.receipt.clone(); let pov = collation.pov.clone(); let parent_head_data = collation.parent_head_data.clone(); From 84974135dc57ca067d29f21771dcbd1797936fb2 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 20 Mar 2024 11:24:47 +0200 Subject: [PATCH 08/28] fix tests Signed-off-by: Andrei Sandu --- .../network/collator-protocol/src/collator_side/tests/mod.rs | 1 + .../src/collator_side/tests/prospective_parachains.rs | 2 ++ 2 files changed, 3 insertions(+) diff --git a/polkadot/node/network/collator-protocol/src/collator_side/tests/mod.rs b/polkadot/node/network/collator-protocol/src/collator_side/tests/mod.rs index 38e6780eb7d20..bcf0b34e631f9 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/tests/mod.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/tests/mod.rs @@ -377,6 +377,7 @@ async fn distribute_collation_with_receipt( pov: pov.clone(), parent_head_data: HeadData(vec![1, 2, 3]), result_sender: None, + core_index: CoreIndex(0), }, ) .await; diff --git a/polkadot/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs b/polkadot/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs index e419cd5444f5a..707053545630a 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/tests/prospective_parachains.rs @@ -277,6 +277,7 @@ fn distribute_collation_from_implicit_view() { pov: pov.clone(), parent_head_data: HeadData(vec![1, 2, 3]), result_sender: None, + core_index: CoreIndex(0), }, ) .await; @@ -358,6 +359,7 @@ fn distribute_collation_up_to_limit() { pov: pov.clone(), parent_head_data: HeadData(vec![1, 2, 3]), result_sender: None, + core_index: CoreIndex(0), }, ) .await; From 0dbc63ab2b2cc3a04b94a18688b6ecbe515f8ef5 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 13:33:56 +0200 Subject: [PATCH 09/28] enable collator-protocol elastic scaling extension for adder colaltor Signed-off-by: Andrei Sandu --- polkadot/parachain/test-parachains/adder/collator/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/parachain/test-parachains/adder/collator/Cargo.toml b/polkadot/parachain/test-parachains/adder/collator/Cargo.toml index cb283c271191f..cdc819732ed6b 100644 --- a/polkadot/parachain/test-parachains/adder/collator/Cargo.toml +++ b/polkadot/parachain/test-parachains/adder/collator/Cargo.toml @@ -24,7 +24,7 @@ log = { workspace = true, default-features = true } test-parachain-adder = { path = ".." } polkadot-primitives = { path = "../../../../primitives" } polkadot-cli = { path = "../../../../cli" } -polkadot-service = { path = "../../../../node/service", features = ["rococo-native"] } +polkadot-service = { path = "../../../../node/service", features = ["rococo-native", "elastic-scaling-experimental"] } polkadot-node-primitives = { path = "../../../../node/primitives" } polkadot-node-subsystem = { path = "../../../../node/subsystem" } From 3906d3436292712faf12b42aec800e1c9f119b70 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 13:33:20 +0200 Subject: [PATCH 10/28] Use backing state to get candidates pending availability Signed-off-by: Andrei Sandu --- .../node/collation-generation/src/error.rs | 2 + polkadot/node/collation-generation/src/lib.rs | 199 +++++++----------- 2 files changed, 80 insertions(+), 121 deletions(-) diff --git a/polkadot/node/collation-generation/src/error.rs b/polkadot/node/collation-generation/src/error.rs index ac5db6cd7f285..852c50f306825 100644 --- a/polkadot/node/collation-generation/src/error.rs +++ b/polkadot/node/collation-generation/src/error.rs @@ -28,6 +28,8 @@ pub enum Error { Util(#[from] polkadot_node_subsystem_util::Error), #[error(transparent)] Erasure(#[from] polkadot_erasure_coding::Error), + #[error("Parachain backing state not available in runtime.")] + MissingParaBackingState, } pub type Result = std::result::Result; diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 590f99eb7e1d0..8f834c275bf8d 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -43,13 +43,13 @@ use polkadot_node_subsystem::{ SubsystemContext, SubsystemError, SubsystemResult, }; use polkadot_node_subsystem_util::{ - request_async_backing_params, request_availability_cores, request_persisted_validation_data, + request_availability_cores, request_para_backing_state, request_persisted_validation_data, request_validation_code, request_validation_code_hash, request_validators, }; use polkadot_primitives::{ collator_signature_payload, CandidateCommitments, CandidateDescriptor, CandidateReceipt, - CollatorPair, CoreIndex, CoreState, Hash, Id as ParaId, OccupiedCoreAssumption, - PersistedValidationData, ValidationCodeHash, + CollatorPair, CoreIndex, Hash, Id as ParaId, OccupiedCoreAssumption, PersistedValidationData, + ValidationCodeHash, }; use sp_core::crypto::Pair; use std::sync::Arc; @@ -208,160 +208,117 @@ async fn handle_new_activations( if config.collator.is_none() { return Ok(()) } + let para_id = config.para_id; let _overall_timer = metrics.time_new_activations(); for relay_parent in activated { let _relay_parent_timer = metrics.time_new_activations_relay_parent(); - let (availability_cores, validators, async_backing_params) = join!( + let (availability_cores, validators, para_backing_state) = join!( request_availability_cores(relay_parent, ctx.sender()).await, request_validators(relay_parent, ctx.sender()).await, - request_async_backing_params(relay_parent, ctx.sender()).await, + request_para_backing_state(relay_parent, config.para_id, ctx.sender()).await, ); let availability_cores = availability_cores??; let n_validators = validators??.len(); - let async_backing_params = async_backing_params?.ok(); + let para_backing_state = if let Some(para_backing_state) = para_backing_state?? { + para_backing_state + } else { + return Err(crate::error::Error::MissingParaBackingState) + }; // The loop bellow will fill in cores that the para is assigned to. let mut cores_to_build_on = Vec::new(); - // The latest on chain PVD. - let mut pvd = None; - // The validation code hash. - let mut validation_code_hash = None; for (core_idx, core) in availability_cores.into_iter().enumerate() { - let _availability_core_timer = metrics.time_new_activations_availability_core(); - - let (scheduled_core, assumption) = match core { - CoreState::Scheduled(scheduled_core) => - (scheduled_core, OccupiedCoreAssumption::Free), - CoreState::Occupied(occupied_core) => match async_backing_params { - Some(params) if params.max_candidate_depth >= 1 => { - // maximum candidate depth when building on top of a block - // pending availability is necessarily 1 - the depth of the - // pending block is 0 so the child has depth 1. - - // TODO [now]: this assumes that next up == current. - // in practice we should only set `OccupiedCoreAssumption::Included` - // when the candidate occupying the core is also of the same para. - if let Some(scheduled) = occupied_core.next_up_on_available { - (scheduled, OccupiedCoreAssumption::Included) - } else { - continue - } - }, - _ => { - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - relay_parent = ?relay_parent, - "core is occupied. Keep going.", - ); - continue - }, - }, - CoreState::Free => { - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - "core is free. Keep going.", - ); - continue - }, - }; + let scheduled_para_id = core.para_id(); - if scheduled_core.para_id != config.para_id { + if scheduled_para_id != Some(config.para_id) { gum::trace!( target: LOG_TARGET, core_idx = %core_idx, relay_parent = ?relay_parent, our_para = %config.para_id, - their_para = %scheduled_core.para_id, + their_para = ?scheduled_para_id, "core is not assigned to our para. Keep going.", ); continue } - // we get validation data and validation code synchronously for each core instead of - // within the subtask loop, because we have only a single mutable handle to the - // context, so the work can't really be distributed - - let validation_data = match request_persisted_validation_data( - relay_parent, - scheduled_core.para_id, - assumption, - ctx.sender(), - ) - .await - .await?? - { - Some(v) => v, - None => { - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - relay_parent = ?relay_parent, - our_para = %config.para_id, - their_para = %scheduled_core.para_id, - "validation data is not available", - ); - continue - }, - }; - - let code_hash = match obtain_validation_code_hash_with_assumption( - relay_parent, - scheduled_core.para_id, - assumption, - ctx.sender(), - ) - .await? - { - Some(v) => v, - None => { - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - relay_parent = ?relay_parent, - our_para = %config.para_id, - their_para = %scheduled_core.para_id, - "validation code hash is not found.", - ); - continue - }, - }; - - // Prepare data for building collation(s) outside the loop. + // Accumulate cores for building collation(s) outside the loop. cores_to_build_on.push(CoreIndex(core_idx as u32)); + } - // We only need the validation data only for the first collation we build, - // we will construct the other ones based on the outputs of prev collation. - // This should be fine as we always enact the chain candidates of the para at once - // in the runtime. - if pvd.is_none() { - pvd = Some(validation_data); - validation_code_hash = Some(code_hash); - } + // Skip to next relay parent if there is no core assigned to us. + if cores_to_build_on.is_empty() { + continue } + // We are being very optimistic here, but one of the cores could pend availability some more + // block, ore even time out. + // For timeout assumption the collator can't really know because it doesn't receive bitfield + // gossip. + let assumption = if para_backing_state.pending_availability.is_empty() { + OccupiedCoreAssumption::Free + } else { + OccupiedCoreAssumption::Included + }; + + gum::debug!( + target: LOG_TARGET, + relay_parent = ?relay_parent, + our_para = %config.para_id, + ?assumption, + "Occupied core(s) assumption", + ); + + let mut validation_data = match request_persisted_validation_data( + relay_parent, + config.para_id, + assumption, + ctx.sender(), + ) + .await + .await?? + { + Some(v) => v, + None => { + gum::debug!( + target: LOG_TARGET, + relay_parent = ?relay_parent, + our_para = %config.para_id, + "validation data is not available", + ); + continue + }, + }; + + let validation_code_hash = match obtain_validation_code_hash_with_assumption( + relay_parent, + config.para_id, + assumption, + ctx.sender(), + ) + .await? + { + Some(v) => v, + None => { + gum::debug!( + target: LOG_TARGET, + relay_parent = ?relay_parent, + our_para = %config.para_id, + "validation code hash is not found.", + ); + continue + }, + }; + let task_config = config.clone(); let metrics = metrics.clone(); let mut task_sender = ctx.sender().clone(); - let (mut validation_data, validation_code_hash) = if !cores_to_build_on.is_empty() { - ( - pvd.expect("If cores is not empty, `pvd` is always Some; qed"), - validation_code_hash - .expect("If cores is not empty, `validation_code_hash` is always Some; qed"), - ) - } else { - // Bail out if no cores to build on. - return Ok(()) - }; - - let para_id = config.para_id; ctx.spawn( "chained-collation-builder", Box::pin(async move { From d344a2dacc76d44a5000d7ed783eab7a6308b084 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 13:43:04 +0200 Subject: [PATCH 11/28] use ok_or Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 8203b5c854da5..09b275dfe16db 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -226,11 +226,8 @@ async fn handle_new_activations( let availability_cores = availability_cores??; let n_validators = validators??.len(); - let para_backing_state = if let Some(para_backing_state) = para_backing_state?? { - para_backing_state - } else { - return Err(crate::error::Error::MissingParaBackingState) - }; + let para_backing_state = + para_backing_state??.ok_or(crate::error::Error::MissingParaBackingState); // The loop bellow will fill in cores that the para is assigned to. let mut cores_to_build_on = Vec::new(); From 16e1cb41d33aacedb7023c9502c780d9166fff76 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 13:43:47 +0200 Subject: [PATCH 12/28] subsystem-util request_para_backing_state Signed-off-by: Andrei Sandu --- polkadot/node/subsystem-util/src/lib.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/polkadot/node/subsystem-util/src/lib.rs b/polkadot/node/subsystem-util/src/lib.rs index aaae30db50cdb..8c0d83cf55fd2 100644 --- a/polkadot/node/subsystem-util/src/lib.rs +++ b/polkadot/node/subsystem-util/src/lib.rs @@ -31,6 +31,7 @@ use polkadot_node_subsystem::{ overseer, SubsystemSender, }; use polkadot_primitives::{slashing, CoreIndex, ExecutorParams}; +use polkadot_primitives::{async_backing::BackingState, slashing, ExecutorParams}; pub use overseer::{ gen::{OrchestraError as OverseerError, Timeout}, @@ -308,6 +309,7 @@ specialize_requests! { fn request_disabled_validators() -> Vec; DisabledValidators; fn request_async_backing_params() -> AsyncBackingParams; AsyncBackingParams; fn request_claim_queue() -> BTreeMap>; ClaimQueue; + fn request_para_backing_state(para_id: ParaId) -> Option; ParaBackingState; } /// Requests executor parameters from the runtime effective at given relay-parent. First obtains From 91f9c408a2db63afbdaadb35230d542b12a4f39f Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 14:31:03 +0200 Subject: [PATCH 13/28] Use the exposed claim queue Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 117 ++++++++++++++---- polkadot/node/subsystem-util/src/lib.rs | 3 +- 2 files changed, 93 insertions(+), 27 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 09b275dfe16db..70633b8d82c29 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -43,13 +43,14 @@ use polkadot_node_subsystem::{ SubsystemContext, SubsystemError, SubsystemResult, }; use polkadot_node_subsystem_util::{ - request_availability_cores, request_para_backing_state, request_persisted_validation_data, + has_required_runtime, request_async_backing_params, request_availability_cores, + request_claim_queue, request_para_backing_state, request_persisted_validation_data, request_validation_code, request_validation_code_hash, request_validators, }; use polkadot_primitives::{ collator_signature_payload, CandidateCommitments, CandidateDescriptor, CandidateReceipt, - CollatorPair, CoreIndex, Hash, Id as ParaId, OccupiedCoreAssumption, PersistedValidationData, - ValidationCodeHash, + CollatorPair, CoreIndex, CoreState, Hash, Id as ParaId, OccupiedCoreAssumption, + PersistedValidationData, ValidationCodeHash, }; use sp_core::crypto::Pair; use std::{ @@ -218,34 +219,104 @@ async fn handle_new_activations( for relay_parent in activated { let _relay_parent_timer = metrics.time_new_activations_relay_parent(); - let (availability_cores, validators, para_backing_state) = join!( + let (availability_cores, validators, para_backing_state, async_backing_params) = join!( request_availability_cores(relay_parent, ctx.sender()).await, request_validators(relay_parent, ctx.sender()).await, request_para_backing_state(relay_parent, config.para_id, ctx.sender()).await, + request_async_backing_params(relay_parent, ctx.sender()).await, ); let availability_cores = availability_cores??; + let async_backing_params = async_backing_params?.ok(); let n_validators = validators??.len(); let para_backing_state = - para_backing_state??.ok_or(crate::error::Error::MissingParaBackingState); + para_backing_state??.ok_or(crate::error::Error::MissingParaBackingState)?; - // The loop bellow will fill in cores that the para is assigned to. + let maybe_claim_queue = fetch_claim_queue(ctx.sender(), relay_parent).await?; + + // The loop bellow will fill in cores that the para is allowed to build on. let mut cores_to_build_on = Vec::new(); for (core_idx, core) in availability_cores.into_iter().enumerate() { - let scheduled_para_id = core.para_id(); - - if scheduled_para_id != Some(config.para_id) { - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - relay_parent = ?relay_parent, - our_para = %config.para_id, - their_para = ?scheduled_para_id, - "core is not assigned to our para. Keep going.", - ); - continue - } + match core { + CoreState::Scheduled(scheduled_core) => { + // Core is scheduled and free, but is it ours ? + if scheduled_core.para_id != config.para_id { + gum::trace!( + target: LOG_TARGET, + core_idx = %core_idx, + relay_parent = ?relay_parent, + our_para = %config.para_id, + their_para = ?scheduled_core.para_id, + "core is not assigned to our para. Keep going.", + ); + continue + } + }, + CoreState::Occupied(occupied_core) => match async_backing_params { + Some(params) if params.max_candidate_depth >= 1 => { + // maximum candidate depth when building on top of a block + // pending availability is necessarily 1 - the depth of the + // pending block is 0 so the child has depth 1. + + // Use claim queue if available, or fallback to `next_up_on_available` + if let Some(ref claim_queue) = maybe_claim_queue { + // Check if our para is scheduled assuming the candidate pending + // availability gets included in the very next block + let next_para_scheduled = fetch_next_scheduled_on_core( + &claim_queue, + CoreIndex(core_idx as _), + ); + if Some(config.para_id) != next_para_scheduled { + // Not our para scheduled + gum::trace!( + target: LOG_TARGET, + core_idx = %core_idx, + relay_parent = ?relay_parent, + our_para = %config.para_id, + para_id = ?next_para_scheduled, + "core is scheduled to some other para. Keep going.", + ); + continue + } else { + // Nothing scheduled + gum::trace!( + target: LOG_TARGET, + core_idx = %core_idx, + relay_parent = ?relay_parent, + our_para = %config.para_id, + "no para is scheduled on core at next block.", + ); + continue + } + } else if let Some(next_scheduled) = occupied_core.next_up_on_available { + if next_scheduled.para_id != config.para_id { + continue + } + } else { + // Nothing scheduled + continue + } + }, + _ => { + gum::trace!( + target: LOG_TARGET, + core_idx = %core_idx, + relay_parent = ?relay_parent, + "core is occupied. Keep going.", + ); + continue + }, + }, + CoreState::Free => { + gum::trace!( + target: LOG_TARGET, + core_idx = %core_idx, + "core is not assigned to any para. Keep going.", + ); + continue + }, + }; // Accumulate cores for building collation(s) outside the loop. cores_to_build_on.push(CoreIndex(core_idx as u32)); @@ -629,10 +700,6 @@ async fn fetch_claim_queue( fn fetch_next_scheduled_on_core( claim_queue: &BTreeMap>, core_idx: CoreIndex, -) -> Option { - claim_queue - .get(&core_idx)? - .front() - .cloned() - .map(|para_id| ScheduledCore { para_id, collator: None }) +) -> Option { + claim_queue.get(&core_idx)?.front().cloned() } diff --git a/polkadot/node/subsystem-util/src/lib.rs b/polkadot/node/subsystem-util/src/lib.rs index 8c0d83cf55fd2..85de93cedd69c 100644 --- a/polkadot/node/subsystem-util/src/lib.rs +++ b/polkadot/node/subsystem-util/src/lib.rs @@ -30,8 +30,7 @@ use polkadot_node_subsystem::{ messages::{RuntimeApiMessage, RuntimeApiRequest, RuntimeApiSender}, overseer, SubsystemSender, }; -use polkadot_primitives::{slashing, CoreIndex, ExecutorParams}; -use polkadot_primitives::{async_backing::BackingState, slashing, ExecutorParams}; +use polkadot_primitives::{slashing, CoreIndex, ExecutorParams, async_backing::BackingState}; pub use overseer::{ gen::{OrchestraError as OverseerError, Timeout}, From 154f3ddf5199e60f5ba4e8abe40de5900bfebf11 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 15:41:24 +0200 Subject: [PATCH 14/28] refactor Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 94 ++++++++----------- 1 file changed, 40 insertions(+), 54 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 70633b8d82c29..455bc38567e77 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -50,7 +50,7 @@ use polkadot_node_subsystem_util::{ use polkadot_primitives::{ collator_signature_payload, CandidateCommitments, CandidateDescriptor, CandidateReceipt, CollatorPair, CoreIndex, CoreState, Hash, Id as ParaId, OccupiedCoreAssumption, - PersistedValidationData, ValidationCodeHash, + PersistedValidationData, ScheduledCore, ValidationCodeHash, }; use sp_core::crypto::Pair; use std::{ @@ -238,21 +238,8 @@ async fn handle_new_activations( let mut cores_to_build_on = Vec::new(); for (core_idx, core) in availability_cores.into_iter().enumerate() { - match core { - CoreState::Scheduled(scheduled_core) => { - // Core is scheduled and free, but is it ours ? - if scheduled_core.para_id != config.para_id { - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - relay_parent = ?relay_parent, - our_para = %config.para_id, - their_para = ?scheduled_core.para_id, - "core is not assigned to our para. Keep going.", - ); - continue - } - }, + let scheduled_core = match core { + CoreState::Scheduled(scheduled_core) => scheduled_core, CoreState::Occupied(occupied_core) => match async_backing_params { Some(params) if params.max_candidate_depth >= 1 => { // maximum candidate depth when building on top of a block @@ -260,42 +247,25 @@ async fn handle_new_activations( // pending block is 0 so the child has depth 1. // Use claim queue if available, or fallback to `next_up_on_available` - if let Some(ref claim_queue) = maybe_claim_queue { - // Check if our para is scheduled assuming the candidate pending - // availability gets included in the very next block - let next_para_scheduled = fetch_next_scheduled_on_core( - &claim_queue, - CoreIndex(core_idx as _), - ); - if Some(config.para_id) != next_para_scheduled { - // Not our para scheduled - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - relay_parent = ?relay_parent, - our_para = %config.para_id, - para_id = ?next_para_scheduled, - "core is scheduled to some other para. Keep going.", - ); - continue - } else { - // Nothing scheduled - gum::trace!( - target: LOG_TARGET, - core_idx = %core_idx, - relay_parent = ?relay_parent, - our_para = %config.para_id, - "no para is scheduled on core at next block.", - ); - continue - } - } else if let Some(next_scheduled) = occupied_core.next_up_on_available { - if next_scheduled.para_id != config.para_id { - continue - } - } else { - // Nothing scheduled - continue + let res = match maybe_claim_queue { + Some(ref claim_queue) => { + // read what's in the claim queue for this core + fetch_next_scheduled_on_core( + claim_queue, + CoreIndex(core_idx as u32), + ) + }, + None => { + // Runtime doesn't support claim queue runtime api. Fallback to + // `next_up_on_available` + occupied_core.next_up_on_available + }, + } + .map(|scheduled: ScheduledCore| scheduled); + + match res { + Some(res) => res, + None => continue, } }, _ => { @@ -318,6 +288,18 @@ async fn handle_new_activations( }, }; + if scheduled_core.para_id != config.para_id { + gum::trace!( + target: LOG_TARGET, + core_idx = %core_idx, + relay_parent = ?relay_parent, + our_para = %config.para_id, + their_para = ?scheduled_core.para_id, + "core is not assigned to our para. Keep going.", + ); + continue + } + // Accumulate cores for building collation(s) outside the loop. cores_to_build_on.push(CoreIndex(core_idx as u32)); } @@ -700,6 +682,10 @@ async fn fetch_claim_queue( fn fetch_next_scheduled_on_core( claim_queue: &BTreeMap>, core_idx: CoreIndex, -) -> Option { - claim_queue.get(&core_idx)?.front().cloned() +) -> Option { + claim_queue + .get(&core_idx)? + .front() + .cloned() + .map(|para_id| ScheduledCore { para_id, collator: None }) } From 60ec79fb682ab2432c906a775e11f99e541edde4 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 15:43:14 +0200 Subject: [PATCH 15/28] remove type annotation Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 455bc38567e77..8c3681c9f4b0c 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -261,7 +261,7 @@ async fn handle_new_activations( occupied_core.next_up_on_available }, } - .map(|scheduled: ScheduledCore| scheduled); + .map(|scheduled| scheduled); match res { Some(res) => res, From a6c4afe1c0625ed82b6bef7f2b1b7b329d90f472 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 15:47:41 +0200 Subject: [PATCH 16/28] remove more merge damage Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 8c3681c9f4b0c..da158f0b102b6 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -294,7 +294,7 @@ async fn handle_new_activations( core_idx = %core_idx, relay_parent = ?relay_parent, our_para = %config.para_id, - their_para = ?scheduled_core.para_id, + their_para = %scheduled_core.para_id, "core is not assigned to our para. Keep going.", ); continue From 62c13dcf7b2a15fc025ee9b3e4df05ad138708c4 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 16:12:24 +0200 Subject: [PATCH 17/28] make tests compile Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 4 ++-- polkadot/node/collation-generation/src/tests.rs | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index da158f0b102b6..1194962a1d4b1 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -219,11 +219,11 @@ async fn handle_new_activations( for relay_parent in activated { let _relay_parent_timer = metrics.time_new_activations_relay_parent(); - let (availability_cores, validators, para_backing_state, async_backing_params) = join!( + let (availability_cores, validators, async_backing_params, para_backing_state) = join!( request_availability_cores(relay_parent, ctx.sender()).await, request_validators(relay_parent, ctx.sender()).await, - request_para_backing_state(relay_parent, config.para_id, ctx.sender()).await, request_async_backing_params(relay_parent, ctx.sender()).await, + request_para_backing_state(relay_parent, config.para_id, ctx.sender()).await, ); let availability_cores = availability_cores??; diff --git a/polkadot/node/collation-generation/src/tests.rs b/polkadot/node/collation-generation/src/tests.rs index 9b16980e6af43..8e5f88e22f869 100644 --- a/polkadot/node/collation-generation/src/tests.rs +++ b/polkadot/node/collation-generation/src/tests.rs @@ -611,6 +611,7 @@ fn submit_collation_is_no_op_before_initialization() { parent_head: vec![1, 2, 3].into(), validation_code_hash: Hash::repeat_byte(1).into(), result_sender: None, + core_index: CoreIndex(0), }), }) .await; @@ -647,6 +648,7 @@ fn submit_collation_leads_to_distribution() { parent_head: vec![1, 2, 3].into(), validation_code_hash, result_sender: None, + core_index: CoreIndex(0), }), }) .await; From ddf057e2f1ac070f5f8923b0aebf7cd30cab16b3 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Fri, 22 Mar 2024 16:16:58 +0200 Subject: [PATCH 18/28] happy clippy is good clippy Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 3 +-- .../node/network/collator-protocol/src/collator_side/mod.rs | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 1194962a1d4b1..2e84aa876974c 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -260,8 +260,7 @@ async fn handle_new_activations( // `next_up_on_available` occupied_core.next_up_on_available }, - } - .map(|scheduled| scheduled); + }; match res { Some(res) => res, diff --git a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs index 06cee3ec05e15..a25c53eda7242 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs @@ -732,7 +732,7 @@ async fn advertise_collation( metrics: &Metrics, ) { for (candidate_hash, collation_and_core) in per_relay_parent.collations.iter_mut() { - let core_index = collation_and_core.core_index().clone(); + let core_index = *collation_and_core.core_index(); let collation = collation_and_core.collation_mut(); // Check that peer will be able to request the collation. From 13b149f278076e90b9a9c35a0cd18e9370bc0b52 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 25 Mar 2024 11:13:12 +0200 Subject: [PATCH 19/28] lookahead use core_index in SubmitCollationParams Signed-off-by: Andrei Sandu --- .../consensus/aura/src/collators/lookahead.rs | 32 ++++++++++++------- polkadot/node/subsystem-util/src/lib.rs | 2 +- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index 161f10d55a193..17e4a5fb2013d 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -49,7 +49,7 @@ use polkadot_node_subsystem::messages::{ CollationGenerationMessage, RuntimeApiMessage, RuntimeApiRequest, }; use polkadot_overseer::Handle as OverseerHandle; -use polkadot_primitives::{CollatorPair, Id as ParaId, OccupiedCoreAssumption}; +use polkadot_primitives::{CollatorPair, CoreIndex, Id as ParaId, OccupiedCoreAssumption}; use futures::{channel::oneshot, prelude::*}; use sc_client_api::{backend::AuxStore, BlockBackend, BlockOf}; @@ -184,7 +184,15 @@ where while let Some(relay_parent_header) = import_notifications.next().await { let relay_parent = relay_parent_header.hash(); - if !is_para_scheduled(relay_parent, params.para_id, &mut params.overseer_handle).await { + let core_index = if let Some(core_index) = fist_core_scheduled_for_para( + relay_parent, + params.para_id, + &mut params.overseer_handle, + ) + .await + { + core_index + } else { tracing::trace!( target: crate::LOG_TARGET, ?relay_parent, @@ -193,7 +201,7 @@ where ); continue - } + }; let max_pov_size = match params .relay_client @@ -396,6 +404,7 @@ where parent_head: parent_header.encode().into(), validation_code_hash, result_sender: None, + core_index, }, ), "SubmitCollation", @@ -480,14 +489,12 @@ async fn max_ancestry_lookback( } } -// Checks if there exists a scheduled core for the para at the provided relay parent. -// -// Falls back to `false` in case of an error. -async fn is_para_scheduled( +// Checks the first `CoreIndex` assigned to the para at the provided relay parent. +async fn fist_core_scheduled_for_para( relay_parent: PHash, para_id: ParaId, overseer_handle: &mut OverseerHandle, -) -> bool { +) -> Option { let (tx, rx) = oneshot::channel(); let request = RuntimeApiRequest::AvailabilityCores(tx); overseer_handle @@ -503,7 +510,7 @@ async fn is_para_scheduled( ?relay_parent, "Failed to query availability cores runtime API", ); - return false + return None }, Err(oneshot::Canceled) => { tracing::error!( @@ -511,9 +518,12 @@ async fn is_para_scheduled( ?relay_parent, "Sender for availability cores runtime request dropped", ); - return false + return None }, }; - cores.iter().any(|core| core.para_id() == Some(para_id)) + cores + .iter() + .position(|core| core.para_id() == Some(para_id)) + .map(|index| CoreIndex(index as _)) } diff --git a/polkadot/node/subsystem-util/src/lib.rs b/polkadot/node/subsystem-util/src/lib.rs index 85de93cedd69c..a2a24973d90aa 100644 --- a/polkadot/node/subsystem-util/src/lib.rs +++ b/polkadot/node/subsystem-util/src/lib.rs @@ -30,7 +30,7 @@ use polkadot_node_subsystem::{ messages::{RuntimeApiMessage, RuntimeApiRequest, RuntimeApiSender}, overseer, SubsystemSender, }; -use polkadot_primitives::{slashing, CoreIndex, ExecutorParams, async_backing::BackingState}; +use polkadot_primitives::{async_backing::BackingState, slashing, CoreIndex, ExecutorParams}; pub use overseer::{ gen::{OrchestraError as OverseerError, Timeout}, From 64a53f4c9f27a70b2e487a7a425c9be6eab5a662 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 25 Mar 2024 11:23:28 +0200 Subject: [PATCH 20/28] taplo fix Signed-off-by: Andrei Sandu --- polkadot/parachain/test-parachains/adder/collator/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/parachain/test-parachains/adder/collator/Cargo.toml b/polkadot/parachain/test-parachains/adder/collator/Cargo.toml index cdc819732ed6b..30bce806f9ffa 100644 --- a/polkadot/parachain/test-parachains/adder/collator/Cargo.toml +++ b/polkadot/parachain/test-parachains/adder/collator/Cargo.toml @@ -24,7 +24,7 @@ log = { workspace = true, default-features = true } test-parachain-adder = { path = ".." } polkadot-primitives = { path = "../../../../primitives" } polkadot-cli = { path = "../../../../cli" } -polkadot-service = { path = "../../../../node/service", features = ["rococo-native", "elastic-scaling-experimental"] } +polkadot-service = { path = "../../../../node/service", features = ["elastic-scaling-experimental", "rococo-native"] } polkadot-node-primitives = { path = "../../../../node/primitives" } polkadot-node-subsystem = { path = "../../../../node/subsystem" } From 732440bb5d94bb612b601c6c84651144a19f28ae Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 25 Mar 2024 11:31:34 +0200 Subject: [PATCH 21/28] enable elastic scaling in undying collator (also enables in zombienet tests) Signed-off-by: Andrei Sandu --- polkadot/parachain/test-parachains/undying/collator/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/parachain/test-parachains/undying/collator/Cargo.toml b/polkadot/parachain/test-parachains/undying/collator/Cargo.toml index 238b98a66801c..bede10a7673be 100644 --- a/polkadot/parachain/test-parachains/undying/collator/Cargo.toml +++ b/polkadot/parachain/test-parachains/undying/collator/Cargo.toml @@ -24,7 +24,7 @@ log = { workspace = true, default-features = true } test-parachain-undying = { path = ".." } polkadot-primitives = { path = "../../../../primitives" } polkadot-cli = { path = "../../../../cli" } -polkadot-service = { path = "../../../../node/service", features = ["rococo-native"] } +polkadot-service = { path = "../../../../node/service", features = ["elastic-scaling-experimental", "rococo-native"] } polkadot-node-primitives = { path = "../../../../node/primitives" } polkadot-node-subsystem = { path = "../../../../node/subsystem" } From f80897de7dbf024163b68af5a265e51a6c62c38c Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 25 Mar 2024 13:45:40 +0200 Subject: [PATCH 22/28] Collation generation test fixes and new tests Signed-off-by: Andrei Sandu --- .../node/collation-generation/src/tests.rs | 259 +++++++++++++++++- 1 file changed, 254 insertions(+), 5 deletions(-) diff --git a/polkadot/node/collation-generation/src/tests.rs b/polkadot/node/collation-generation/src/tests.rs index 8e5f88e22f869..3cb3e61a35a1c 100644 --- a/polkadot/node/collation-generation/src/tests.rs +++ b/polkadot/node/collation-generation/src/tests.rs @@ -30,13 +30,16 @@ use polkadot_node_subsystem::{ use polkadot_node_subsystem_test_helpers::{subsystem_test_harness, TestSubsystemContextHandle}; use polkadot_node_subsystem_util::TimeoutExt; use polkadot_primitives::{ - AsyncBackingParams, CollatorPair, HeadData, Id as ParaId, Id, PersistedValidationData, + async_backing::{BackingState, CandidatePendingAvailability}, + AsyncBackingParams, BlockNumber, CollatorPair, HeadData, PersistedValidationData, ScheduledCore, ValidationCode, }; use rstest::rstest; use sp_keyring::sr25519::Keyring as Sr25519Keyring; use std::pin::Pin; -use test_helpers::{dummy_candidate_descriptor, dummy_hash, dummy_head_data, dummy_validator}; +use test_helpers::{ + dummy_candidate_descriptor, dummy_hash, dummy_head_data, dummy_validator, make_candidate, +}; type VirtualOverseer = TestSubsystemContextHandle; @@ -105,9 +108,9 @@ impl Future for TestCollator { impl Unpin for TestCollator {} -async fn overseer_recv(overseer: &mut VirtualOverseer) -> AllMessages { - const TIMEOUT: std::time::Duration = std::time::Duration::from_millis(2000); +const TIMEOUT: std::time::Duration = std::time::Duration::from_millis(2000); +async fn overseer_recv(overseer: &mut VirtualOverseer) -> AllMessages { overseer .recv() .timeout(TIMEOUT) @@ -135,6 +138,41 @@ fn scheduled_core_for>(para_id: Id) -> ScheduledCore { ScheduledCore { para_id: para_id.into(), collator: None } } +fn dummy_candidate_pending_availability( + para_id: ParaId, + candidate_relay_parent: Hash, + relay_parent_number: BlockNumber, +) -> CandidatePendingAvailability { + let (candidate, _pvd) = make_candidate( + candidate_relay_parent, + relay_parent_number, + para_id, + dummy_head_data(), + HeadData(vec![1]), + ValidationCode(vec![1, 2, 3]).hash(), + ); + let candidate_hash = candidate.hash(); + + CandidatePendingAvailability { + candidate_hash, + descriptor: candidate.descriptor, + commitments: candidate.commitments, + relay_parent_number, + max_pov_size: 5 * 1024 * 1024, + } +} + +fn dummy_backing_state(pending_availability: Vec) -> BackingState { + let constraints = helpers::dummy_constraints( + 0, + vec![0], + dummy_head_data(), + ValidationCodeHash::from(Hash::repeat_byte(42)), + ); + + BackingState { constraints, pending_availability } +} + #[rstest] #[case(RuntimeApiRequest::CLAIM_QUEUE_RUNTIME_REQUIREMENT - 1)] #[case(RuntimeApiRequest::CLAIM_QUEUE_RUNTIME_REQUIREMENT)] @@ -176,6 +214,12 @@ fn requests_availability_per_relay_parent(#[case] runtime_version: u32) { ))) if runtime_version >= RuntimeApiRequest::CLAIM_QUEUE_RUNTIME_REQUIREMENT => { tx.send(Ok(BTreeMap::new())).unwrap(); }, + Some(AllMessages::RuntimeApi(RuntimeApiMessage::Request( + _hash, + RuntimeApiRequest::ParaBackingState(_para_id, tx), + ))) => { + tx.send(Ok(Some(dummy_backing_state(vec![])))).unwrap(); + }, Some(msg) => panic!("didn't expect any other overseer requests given no availability cores; got {:?}", msg), } } @@ -273,6 +317,12 @@ fn requests_validation_data_for_scheduled_matches(#[case] runtime_version: u32) ))) if runtime_version >= RuntimeApiRequest::CLAIM_QUEUE_RUNTIME_REQUIREMENT => { tx.send(Ok(BTreeMap::new())).unwrap(); }, + Some(AllMessages::RuntimeApi(RuntimeApiMessage::Request( + _hash, + RuntimeApiRequest::ParaBackingState(_para_id, tx), + ))) => { + tx.send(Ok(Some(dummy_backing_state(vec![])))).unwrap(); + }, Some(msg) => { panic!("didn't expect any other overseer requests; got {:?}", msg) }, @@ -384,6 +434,12 @@ fn sends_distribute_collation_message(#[case] runtime_version: u32) { ))) if runtime_version >= RuntimeApiRequest::CLAIM_QUEUE_RUNTIME_REQUIREMENT => { tx.send(Ok(BTreeMap::new())).unwrap(); }, + Some(AllMessages::RuntimeApi(RuntimeApiMessage::Request( + _hash, + RuntimeApiRequest::ParaBackingState(_para_id, tx), + ))) => { + tx.send(Ok(Some(dummy_backing_state(vec![])))).unwrap(); + }, Some(msg @ AllMessages::CollatorProtocol(_)) => { inner_to_collator_protocol.lock().await.push(msg); }, @@ -564,6 +620,12 @@ fn fallback_when_no_validation_code_hash_api(#[case] runtime_version: u32) { let res = BTreeMap::>::new(); tx.send(Ok(res)).unwrap(); }, + Some(AllMessages::RuntimeApi(RuntimeApiMessage::Request( + _hash, + RuntimeApiRequest::ParaBackingState(_para_id, tx), + ))) => { + tx.send(Ok(Some(dummy_backing_state(vec![])))).unwrap(); + }, Some(msg) => { panic!("didn't expect any other overseer requests; got {:?}", msg) }, @@ -723,6 +785,9 @@ fn distribute_collation_for_occupied_core_with_async_backing_enabled(#[case] run test_harness(|mut virtual_overseer| async move { helpers::initialize_collator(&mut virtual_overseer, para_id).await; helpers::activate_new_head(&mut virtual_overseer, activated_hash).await; + + let pending_availability = + vec![dummy_candidate_pending_availability(para_id, activated_hash, 1)]; helpers::handle_runtime_calls_on_new_head_activation( &mut virtual_overseer, activated_hash, @@ -730,14 +795,140 @@ fn distribute_collation_for_occupied_core_with_async_backing_enabled(#[case] run cores, runtime_version, claim_queue, + pending_availability, + ) + .await; + helpers::handle_core_processing_for_a_leaf( + &mut virtual_overseer, + activated_hash, + para_id, + // `CoreState` is `Occupied` => `OccupiedCoreAssumption` is `Included` + OccupiedCoreAssumption::Included, + 1, + ) + .await; + + virtual_overseer + }); +} + +// There are variable number of cores of cores in `Occupied` state and async backing is enabled. +// On new head activation `CollationGeneration` should produce and distribute a new collation +// with proper assumption about the para candidate chain availability at next block. +#[rstest] +#[case(0)] +#[case(1)] +#[case(2)] +fn distribute_collation_for_occupied_cores_with_async_backing_enabled_and_elastic_scaling( + #[case] candidates_pending_avail: u32, +) { + let activated_hash: Hash = [1; 32].into(); + let para_id = ParaId::from(5); + + let cores = (0..candidates_pending_avail) + .into_iter() + .map(|idx| { + CoreState::Occupied(polkadot_primitives::OccupiedCore { + next_up_on_available: Some(ScheduledCore { para_id, collator: None }), + occupied_since: 0, + time_out_at: 10, + next_up_on_time_out: Some(ScheduledCore { para_id, collator: None }), + availability: Default::default(), // doesn't matter + group_responsible: polkadot_primitives::GroupIndex(idx as u32), + candidate_hash: Default::default(), + candidate_descriptor: dummy_candidate_descriptor(dummy_hash()), + }) + }) + .collect::>(); + + let pending_availability = (0..candidates_pending_avail) + .into_iter() + .map(|_idx| dummy_candidate_pending_availability(para_id, activated_hash, 0)) + .collect::>(); + + let claim_queue = cores + .iter() + .enumerate() + .map(|(idx, _core)| (CoreIndex::from(idx as u32), VecDeque::from([para_id]))) + .collect::>(); + let total_cores = cores.len(); + + test_harness(|mut virtual_overseer| async move { + helpers::initialize_collator(&mut virtual_overseer, para_id).await; + helpers::activate_new_head(&mut virtual_overseer, activated_hash).await; + helpers::handle_runtime_calls_on_new_head_activation( + &mut virtual_overseer, + activated_hash, + AsyncBackingParams { max_candidate_depth: 1, allowed_ancestry_len: 1 }, + cores, + // Using latest runtime with the fancy claim queue exposed. + RuntimeApiRequest::CLAIM_QUEUE_RUNTIME_REQUIREMENT, + claim_queue, + pending_availability, ) .await; + helpers::handle_core_processing_for_a_leaf( &mut virtual_overseer, activated_hash, para_id, // `CoreState` is `Occupied` => `OccupiedCoreAssumption` is `Included` OccupiedCoreAssumption::Included, + total_cores, + ) + .await; + + virtual_overseer + }); +} + +// There are variable number of cores of cores in `Free` state and async backing is enabled. +// On new head activation `CollationGeneration` should produce and distribute a new collation +// with proper assumption about the para candidate chain availability at next block. +#[rstest] +#[case(0)] +#[case(1)] +#[case(2)] +fn distribute_collation_for_free_cores_with_async_backing_enabled_and_elastic_scaling( + #[case] candidates_pending_avail: u32, +) { + let activated_hash: Hash = [1; 32].into(); + let para_id = ParaId::from(5); + + let cores = (0..candidates_pending_avail) + .into_iter() + .map(|_idx| CoreState::Scheduled(ScheduledCore { para_id, collator: None })) + .collect::>(); + + let claim_queue = cores + .iter() + .enumerate() + .map(|(idx, _core)| (CoreIndex::from(idx as u32), VecDeque::from([para_id]))) + .collect::>(); + let total_cores = cores.len(); + + test_harness(|mut virtual_overseer| async move { + helpers::initialize_collator(&mut virtual_overseer, para_id).await; + helpers::activate_new_head(&mut virtual_overseer, activated_hash).await; + helpers::handle_runtime_calls_on_new_head_activation( + &mut virtual_overseer, + activated_hash, + AsyncBackingParams { max_candidate_depth: 1, allowed_ancestry_len: 1 }, + cores, + // Using latest runtime with the fancy claim queue exposed. + RuntimeApiRequest::CLAIM_QUEUE_RUNTIME_REQUIREMENT, + claim_queue, + vec![], + ) + .await; + + helpers::handle_core_processing_for_a_leaf( + &mut virtual_overseer, + activated_hash, + para_id, + // `CoreState` is `Free` => `OccupiedCoreAssumption` is `Free` + OccupiedCoreAssumption::Free, + total_cores, ) .await; @@ -779,6 +970,7 @@ fn no_collation_is_distributed_for_occupied_core_with_async_backing_disabled( cores, runtime_version, claim_queue, + vec![], ) .await; @@ -787,8 +979,38 @@ fn no_collation_is_distributed_for_occupied_core_with_async_backing_disabled( } mod helpers { + use polkadot_primitives::{ + async_backing::{Constraints, InboundHrmpLimitations}, + BlockNumber, + }; + use super::*; + // A set for dummy constraints for `ParaBackingState`` + pub(crate) fn dummy_constraints( + min_relay_parent_number: BlockNumber, + valid_watermarks: Vec, + required_parent: HeadData, + validation_code_hash: ValidationCodeHash, + ) -> Constraints { + Constraints { + min_relay_parent_number, + max_pov_size: 5 * 1024 * 1024, + max_code_size: 1_000_000, + ump_remaining: 10, + ump_remaining_bytes: 1_000, + max_ump_num_per_candidate: 10, + dmp_remaining_messages: vec![], + hrmp_inbound: InboundHrmpLimitations { valid_watermarks }, + hrmp_channels_out: vec![], + max_hrmp_num_per_candidate: 0, + required_parent, + validation_code_hash, + upgrade_restriction: None, + future_validation_code: None, + } + } + // Sends `Initialize` with a collator config pub async fn initialize_collator(virtual_overseer: &mut VirtualOverseer, para_id: ParaId) { virtual_overseer @@ -824,7 +1046,8 @@ mod helpers { async_backing_params: AsyncBackingParams, cores: Vec, runtime_version: u32, - claim_queue: BTreeMap>, + claim_queue: BTreeMap>, + pending_availability: Vec, ) { assert_matches!( overseer_recv(virtual_overseer).await, @@ -859,6 +1082,25 @@ mod helpers { } ); + // Process the `ParaBackingState` message, and return some dummy state. + let message = overseer_recv(virtual_overseer).await; + let para_id = match message { + AllMessages::RuntimeApi(RuntimeApiMessage::Request( + _, + RuntimeApiRequest::ParaBackingState(p_id, _), + )) => p_id, + _ => panic!("received unexpected message {:?}", message), + }; + + assert_matches!( + message, + AllMessages::RuntimeApi( + RuntimeApiMessage::Request(parent, RuntimeApiRequest::ParaBackingState(p_id, tx)) + ) if parent == activated_hash && p_id == para_id => { + tx.send(Ok(Some(dummy_backing_state(pending_availability)))).unwrap(); + } + ); + assert_matches!( overseer_recv(virtual_overseer).await, AllMessages::RuntimeApi(RuntimeApiMessage::Request( @@ -891,7 +1133,14 @@ mod helpers { activated_hash: Hash, para_id: ParaId, expected_occupied_core_assumption: OccupiedCoreAssumption, + cores_assigned: usize, ) { + // Expect no messages if no cores is assigned to the para + if cores_assigned == 0 { + assert!(overseer_recv(virtual_overseer).timeout(TIMEOUT / 2).await.is_none()); + return + } + // Some hardcoded data - if needed, extract to parameters let validation_code_hash = ValidationCodeHash::from(Hash::repeat_byte(42)); let parent_head = HeadData::from(vec![1, 2, 3]); From 83af4e1528299a2d023a6af47ae43c554fc69856 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 25 Mar 2024 14:54:34 +0200 Subject: [PATCH 23/28] bring test-runtime up to date Signed-off-by: Andrei Sandu --- polkadot/runtime/test-runtime/src/lib.rs | 34 +++++++++++++++++++++--- 1 file changed, 30 insertions(+), 4 deletions(-) diff --git a/polkadot/runtime/test-runtime/src/lib.rs b/polkadot/runtime/test-runtime/src/lib.rs index 8a3cd9309dbd8..62c3741c56d6e 100644 --- a/polkadot/runtime/test-runtime/src/lib.rs +++ b/polkadot/runtime/test-runtime/src/lib.rs @@ -27,10 +27,11 @@ use sp_std::{collections::btree_map::BTreeMap, prelude::*}; use polkadot_runtime_parachains::{ assigner_parachains as parachains_assigner_parachains, configuration as parachains_configuration, disputes as parachains_disputes, - disputes::slashing as parachains_slashing, dmp as parachains_dmp, hrmp as parachains_hrmp, - inclusion as parachains_inclusion, initializer as parachains_initializer, - origin as parachains_origin, paras as parachains_paras, - paras_inherent as parachains_paras_inherent, runtime_api_impl::v7 as runtime_impl, + disputes::slashing as parachains_slashing, + dmp as parachains_dmp, hrmp as parachains_hrmp, inclusion as parachains_inclusion, + initializer as parachains_initializer, origin as parachains_origin, paras as parachains_paras, + paras_inherent as parachains_paras_inherent, + runtime_api_impl::{v7 as runtime_impl, vstaging as staging_runtime_impl}, scheduler as parachains_scheduler, session_info as parachains_session_info, shared as parachains_shared, }; @@ -829,6 +830,7 @@ sp_api::impl_runtime_apis! { } } + #[api_version(10)] impl primitives::runtime_api::ParachainHost for Runtime { fn validators() -> Vec { runtime_impl::validators::() @@ -956,6 +958,30 @@ sp_api::impl_runtime_apis! { key_ownership_proof, ) } + + fn minimum_backing_votes() -> u32 { + runtime_impl::minimum_backing_votes::() + } + + fn para_backing_state(para_id: ParaId) -> Option { + runtime_impl::backing_state::(para_id) + } + + fn async_backing_params() -> primitives::AsyncBackingParams { + runtime_impl::async_backing_params::() + } + + fn approval_voting_params() -> primitives::vstaging::ApprovalVotingParams { + staging_runtime_impl::approval_voting_params::() + } + + fn disabled_validators() -> Vec { + staging_runtime_impl::disabled_validators::() + } + + fn node_features() -> primitives::vstaging::NodeFeatures { + staging_runtime_impl::node_features::() + } } impl beefy_primitives::BeefyApi for Runtime { From 9721b69c3d9dfbdd979af0993d08770aaae52019 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 25 Mar 2024 16:29:12 +0200 Subject: [PATCH 24/28] review feedback Signed-off-by: Andrei Sandu --- .../consensus/aura/src/collators/lookahead.rs | 35 +++++++++++-------- polkadot/node/collation-generation/src/lib.rs | 18 +++++----- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/cumulus/client/consensus/aura/src/collators/lookahead.rs b/cumulus/client/consensus/aura/src/collators/lookahead.rs index 17e4a5fb2013d..580058336174d 100644 --- a/cumulus/client/consensus/aura/src/collators/lookahead.rs +++ b/cumulus/client/consensus/aura/src/collators/lookahead.rs @@ -184,14 +184,14 @@ where while let Some(relay_parent_header) = import_notifications.next().await { let relay_parent = relay_parent_header.hash(); - let core_index = if let Some(core_index) = fist_core_scheduled_for_para( - relay_parent, - params.para_id, - &mut params.overseer_handle, - ) - .await + // TODO: Currently we use just the first core here, but for elastic scaling + // we iterate and build on all of the cores returned. + let core_index = if let Some(core_index) = + cores_scheduled_for_para(relay_parent, params.para_id, &mut params.overseer_handle) + .await + .get(0) { - core_index + *core_index } else { tracing::trace!( target: crate::LOG_TARGET, @@ -489,12 +489,12 @@ async fn max_ancestry_lookback( } } -// Checks the first `CoreIndex` assigned to the para at the provided relay parent. -async fn fist_core_scheduled_for_para( +// Return all the cores assigned to the para at the provided relay parent. +async fn cores_scheduled_for_para( relay_parent: PHash, para_id: ParaId, overseer_handle: &mut OverseerHandle, -) -> Option { +) -> Vec { let (tx, rx) = oneshot::channel(); let request = RuntimeApiRequest::AvailabilityCores(tx); overseer_handle @@ -510,7 +510,7 @@ async fn fist_core_scheduled_for_para( ?relay_parent, "Failed to query availability cores runtime API", ); - return None + return Vec::new() }, Err(oneshot::Canceled) => { tracing::error!( @@ -518,12 +518,19 @@ async fn fist_core_scheduled_for_para( ?relay_parent, "Sender for availability cores runtime request dropped", ); - return None + return Vec::new() }, }; cores .iter() - .position(|core| core.para_id() == Some(para_id)) - .map(|index| CoreIndex(index as _)) + .enumerate() + .filter_map(|(index, core)| { + if core.para_id() == Some(para_id) { + Some(CoreIndex(index as u32)) + } else { + None + } + }) + .collect() } diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 2e84aa876974c..8cf2809db8fea 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -219,19 +219,15 @@ async fn handle_new_activations( for relay_parent in activated { let _relay_parent_timer = metrics.time_new_activations_relay_parent(); - let (availability_cores, validators, async_backing_params, para_backing_state) = join!( + let (availability_cores, validators, async_backing_params) = join!( request_availability_cores(relay_parent, ctx.sender()).await, request_validators(relay_parent, ctx.sender()).await, request_async_backing_params(relay_parent, ctx.sender()).await, - request_para_backing_state(relay_parent, config.para_id, ctx.sender()).await, ); let availability_cores = availability_cores??; let async_backing_params = async_backing_params?.ok(); let n_validators = validators??.len(); - let para_backing_state = - para_backing_state??.ok_or(crate::error::Error::MissingParaBackingState)?; - let maybe_claim_queue = fetch_claim_queue(ctx.sender(), relay_parent).await?; // The loop bellow will fill in cores that the para is allowed to build on. @@ -296,11 +292,10 @@ async fn handle_new_activations( their_para = %scheduled_core.para_id, "core is not assigned to our para. Keep going.", ); - continue + } else { + // Accumulate cores for building collation(s) outside the loop. + cores_to_build_on.push(CoreIndex(core_idx as u32)); } - - // Accumulate cores for building collation(s) outside the loop. - cores_to_build_on.push(CoreIndex(core_idx as u32)); } // Skip to next relay parent if there is no core assigned to us. @@ -308,6 +303,11 @@ async fn handle_new_activations( continue } + let para_backing_state = + request_para_backing_state(relay_parent, config.para_id, ctx.sender()) + .await?? + .ok_or(crate::error::Error::MissingParaBackingState)?; + // We are being very optimistic here, but one of the cores could pend availability some more // block, ore even time out. // For timeout assumption the collator can't really know because it doesn't receive bitfield From 4eb804edd0871c8fb50b0398cf94c40e45398ad0 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Mon, 25 Mar 2024 16:43:34 +0200 Subject: [PATCH 25/28] await await Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index 8cf2809db8fea..f86e168a6ff4b 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -305,7 +305,7 @@ async fn handle_new_activations( let para_backing_state = request_para_backing_state(relay_parent, config.para_id, ctx.sender()) - .await?? + .await.await?? .ok_or(crate::error::Error::MissingParaBackingState)?; // We are being very optimistic here, but one of the cores could pend availability some more From 8b70bc2a21d3114f0506be2f637a56c1c4c08d25 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 27 Mar 2024 13:31:38 +0200 Subject: [PATCH 26/28] review feedback Signed-off-by: Andrei Sandu --- polkadot/node/collation-generation/src/lib.rs | 3 ++- .../src/collator_side/mod.rs | 25 +++++++++++-------- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/polkadot/node/collation-generation/src/lib.rs b/polkadot/node/collation-generation/src/lib.rs index f86e168a6ff4b..3164f6078bc07 100644 --- a/polkadot/node/collation-generation/src/lib.rs +++ b/polkadot/node/collation-generation/src/lib.rs @@ -305,7 +305,8 @@ async fn handle_new_activations( let para_backing_state = request_para_backing_state(relay_parent, config.para_id, ctx.sender()) - .await.await?? + .await + .await?? .ok_or(crate::error::Error::MissingParaBackingState)?; // We are being very optimistic here, but one of the cores could pend availability some more diff --git a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs index a25c53eda7242..a600f402c7262 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs @@ -748,13 +748,20 @@ async fn advertise_collation( } } - let should_advertise = per_relay_parent - .validator_group - .get(&core_index) - .map_or(ShouldAdvertiseTo::NotAuthority, |group| { - group.should_advertise_to(candidate_hash, peer_ids, &peer) - }); + let validator_group = + if let Some(validator_group) = per_relay_parent.validator_group.get_mut(&core_index) { + validator_group + } else { + gum::debug!( + target: LOG_TARGET, + ?relay_parent, + ?core_index, + "Skipping advertising to validator, validator group for core not found", + ); + return + }; + let should_advertise = validator_group.should_advertise_to(candidate_hash, peer_ids, &peer); match should_advertise { ShouldAdvertiseTo::Yes => {}, ShouldAdvertiseTo::NotAuthority | ShouldAdvertiseTo::AlreadyAdvertised => { @@ -802,11 +809,7 @@ async fn advertise_collation( )) .await; - per_relay_parent - .validator_group - .get_mut(&core_index) - .expect("should_advertise returned `ShouldAdvertiseTo::Yes` above; qed") - .advertised_to_peer(candidate_hash, &peer_ids, peer); + validator_group.advertised_to_peer(candidate_hash, &peer_ids, peer); advertisement_timeouts.push(ResetInterestTimeout::new( *candidate_hash, From 16607dd4ffb3a8b332909d73211857ac066d6d75 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 27 Mar 2024 15:07:31 +0200 Subject: [PATCH 27/28] prdoc Signed-off-by: Andrei Sandu --- prdoc/pr_3795.prdoc | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 prdoc/pr_3795.prdoc diff --git a/prdoc/pr_3795.prdoc b/prdoc/pr_3795.prdoc new file mode 100644 index 0000000000000..da01fcbec821c --- /dev/null +++ b/prdoc/pr_3795.prdoc @@ -0,0 +1,14 @@ +title: Enable collators to build on multiple cores + +doc: + - audience: Node Dev + description: | + Introduces a `CoreIndex` parameter in `SubmitCollationParams`. This enables + the collators to make use of potentially multiple cores assigned at some relay + chain block. This extra parameter is used by the collator protocol and collation + generation subsystems to forward the collation to the approapriate backing group. + +crates: +- name: polkadot-node-collation-generation +- name: polkadot-collator-protocol + bump: minor From 69a63cc878dd02053afda19f804bb6455a4a2333 Mon Sep 17 00:00:00 2001 From: Andrei Sandu Date: Wed, 27 Mar 2024 16:21:02 +0200 Subject: [PATCH 28/28] nice new syntax Signed-off-by: Andrei Sandu --- .../src/collator_side/mod.rs | 21 ++++++++----------- 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs index a600f402c7262..e6aa55235b7a8 100644 --- a/polkadot/node/network/collator-protocol/src/collator_side/mod.rs +++ b/polkadot/node/network/collator-protocol/src/collator_side/mod.rs @@ -748,18 +748,15 @@ async fn advertise_collation( } } - let validator_group = - if let Some(validator_group) = per_relay_parent.validator_group.get_mut(&core_index) { - validator_group - } else { - gum::debug!( - target: LOG_TARGET, - ?relay_parent, - ?core_index, - "Skipping advertising to validator, validator group for core not found", - ); - return - }; + let Some(validator_group) = per_relay_parent.validator_group.get_mut(&core_index) else { + gum::debug!( + target: LOG_TARGET, + ?relay_parent, + ?core_index, + "Skipping advertising to validator, validator group for core not found", + ); + return + }; let should_advertise = validator_group.should_advertise_to(candidate_hash, peer_ids, &peer); match should_advertise {