From 79375e7c9eb1a6f880d849fc3b394b59c157f2ba Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe <49718502+alexggh@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:21:05 +0200 Subject: [PATCH 1/2] Make approval-distribution aggression a bit more robust and less spammy (#6696) After finality started lagging on kusama around `2025-11-25 15:55:40` nodes started being overloaded with messages and some restarted with ``` Subsystem approval-distribution-subsystem appears unresponsive when sending a message of type polkadot_node_subsystem_types::messages::ApprovalDistributionMessage. origin=polkadot_service::relay_chain_selection::SelectRelayChainInner, sp_runtime::OpaqueExtrinsic>>, polkadot_overseer::Handle> ``` I think this happened because our aggression in the current form is way too spammy and create problems in situation where we already constructed blocks with a load of candidates to check which what happened around `#25933682` before and after. However aggression, does help in the nightmare scenario where the network is segmented and sparsely connected, so I tend to think we shouldn't completely remove it. The current configuration is: ``` l1_threshold: Some(16), l2_threshold: Some(28), resend_unfinalized_period: Some(8), ``` The way aggression works right now : 1. After L1 is triggered all nodes send all messages they created to all the other nodes and all messages they would have they already send according to the topology. 2. Because of resend_unfinalized_period for each block all messages at step 1) are sent every 8 blocks, so for example let's say we have blocks 1 to 24 unfinalized, then at block 25, all messages for block 1, 9 will be resent, and consequently at block 26, all messages for block 2, 10 will be resent, this becomes worse as more blocks are created if backing backpressure did not kick in yet. In total this logic makes that each node receive 3 * total_number_of messages_per_block 3. L2 aggression is way too spammy, when L2 aggression is enabled all nodes sends all messages of a block on GridXY, that means that all messages are received and sent by node at least 2*sqrt(num_validators), so on kusama would be 66 * NUM_MESSAGES_AT_FIRST_UNFINALIZED_BLOCK, so even with a reasonable number of messages like 10K, which you can have if you escalated because of no shows, you end-up sending and receiving ~660k messages at once, I think that's what makes the approval-distribution to appear unresponsive on some nodes. 4. Duplicate messages are received by the nodes which turn, mark the node as banned, which may create more no-shows. ## Proposed improvements: 1. Make L2 trigger way later 28 blocks, instead of 64, this should literally the last resort, until then we should try to let the approval-voting escalation mechanism to do its things and cover the no-shows. 2. On L1 aggression don't send messages for blocks too far from the first_unfinalized there is no point in sending the messages for block 20, if block 1 is still unfinalized. 3. On L1 aggression, send messages then back-off for 3 * resend_unfinalized_period to give time for everyone to clear up their queues. 4. If aggression is enabled accept duplicate messages from validators and don't punish them by reducting their reputation which, which may create no-shows. --------- Signed-off-by: Alexandru Gheorghe Co-authored-by: Andrei Sandu <54316454+sandreim@users.noreply.github.com> (cherry picked from commit 85dd228d9a7d3d736e0dab7c0b084e3fc2c1b003) --- .../network/approval-distribution/src/lib.rs | 143 ++++++++++++++---- .../approval-distribution/src/tests.rs | 137 ++++++++++++++++- prdoc/pr_6696.prdoc | 15 ++ 3 files changed, 261 insertions(+), 34 deletions(-) create mode 100644 prdoc/pr_6696.prdoc diff --git a/polkadot/node/network/approval-distribution/src/lib.rs b/polkadot/node/network/approval-distribution/src/lib.rs index 2fd5be568f9d..83ea9977b460 100644 --- a/polkadot/node/network/approval-distribution/src/lib.rs +++ b/polkadot/node/network/approval-distribution/src/lib.rs @@ -316,7 +316,7 @@ impl Default for AggressionConfig { fn default() -> Self { AggressionConfig { l1_threshold: Some(16), - l2_threshold: Some(28), + l2_threshold: Some(64), resend_unfinalized_period: Some(8), } } @@ -515,6 +515,8 @@ struct BlockEntry { vrf_story: RelayVRFStory, /// The block slot. slot: Slot, + /// Backing off from re-sending messages to peers. + last_resent_at_block_number: Option, } impl BlockEntry { @@ -890,6 +892,7 @@ impl State { candidates_metadata: meta.candidates, vrf_story: meta.vrf_story, slot: meta.slot, + last_resent_at_block_number: None, }); self.topologies.inc_session_refs(meta.session); @@ -1330,6 +1333,33 @@ impl State { self.enable_aggression(network_sender, Resend::No, metrics).await; } + // When finality is lagging as a last resort nodes start sending the messages they have + // multiples times. This means it is safe to accept duplicate messages without punishing the + // peer and reduce the reputation and can end up banning the Peer, which in turn will create + // more no-shows. + fn accept_duplicates_from_validators( + blocks_by_number: &BTreeMap>, + topologies: &SessionGridTopologies, + aggression_config: &AggressionConfig, + entry: &BlockEntry, + peer: PeerId, + ) -> bool { + let topology = topologies.get_topology(entry.session); + let min_age = blocks_by_number.iter().next().map(|(num, _)| num); + let max_age = blocks_by_number.iter().rev().next().map(|(num, _)| num); + + // Return if we don't have at least 1 block. + let (min_age, max_age) = match (min_age, max_age) { + (Some(min), Some(max)) => (*min, *max), + _ => return false, + }; + + let age = max_age.saturating_sub(min_age); + + aggression_config.should_trigger_aggression(age) && + topology.map(|topology| topology.is_validator(&peer)).unwrap_or(false) + } + async fn import_and_circulate_assignment( &mut self, approval_voting_sender: &mut A, @@ -1409,20 +1439,29 @@ impl State { if peer_knowledge.contains(&message_subject, message_kind) { // wasn't included before if !peer_knowledge.received.insert(message_subject.clone(), message_kind) { - gum::debug!( - target: LOG_TARGET, - ?peer_id, - ?message_subject, - "Duplicate assignment", - ); - - modify_reputation( - &mut self.reputation, - network_sender, + if !Self::accept_duplicates_from_validators( + &self.blocks_by_number, + &self.topologies, + &self.aggression_config, + entry, peer_id, - COST_DUPLICATE_MESSAGE, - ) - .await; + ) { + gum::debug!( + target: LOG_TARGET, + ?peer_id, + ?message_subject, + "Duplicate assignment", + ); + + modify_reputation( + &mut self.reputation, + network_sender, + peer_id, + COST_DUPLICATE_MESSAGE, + ) + .await; + } + metrics.on_assignment_duplicate(); } else { gum::trace!( @@ -1738,6 +1777,9 @@ impl State { assignments_knowledge_key: &Vec<(MessageSubject, MessageKind)>, approval_knowledge_key: &(MessageSubject, MessageKind), entry: &mut BlockEntry, + blocks_by_number: &BTreeMap>, + topologies: &SessionGridTopologies, + aggression_config: &AggressionConfig, reputation: &mut ReputationAggregator, peer_id: PeerId, metrics: &Metrics, @@ -1766,20 +1808,27 @@ impl State { .received .insert(approval_knowledge_key.0.clone(), approval_knowledge_key.1) { - gum::trace!( - target: LOG_TARGET, - ?peer_id, - ?approval_knowledge_key, - "Duplicate approval", - ); - - modify_reputation( - reputation, - network_sender, + if !Self::accept_duplicates_from_validators( + blocks_by_number, + topologies, + aggression_config, + entry, peer_id, - COST_DUPLICATE_MESSAGE, - ) - .await; + ) { + gum::trace!( + target: LOG_TARGET, + ?peer_id, + ?approval_knowledge_key, + "Duplicate approval", + ); + modify_reputation( + reputation, + network_sender, + peer_id, + COST_DUPLICATE_MESSAGE, + ) + .await; + } metrics.on_approval_duplicate(); } return false @@ -1886,6 +1935,9 @@ impl State { &assignments_knowledge_keys, &approval_knwowledge_key, entry, + &self.blocks_by_number, + &self.topologies, + &self.aggression_config, &mut self.reputation, peer_id, metrics, @@ -2304,18 +2356,43 @@ impl State { &self.topologies, |block_entry| { let block_age = max_age - block_entry.number; + // We want to resend only for blocks of min_age, there is no point in + // resending for blocks newer than that, because we are just going to create load + // and not gain anything. + let diff_from_min_age = block_entry.number - min_age; + + // We want to back-off on resending for blocks that have been resent recently, to + // give time for nodes to process all the extra messages, if we still have not + // finalized we are going to resend again after unfinalized_period * 2 since the + // last resend. + let blocks_since_last_sent = block_entry + .last_resent_at_block_number + .map(|last_resent_at_block_number| max_age - last_resent_at_block_number); + + let can_resend_at_this_age = blocks_since_last_sent + .zip(config.resend_unfinalized_period) + .map(|(blocks_since_last_sent, unfinalized_period)| { + blocks_since_last_sent >= unfinalized_period * 2 + }) + .unwrap_or(true); if resend == Resend::Yes && - config - .resend_unfinalized_period - .as_ref() - .map_or(false, |p| block_age > 0 && block_age % p == 0) - { + config.resend_unfinalized_period.as_ref().map_or(false, |p| { + block_age > 0 && + block_age % p == 0 && diff_from_min_age == 0 && + can_resend_at_this_age + }) { // Retry sending to all peers. for (_, knowledge) in block_entry.known_by.iter_mut() { knowledge.sent = Knowledge::default(); } - + block_entry.last_resent_at_block_number = Some(max_age); + gum::debug!( + target: LOG_TARGET, + block_number = ?block_entry.number, + ?max_age, + "Aggression enabled with resend for block", + ); true } else { false diff --git a/polkadot/node/network/approval-distribution/src/tests.rs b/polkadot/node/network/approval-distribution/src/tests.rs index 4ee9320e0e45..c6ef4229a7c4 100644 --- a/polkadot/node/network/approval-distribution/src/tests.rs +++ b/polkadot/node/network/approval-distribution/src/tests.rs @@ -1030,6 +1030,141 @@ fn peer_sending_us_the_same_we_just_sent_them_is_ok() { ); } +#[test] +fn peer_sending_us_duplicates_while_aggression_enabled_is_ok() { + let parent_hash = Hash::repeat_byte(0xFF); + let hash = Hash::repeat_byte(0xAA); + + let peers = make_peers_and_authority_ids(8); + let peer_a = peers.first().unwrap().0; + + let _ = test_harness( + Arc::new(MockAssignmentCriteria { tranche: Ok(0) }), + Arc::new(SystemClock {}), + state_without_reputation_delay(), + |mut virtual_overseer| async move { + let overseer = &mut virtual_overseer; + let peer = &peer_a; + setup_peer_with_view(overseer, peer, view![], ValidationVersion::V3).await; + + let peers_with_optional_peer_id = peers + .iter() + .map(|(peer_id, authority)| (Some(*peer_id), authority.clone())) + .collect_vec(); + // Setup a topology where peer_a is neighbor to current node. + setup_gossip_topology( + overseer, + make_gossip_topology(1, &peers_with_optional_peer_id, &[0], &[2], 1), + ) + .await; + + // new block `hash` with 1 candidates + let meta = BlockApprovalMeta { + hash, + parent_hash, + number: 1, + candidates: vec![Default::default(); 1], + slot: 1.into(), + session: 1, + vrf_story: RelayVRFStory(Default::default()), + }; + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + // import an assignment related to `hash` locally + let validator_index = ValidatorIndex(0); + let candidate_indices: CandidateBitfield = + vec![0 as CandidateIndex].try_into().unwrap(); + let candidate_bitfields = vec![CoreIndex(0)].try_into().unwrap(); + let cert = fake_assignment_cert_v2(hash, validator_index, candidate_bitfields); + overseer_send( + overseer, + ApprovalDistributionMessage::DistributeAssignment( + cert.clone().into(), + candidate_indices.clone(), + ), + ) + .await; + + // update peer view to include the hash + overseer_send( + overseer, + ApprovalDistributionMessage::NetworkBridgeUpdate( + NetworkBridgeEvent::PeerViewChange(*peer, view![hash]), + ), + ) + .await; + + // we should send them the assignment + assert_matches!( + overseer_recv(overseer).await, + AllMessages::NetworkBridgeTx(NetworkBridgeTxMessage::SendValidationMessage( + peers, + Versioned::V3(protocol_v3::ValidationProtocol::ApprovalDistribution( + protocol_v3::ApprovalDistributionMessage::Assignments(assignments) + )) + )) => { + assert_eq!(peers.len(), 1); + assert_eq!(assignments.len(), 1); + } + ); + + // but if someone else is sending it the same assignment + // the peer could send us it as well + let assignments = vec![(cert, candidate_indices)]; + let msg = protocol_v3::ApprovalDistributionMessage::Assignments(assignments); + send_message_from_peer_v3(overseer, peer, msg.clone()).await; + + assert!( + overseer.recv().timeout(TIMEOUT).await.is_none(), + "we should not punish the peer" + ); + + // send the assignments again + send_message_from_peer_v3(overseer, peer, msg.clone()).await; + + // now we should + expect_reputation_change(overseer, peer, COST_DUPLICATE_MESSAGE).await; + + // Peers will be continously punished for sending duplicates until approval-distribution + // aggression kicks, at which point they aren't anymore. + let mut parent_hash = hash; + for level in 0..16 { + // As long as the lag is bellow l1 aggression, punish peers for duplicates. + send_message_from_peer_v3(overseer, peer, msg.clone()).await; + expect_reputation_change(overseer, peer, COST_DUPLICATE_MESSAGE).await; + + let number = 1 + level + 1; // first block had number 1 + let hash = BlakeTwo256::hash_of(&(parent_hash, number)); + let meta = BlockApprovalMeta { + hash, + parent_hash, + number, + candidates: vec![], + slot: (level as u64).into(), + session: 1, + vrf_story: RelayVRFStory(Default::default()), + }; + + let msg = ApprovalDistributionMessage::ApprovalCheckingLagUpdate(level + 1); + overseer_send(overseer, msg).await; + + let msg = ApprovalDistributionMessage::NewBlocks(vec![meta]); + overseer_send(overseer, msg).await; + + parent_hash = hash; + } + + // send the assignments again, we should not punish the peer because aggression is + // enabled. + send_message_from_peer_v3(overseer, peer, msg).await; + + assert!(overseer.recv().timeout(TIMEOUT).await.is_none(), "no message should be sent"); + virtual_overseer + }, + ); +} + #[test] fn import_approval_happy_path_v1_v2_peers() { let peers = make_peers_and_authority_ids(15); @@ -3892,7 +4027,7 @@ fn resends_messages_periodically() { // Add blocks until resend is done. { let mut parent_hash = hash; - for level in 0..2 { + for level in 0..4 { number = number + 1; let hash = BlakeTwo256::hash_of(&(parent_hash, number)); let meta = BlockApprovalMeta { diff --git a/prdoc/pr_6696.prdoc b/prdoc/pr_6696.prdoc new file mode 100644 index 000000000000..c5c73f831886 --- /dev/null +++ b/prdoc/pr_6696.prdoc @@ -0,0 +1,15 @@ +# Schema: Polkadot SDK PRDoc Schema (prdoc) v1.0.0 +# See doc at https://raw.githubusercontent.com/paritytech/polkadot-sdk/master/prdoc/schema_user.json + +title: Make approval-distribution aggression a bit more robust and less spammy + +doc: + - audience: Node Dev + description: | + The problem with the current implementation of approval-distribution aggression is that is too spammy, + and can overload the nodes, so make it less spammy by moving back the moment we trigger L2 aggression + and make resend enable only for the latest unfinalized block. + +crates: + - name: polkadot-approval-distribution + bump: minor From 2d4834b165df793b98e65a5935901dced72699e7 Mon Sep 17 00:00:00 2001 From: Alexandru Gheorghe Date: Wed, 11 Dec 2024 11:23:56 +0200 Subject: [PATCH 2/2] Fixup compilation for backported unittest Signed-off-by: Alexandru Gheorghe --- polkadot/node/network/approval-distribution/src/tests.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polkadot/node/network/approval-distribution/src/tests.rs b/polkadot/node/network/approval-distribution/src/tests.rs index c6ef4229a7c4..68db04a00cfc 100644 --- a/polkadot/node/network/approval-distribution/src/tests.rs +++ b/polkadot/node/network/approval-distribution/src/tests.rs @@ -1040,7 +1040,7 @@ fn peer_sending_us_duplicates_while_aggression_enabled_is_ok() { let _ = test_harness( Arc::new(MockAssignmentCriteria { tranche: Ok(0) }), - Arc::new(SystemClock {}), + Box::new(SystemClock {}), state_without_reputation_delay(), |mut virtual_overseer| async move { let overseer = &mut virtual_overseer;