Skip to content

Commit

Permalink
Strict match of errors in range sync
Browse files Browse the repository at this point in the history
  • Loading branch information
dapplion committed Oct 21, 2024
1 parent 6eaa370 commit 32fab3a
Show file tree
Hide file tree
Showing 2 changed files with 140 additions and 123 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ pub enum Error {
blob_commitment: KzgCommitment,
block_commitment: KzgCommitment,
},
UnableToDetermineImportRequirement,
Unexpected,
SszTypes(ssz_types::Error),
MissingBlobs,
Expand Down Expand Up @@ -44,7 +43,6 @@ impl Error {
| Error::Unexpected
| Error::ParentStateMissing(_)
| Error::BlockReplayError(_)
| Error::UnableToDetermineImportRequirement
| Error::RebuildingStateCaches(_)
| Error::SlotClockError => ErrorCategory::Internal,
Error::InvalidBlobs { .. }
Expand Down
261 changes: 140 additions & 121 deletions beacon_node/network/src/network_beacon_processor/sync_methods.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use crate::metrics;
use crate::network_beacon_processor::{NetworkBeaconProcessor, FUTURE_SLOT_TOLERANCE};
use crate::network_beacon_processor::NetworkBeaconProcessor;
use crate::sync::BatchProcessResult;
use crate::sync::{
manager::{BlockProcessType, SyncMessage},
Expand All @@ -9,6 +9,7 @@ use beacon_chain::block_verification_types::{AsBlock, RpcBlock};
use beacon_chain::data_availability_checker::AvailabilityCheckError;
use beacon_chain::data_availability_checker::MaybeAvailableBlock;
use beacon_chain::data_column_verification::verify_kzg_for_data_column_list;
use beacon_chain::ExecutionPayloadError;
use beacon_chain::{
validator_monitor::get_slot_delay_ms, AvailabilityProcessingStatus, BeaconChainError,
BeaconChainTypes, BlockError, ChainSegmentResult, HistoricalBlockError, NotifyExecutionLayer,
Expand Down Expand Up @@ -44,6 +45,45 @@ struct ChainSegmentFailed {
peer_action: Option<PeerAction>,
}

#[derive(Debug, PartialEq, Eq)]
pub enum ErrorCategory {
/// Internal Errors (not caused by peers).
///
/// An internal no-retry error is permanent and block processing should not be
/// re-attempted.
Internal { retry: bool },
/// Errors caused by faulty / malicious peers.
///
/// No retry errors are deterministic against the block's root. Re-downloading data
/// key-ed by block root MUST result in the same no-retry error (i.e. invalid parent,
/// invalid state root, etc).
///
/// The error also indicates which block component index is malicious if applicable.
Malicious { retry: bool, index: usize },
}

impl ErrorCategory {
// Helper functions for readibility on large match statements
pub fn internal_no_retry() -> Self {
Self::Internal { retry: false }
}
pub fn internal_retry() -> Self {
Self::Internal { retry: true }
}
pub fn malicious_no_retry() -> Self {
Self::Malicious {
retry: false,
index: 0,
}
}
pub fn malicious_retry() -> Self {
Self::Malicious {
retry: true,
index: 0,
}
}
}

impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {
/// Returns an async closure which processes a beacon block received via RPC.
///
Expand Down Expand Up @@ -709,130 +749,109 @@ impl<T: BeaconChainTypes> NetworkBeaconProcessor<T> {

/// Helper function to handle a `BlockError` from `process_chain_segment`
fn handle_failed_chain_segment(&self, error: BlockError) -> Result<(), ChainSegmentFailed> {
match error {
BlockError::ParentUnknown { parent_root, .. } => {
// blocks should be sequential and all parents should exist
Err(ChainSegmentFailed {
message: format!("Block has an unknown parent: {}", parent_root),
// Peers are faulty if they send non-sequential blocks.
peer_action: Some(PeerAction::LowToleranceError),
})
let result = match &error {
// blocks should be sequential and all parents should exist
BlockError::ParentUnknown { .. } => Err(ErrorCategory::malicious_retry()),
// A peer may craft a block that is at a future slot. It's possible that
// eventually the slot will no longer be in the future. However, since it's
// malicious action to serve an RPC with a future slot we will not retry.
BlockError::FutureSlot { .. } => Err(ErrorCategory::malicious_no_retry()),

// Okay to reprocess blocks that are finalized.
BlockError::GenesisBlock | BlockError::WouldRevertFinalizedSlot { .. } => Ok(()),

// Since we request blocks by slot we should retry all conditions of invalid block.
BlockError::StateRootMismatch { .. }
| BlockError::NotFinalizedDescendant { .. }
| BlockError::BlockSlotLimitReached
| BlockError::IncorrectBlockProposer { .. }
| BlockError::UnknownValidator { .. }
| BlockError::BlockIsNotLaterThanParent { .. }
| BlockError::PerBlockProcessingError(_)
| BlockError::InconsistentFork(_)
| BlockError::WeakSubjectivityConflict
| BlockError::ProposalSignatureInvalid
| BlockError::InvalidSignature
| BlockError::NonLinearParentRoots
| BlockError::NonLinearSlots => Err(ErrorCategory::malicious_retry()),

// This can happen for many reasons. Head sync's can download multiples and parent
// lookups can download blocks before range sync
BlockError::DuplicateFullyImported { .. } => Ok(()),
BlockError::DuplicateImportStatusUnknown { .. } => {
// This is unreachable because RPC blocks do not undergo gossip verification, and
// this error can *only* come from gossip verification.
Err(ErrorCategory::internal_no_retry())
}
BlockError::DuplicateFullyImported(_)
| BlockError::DuplicateImportStatusUnknown(..) => {
// This can happen for many reasons. Head sync's can download multiples and parent
// lookups can download blocks before range sync
Ok(())
}
BlockError::FutureSlot {
present_slot,
block_slot,
} => {
if present_slot + FUTURE_SLOT_TOLERANCE >= block_slot {
// The block is too far in the future, drop it.
warn!(
self.log, "Block is ahead of our slot clock";
"msg" => "block for future slot rejected, check your time",
"present_slot" => present_slot,
"block_slot" => block_slot,
"FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
);
} else {
// The block is in the future, but not too far.
debug!(
self.log, "Block is slightly ahead of our slot clock. Ignoring.";
"present_slot" => present_slot,
"block_slot" => block_slot,
"FUTURE_SLOT_TOLERANCE" => FUTURE_SLOT_TOLERANCE,
);
// TODO: review `ExecutionPayloadError` variants
BlockError::ExecutionPayloadError(e) => Err(match e {
// The peer has nothing to do with this error, do not penalize them.
ExecutionPayloadError::NoExecutionConnection => ErrorCategory::internal_no_retry(),
// The peer has nothing to do with this error, do not penalize them.
ExecutionPayloadError::RequestFailed(_) => ErrorCategory::internal_retry(),
// Execution payload is invalid
ExecutionPayloadError::RejectedByExecutionEngine { .. }
| ExecutionPayloadError::InvalidPayloadTimestamp { .. }
| ExecutionPayloadError::InvalidTerminalPoWBlock { .. }
| ExecutionPayloadError::InvalidActivationEpoch { .. }
| ExecutionPayloadError::InvalidTerminalBlockHash { .. } => {
ErrorCategory::malicious_retry()
}

Err(ChainSegmentFailed {
message: format!(
"Block with slot {} is higher than the current slot {}",
block_slot, present_slot
),
// Peers are faulty if they send blocks from the future.
peer_action: Some(PeerAction::LowToleranceError),
})
}
BlockError::WouldRevertFinalizedSlot { .. } => {
debug!(self.log, "Finalized or earlier block processed";);
Ok(())
}
BlockError::GenesisBlock => {
debug!(self.log, "Genesis block was processed");
Ok(())
}
BlockError::BeaconChainError(e) => {
warn!(
self.log, "BlockProcessingFailure";
"msg" => "unexpected condition in processing block.",
"outcome" => ?e,
);

Err(ChainSegmentFailed {
message: format!("Internal error whilst processing block: {:?}", e),
// Do not penalize peers for internal errors.
peer_action: None,
})
}
ref err @ BlockError::ExecutionPayloadError(ref epe) => {
if !epe.penalize_peer() {
// These errors indicate an issue with the EL and not the `ChainSegment`.
// Pause the syncing while the EL recovers
debug!(self.log,
"Execution layer verification failed";
"outcome" => "pausing sync",
"err" => ?err
);
Err(ChainSegmentFailed {
message: format!("Execution layer offline. Reason: {:?}", err),
// Do not penalize peers for internal errors.
peer_action: None,
})
} else {
debug!(self.log,
"Invalid execution payload";
"error" => ?err
);
Err(ChainSegmentFailed {
message: format!(
"Peer sent a block containing invalid execution payload. Reason: {:?}",
err
),
peer_action: Some(PeerAction::LowToleranceError),
})
// Do not penalize the peer since it's not their fault that *we're* optimistic.
ExecutionPayloadError::UnverifiedNonOptimisticCandidate => {
ErrorCategory::internal_retry()
}
}),
// Should retry? Probably
BlockError::ParentExecutionPayloadInvalid { .. } => {
Err(ErrorCategory::malicious_retry())
}
ref err @ BlockError::ParentExecutionPayloadInvalid { ref parent_root } => {
warn!(
self.log,
"Failed to sync chain built on invalid parent";
"parent_root" => ?parent_root,
"advice" => "check execution node for corruption then restart it and Lighthouse",
);
Err(ChainSegmentFailed {
message: format!("Peer sent invalid block. Reason: {err:?}"),
// We need to penalise harshly in case this represents an actual attack. In case
// of a faulty EL it will usually require manual intervention to fix anyway, so
// it's not too bad if we drop most of our peers.
peer_action: Some(PeerAction::LowToleranceError),
})
}
other => {
debug!(
self.log, "Invalid block received";
"msg" => "peer sent invalid block",
"outcome" => %other,
);

Err(ChainSegmentFailed {
message: format!("Peer sent invalid block. Reason: {:?}", other),
// Do not penalize peers for internal errors.
peer_action: None,
})
// TODO: Review AvailabilityCheckError variants
BlockError::AvailabilityCheck(e) => Err(match e {
AvailabilityCheckError::SszTypes(_)
| AvailabilityCheckError::StoreError(_)
| AvailabilityCheckError::Unexpected
| AvailabilityCheckError::ParentStateMissing(_)
| AvailabilityCheckError::BlockReplayError(_)
| AvailabilityCheckError::RebuildingStateCaches(_)
| AvailabilityCheckError::SlotClockError => ErrorCategory::internal_retry(),
AvailabilityCheckError::InvalidColumn(index, _) => ErrorCategory::Malicious {
retry: true,
index: *index as usize,
},
AvailabilityCheckError::InvalidBlobs { .. }
| AvailabilityCheckError::MissingBlobs
| AvailabilityCheckError::MissingCustodyColumns
| AvailabilityCheckError::DecodeError(_)
| AvailabilityCheckError::ReconstructColumnsError { .. }
| AvailabilityCheckError::BlobIndexInvalid(_)
| AvailabilityCheckError::DataColumnIndexInvalid(_)
| AvailabilityCheckError::KzgCommitmentMismatch { .. } => {
ErrorCategory::malicious_retry()
} // Do not use a fallback match, handle all errors explicitly
}),
// The proposer making a slashable block is not the peer's fault nor ours. Mark
// as internal (don't penalize peer), and retry: the serving peer could send the wrong
// block at this slot, which happens to not be slashable.
BlockError::Slashable => Err(ErrorCategory::internal_retry()),
// TODO: BeaconChainError should be retried?
BlockError::BeaconChainError(_) | BlockError::InternalError(_) => {
Err(ErrorCategory::internal_no_retry())
}
}
// unreachable, this error is only part of gossip
BlockError::BlobNotRequired(_) => Err(ErrorCategory::malicious_retry()),
};
// Do not use a fallback match, handle all errors explicitly

// Pass retry and malicious signals to range sync to do retry and score properly
result.map_err(|err| ChainSegmentFailed {
message: format!("{:?}", error),
// This is an internal error, don't penalize the peer.
peer_action: if matches!(err, ErrorCategory::Malicious { .. }) {
Some(PeerAction::LowToleranceError)
} else {
None
},
})
}
}

0 comments on commit 32fab3a

Please sign in to comment.