From b71f1125811b6539b22d4104343554e388f873d7 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Tue, 8 Dec 2020 13:31:08 +1100 Subject: [PATCH 1/3] Revert fork choice if disk write fails --- beacon_node/beacon_chain/src/beacon_chain.rs | 62 ++++++++++++++++++-- beacon_node/beacon_chain/src/builder.rs | 23 ++------ beacon_node/beacon_chain/src/errors.rs | 3 + 3 files changed, 65 insertions(+), 23 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 51f0c2ea471..2d8b1ba4416 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -160,6 +160,15 @@ pub trait BeaconChainTypes: Send + Sync + 'static { type EthSpec: types::EthSpec; } +pub type BeaconForkChoice = ForkChoice< + BeaconForkChoiceStore< + ::EthSpec, + ::HotStore, + ::ColdStore, + >, + ::EthSpec, +>; + /// Represents the "Beacon Chain" component of Ethereum 2.0. Allows import of blocks and block /// operations and chooses a canonical head. pub struct BeaconChain { @@ -207,13 +216,9 @@ pub struct BeaconChain { pub genesis_state_root: Hash256, /// The root of the list of genesis validators, used during syncing. pub genesis_validators_root: Hash256, - - #[allow(clippy::type_complexity)] /// A state-machine that is updated with information from the network and chooses a canonical /// head block. - pub fork_choice: RwLock< - ForkChoice, T::EthSpec>, - >, + pub fork_choice: RwLock>, /// A handler for events generated by the beacon chain. This is only initialized when the /// HTTP server is enabled. pub event_handler: Option>, @@ -284,6 +289,27 @@ impl BeaconChain { persisted_fork_choice.as_kv_store_op(FORK_CHOICE_DB_KEY) } + /// Load fork choice from disk, returning `None` if it isn't found. + pub fn load_fork_choice( + store: Arc>, + ) -> Result>, Error> { + let persisted_fork_choice = + match store.get_item::(&FORK_CHOICE_DB_KEY)? { + Some(fc) => fc, + None => return Ok(None), + }; + + let fc_store = BeaconForkChoiceStore::from_persisted( + persisted_fork_choice.fork_choice_store, + store.clone(), + )?; + + Ok(Some(ForkChoice::from_persisted( + persisted_fork_choice.fork_choice, + fc_store, + )?)) + } + /// Persists `self.op_pool` to disk. /// /// ## Notes @@ -1715,13 +1741,37 @@ impl BeaconChain { // Store the block and its state, and execute the confirmation batch for the intermediate // states, which will delete their temporary flags. + // If the write fails, revert fork choice to the version from disk, else we can + // end up with blocks in fork choice that are missing from disk. + // See https://github.com/sigp/lighthouse/issues/2028 ops.push(StoreOp::PutBlock( block_root, Box::new(signed_block.clone()), )); ops.push(StoreOp::PutState(block.state_root, &state)); let txn_lock = self.store.hot_db.begin_rw_transaction(); - self.store.do_atomically(ops)?; + + if let Err(e) = self.store.do_atomically(ops) { + error!( + self.log, + "Database write failed!"; + "msg" => "Restoring fork choice from disk", + "error" => ?e, + ); + match Self::load_fork_choice(self.store.clone())? { + Some(persisted_fork_choice) => { + *fork_choice = persisted_fork_choice; + } + None => { + crit!( + self.log, + "No stored fork choice found to restore from"; + "warning" => "The database is likely corrupt now" + ); + } + } + return Err(e.into()); + } drop(txn_lock); // The fork choice write-lock is dropped *after* the on-disk database has been updated. diff --git a/beacon_node/beacon_chain/src/builder.rs b/beacon_node/beacon_chain/src/builder.rs index 5276d115282..a03192a52e4 100644 --- a/beacon_node/beacon_chain/src/builder.rs +++ b/beacon_node/beacon_chain/src/builder.rs @@ -1,11 +1,8 @@ -use crate::beacon_chain::{ - BEACON_CHAIN_DB_KEY, ETH1_CACHE_DB_KEY, FORK_CHOICE_DB_KEY, OP_POOL_DB_KEY, -}; +use crate::beacon_chain::{BEACON_CHAIN_DB_KEY, ETH1_CACHE_DB_KEY, OP_POOL_DB_KEY}; use crate::eth1_chain::{CachingEth1Backend, SszEth1}; use crate::head_tracker::HeadTracker; use crate::migrate::{BackgroundMigrator, MigratorConfig}; use crate::persisted_beacon_chain::PersistedBeaconChain; -use crate::persisted_fork_choice::PersistedForkChoice; use crate::shuffling_cache::ShufflingCache; use crate::snapshot_cache::{SnapshotCache, DEFAULT_SNAPSHOT_CACHE_SIZE}; use crate::timeout_rw_lock::TimeoutRwLock; @@ -248,20 +245,12 @@ where .to_string() })?; - let persisted_fork_choice = store - .get_item::(&FORK_CHOICE_DB_KEY) - .map_err(|e| format!("DB error when reading persisted fork choice: {:?}", e))? - .ok_or("No persisted fork choice present in database.")?; - - let fc_store = BeaconForkChoiceStore::from_persisted( - persisted_fork_choice.fork_choice_store, - store.clone(), - ) - .map_err(|e| format!("Unable to load ForkChoiceStore: {:?}", e))?; - let fork_choice = - ForkChoice::from_persisted(persisted_fork_choice.fork_choice, fc_store) - .map_err(|e| format!("Unable to parse persisted fork choice from disk: {:?}", e))?; + BeaconChain::>::load_fork_choice( + store.clone(), + ) + .map_err(|e| format!("Unable to load fork choice from disk: {:?}", e))? + .ok_or("Fork choice not found in store")?; let genesis_block = store .get_item::>(&chain.genesis_block_root) diff --git a/beacon_node/beacon_chain/src/errors.rs b/beacon_node/beacon_chain/src/errors.rs index 4153ac7021e..2b71636e917 100644 --- a/beacon_node/beacon_chain/src/errors.rs +++ b/beacon_node/beacon_chain/src/errors.rs @@ -1,4 +1,5 @@ use crate::beacon_chain::ForkChoiceError; +use crate::beacon_fork_choice_store::Error as ForkChoiceStoreError; use crate::eth1_chain::Error as Eth1ChainError; use crate::migrate::PruningError; use crate::naive_aggregation_pool::Error as NaiveAggregationError; @@ -46,6 +47,7 @@ pub enum BeaconChainError { DBInconsistent(String), DBError(store::Error), ForkChoiceError(ForkChoiceError), + ForkChoiceStoreError(ForkChoiceStoreError), MissingBeaconBlock(Hash256), MissingBeaconState(Hash256), SlotProcessingError(SlotProcessingError), @@ -106,6 +108,7 @@ easy_from_to!(ObservedBlockProducersError, BeaconChainError); easy_from_to!(BlockSignatureVerifierError, BeaconChainError); easy_from_to!(PruningError, BeaconChainError); easy_from_to!(ArithError, BeaconChainError); +easy_from_to!(ForkChoiceStoreError, BeaconChainError); #[derive(Debug)] pub enum BlockProductionError { From 63facbdfc282d274f26cad4c024e0feac55434e2 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Wed, 9 Dec 2020 10:52:03 +1100 Subject: [PATCH 2/3] Satisfy Clippy --- beacon_node/beacon_chain/src/beacon_chain.rs | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index 2d8b1ba4416..d72f392a910 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -299,10 +299,8 @@ impl BeaconChain { None => return Ok(None), }; - let fc_store = BeaconForkChoiceStore::from_persisted( - persisted_fork_choice.fork_choice_store, - store.clone(), - )?; + let fc_store = + BeaconForkChoiceStore::from_persisted(persisted_fork_choice.fork_choice_store, store)?; Ok(Some(ForkChoice::from_persisted( persisted_fork_choice.fork_choice, From fe764f73e4f1e10d829d4808d9e9b53e07c2ba58 Mon Sep 17 00:00:00 2001 From: Michael Sproul Date: Wed, 9 Dec 2020 15:41:52 +1100 Subject: [PATCH 3/3] Suggest --purge-db --- beacon_node/beacon_chain/src/beacon_chain.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/beacon_node/beacon_chain/src/beacon_chain.rs b/beacon_node/beacon_chain/src/beacon_chain.rs index d72f392a910..cdc1c251fe3 100644 --- a/beacon_node/beacon_chain/src/beacon_chain.rs +++ b/beacon_node/beacon_chain/src/beacon_chain.rs @@ -1764,7 +1764,7 @@ impl BeaconChain { crit!( self.log, "No stored fork choice found to restore from"; - "warning" => "The database is likely corrupt now" + "warning" => "The database is likely corrupt now, consider --purge-db" ); } }