Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Merged by Bors] - Revert fork choice if disk write fails #2068

Closed
wants to merge 3 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 54 additions & 6 deletions beacon_node/beacon_chain/src/beacon_chain.rs
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,15 @@ pub trait BeaconChainTypes: Send + Sync + 'static {
type EthSpec: types::EthSpec;
}

/// Shorthand for the concrete `ForkChoice` type used by a beacon chain that is parameterised
/// by `T: BeaconChainTypes`.
///
/// The fork choice store is a `BeaconForkChoiceStore` built from `T`'s `EthSpec`, `HotStore`
/// and `ColdStore` associated types, and the `ForkChoice` itself uses the same `EthSpec`.
/// Naming this type once avoids repeating the full nested generic signature wherever it is
/// needed.
pub type BeaconForkChoice<T> = ForkChoice<
BeaconForkChoiceStore<
<T as BeaconChainTypes>::EthSpec,
<T as BeaconChainTypes>::HotStore,
<T as BeaconChainTypes>::ColdStore,
>,
<T as BeaconChainTypes>::EthSpec,
>;

/// Represents the "Beacon Chain" component of Ethereum 2.0. Allows import of blocks and block
/// operations and chooses a canonical head.
pub struct BeaconChain<T: BeaconChainTypes> {
Expand Down Expand Up @@ -207,13 +216,9 @@ pub struct BeaconChain<T: BeaconChainTypes> {
pub genesis_state_root: Hash256,
/// The root of the list of genesis validators, used during syncing.
pub genesis_validators_root: Hash256,

#[allow(clippy::type_complexity)]
/// A state-machine that is updated with information from the network and chooses a canonical
/// head block.
pub fork_choice: RwLock<
ForkChoice<BeaconForkChoiceStore<T::EthSpec, T::HotStore, T::ColdStore>, T::EthSpec>,
>,
pub fork_choice: RwLock<BeaconForkChoice<T>>,
/// A handler for events generated by the beacon chain. This is only initialized when the
/// HTTP server is enabled.
pub event_handler: Option<ServerSentEventHandler<T::EthSpec>>,
Expand Down Expand Up @@ -284,6 +289,25 @@ impl<T: BeaconChainTypes> BeaconChain<T> {
persisted_fork_choice.as_kv_store_op(FORK_CHOICE_DB_KEY)
}

/// Rebuild the fork choice from the copy persisted in `store`.
///
/// Reads the `PersistedForkChoice` stored under `FORK_CHOICE_DB_KEY`. If no item exists under
/// that key, `Ok(None)` is returned. Otherwise the fork choice store is reconstructed first
/// (taking ownership of `store`), and the `ForkChoice` is then reconstructed on top of it.
///
/// ## Errors
///
/// Any error from the database read, or from either `from_persisted` reconstruction step, is
/// converted into `Error` and returned.
pub fn load_fork_choice(
    store: Arc<HotColdDB<T::EthSpec, T::HotStore, T::ColdStore>>,
) -> Result<Option<BeaconForkChoice<T>>, Error> {
    // Bail out early (without error) when nothing has been persisted yet.
    let persisted = match store.get_item::<PersistedForkChoice>(&FORK_CHOICE_DB_KEY)? {
        None => return Ok(None),
        Some(persisted) => persisted,
    };

    // The store half must be rebuilt first: it is an input to the `ForkChoice` itself.
    let restored_store =
        BeaconForkChoiceStore::from_persisted(persisted.fork_choice_store, store)?;

    let restored_fork_choice: BeaconForkChoice<T> =
        ForkChoice::from_persisted(persisted.fork_choice, restored_store)?;

    Ok(Some(restored_fork_choice))
}

/// Persists `self.op_pool` to disk.
///
/// ## Notes
Expand Down Expand Up @@ -1715,13 +1739,37 @@ impl<T: BeaconChainTypes> BeaconChain<T> {

// Store the block and its state, and execute the confirmation batch for the intermediate
// states, which will delete their temporary flags.
// If the write fails, revert fork choice to the version from disk, else we can
// end up with blocks in fork choice that are missing from disk.
// See https://github.com/sigp/lighthouse/issues/2028
ops.push(StoreOp::PutBlock(
block_root,
Box::new(signed_block.clone()),
));
ops.push(StoreOp::PutState(block.state_root, &state));
let txn_lock = self.store.hot_db.begin_rw_transaction();
self.store.do_atomically(ops)?;

if let Err(e) = self.store.do_atomically(ops) {
error!(
self.log,
"Database write failed!";
"msg" => "Restoring fork choice from disk",
"error" => ?e,
);
match Self::load_fork_choice(self.store.clone())? {
Some(persisted_fork_choice) => {
*fork_choice = persisted_fork_choice;
}
None => {
crit!(
self.log,
"No stored fork choice found to restore from";
"warning" => "The database is likely corrupt now, consider --purge-db"
);
}
}
return Err(e.into());
}
drop(txn_lock);

// The fork choice write-lock is dropped *after* the on-disk database has been updated.
Expand Down
23 changes: 6 additions & 17 deletions beacon_node/beacon_chain/src/builder.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
use crate::beacon_chain::{
BEACON_CHAIN_DB_KEY, ETH1_CACHE_DB_KEY, FORK_CHOICE_DB_KEY, OP_POOL_DB_KEY,
};
use crate::beacon_chain::{BEACON_CHAIN_DB_KEY, ETH1_CACHE_DB_KEY, OP_POOL_DB_KEY};
use crate::eth1_chain::{CachingEth1Backend, SszEth1};
use crate::head_tracker::HeadTracker;
use crate::migrate::{BackgroundMigrator, MigratorConfig};
use crate::persisted_beacon_chain::PersistedBeaconChain;
use crate::persisted_fork_choice::PersistedForkChoice;
use crate::shuffling_cache::ShufflingCache;
use crate::snapshot_cache::{SnapshotCache, DEFAULT_SNAPSHOT_CACHE_SIZE};
use crate::timeout_rw_lock::TimeoutRwLock;
Expand Down Expand Up @@ -248,20 +245,12 @@ where
.to_string()
})?;

let persisted_fork_choice = store
.get_item::<PersistedForkChoice>(&FORK_CHOICE_DB_KEY)
.map_err(|e| format!("DB error when reading persisted fork choice: {:?}", e))?
.ok_or("No persisted fork choice present in database.")?;

let fc_store = BeaconForkChoiceStore::from_persisted(
persisted_fork_choice.fork_choice_store,
store.clone(),
)
.map_err(|e| format!("Unable to load ForkChoiceStore: {:?}", e))?;

let fork_choice =
ForkChoice::from_persisted(persisted_fork_choice.fork_choice, fc_store)
.map_err(|e| format!("Unable to parse persisted fork choice from disk: {:?}", e))?;
BeaconChain::<Witness<TSlotClock, TEth1Backend, _, _, _>>::load_fork_choice(
store.clone(),
)
.map_err(|e| format!("Unable to load fork choice from disk: {:?}", e))?
.ok_or("Fork choice not found in store")?;

let genesis_block = store
.get_item::<SignedBeaconBlock<TEthSpec>>(&chain.genesis_block_root)
Expand Down
3 changes: 3 additions & 0 deletions beacon_node/beacon_chain/src/errors.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use crate::beacon_chain::ForkChoiceError;
use crate::beacon_fork_choice_store::Error as ForkChoiceStoreError;
use crate::eth1_chain::Error as Eth1ChainError;
use crate::migrate::PruningError;
use crate::naive_aggregation_pool::Error as NaiveAggregationError;
Expand Down Expand Up @@ -46,6 +47,7 @@ pub enum BeaconChainError {
DBInconsistent(String),
DBError(store::Error),
ForkChoiceError(ForkChoiceError),
ForkChoiceStoreError(ForkChoiceStoreError),
MissingBeaconBlock(Hash256),
MissingBeaconState(Hash256),
SlotProcessingError(SlotProcessingError),
Expand Down Expand Up @@ -106,6 +108,7 @@ easy_from_to!(ObservedBlockProducersError, BeaconChainError);
easy_from_to!(BlockSignatureVerifierError, BeaconChainError);
easy_from_to!(PruningError, BeaconChainError);
easy_from_to!(ArithError, BeaconChainError);
easy_from_to!(ForkChoiceStoreError, BeaconChainError);

#[derive(Debug)]
pub enum BlockProductionError {
Expand Down