Skip to content

Commit

Permalink
Change forks pruning algorithm. (paritytech#3962)
Browse files Browse the repository at this point in the history
This PR changes the fork calculation and pruning algorithm to enable
future block header pruning. It's required because the previous
algorithm relied on the block header persistence. It follows the
[related
discussion](paritytech#1570)

The previous code contained this comment describing the situation:
```
	/// Note a block height finalized, displacing all leaves with number less than the finalized
	/// block's.
	///
	/// Although it would be more technically correct to also prune out leaves at the
	/// same number as the finalized block, but with different hashes, the current behavior
	/// is simpler and our assumptions about how finalization works means that those leaves
	/// will be pruned soon afterwards anyway.
	pub fn finalize_height(&mut self, number: N) -> FinalizationOutcome<H, N> {
```

The previous algorithm relied on the existing block headers to prune
forks later and to enable block header pruning we need to clear all
obsolete forks right after the block finalization to not depend on the
related block headers in the future.

---------

Co-authored-by: Bastian Köcher <git@kchr.de>
  • Loading branch information
2 people authored and TarekkMA committed Aug 2, 2024
1 parent fa2aeaf commit 1d9a63d
Show file tree
Hide file tree
Showing 12 changed files with 196 additions and 249 deletions.
12 changes: 12 additions & 0 deletions prdoc/pr_3962.prdoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
title: Change fork calculation algorithm.

doc:
- audience: Node Dev
description: |
This PR changes the fork calculation and pruning algorithm to enable future block header pruning.
During the finalization of the block we prune known stale forks, so forks are pruned faster.

crates:
- name: sc-client-api
- name: sc-client-db
- name: sp-blockchain
14 changes: 0 additions & 14 deletions substrate/client/api/src/in_mem.rs
Original file line number Diff line number Diff line change
Expand Up @@ -419,20 +419,6 @@ impl<Block: BlockT> blockchain::Backend<Block> for Blockchain<Block> {
Ok(self.storage.read().leaves.hashes())
}

fn displaced_leaves_after_finalizing(
&self,
block_number: NumberFor<Block>,
) -> sp_blockchain::Result<Vec<Block::Hash>> {
Ok(self
.storage
.read()
.leaves
.displaced_by_finalize_height(block_number)
.leaves()
.cloned()
.collect::<Vec<_>>())
}

fn children(&self, _parent_hash: Block::Hash) -> sp_blockchain::Result<Vec<Block::Hash>> {
unimplemented!()
}
Expand Down
108 changes: 17 additions & 91 deletions substrate/client/api/src/leaves.rs
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ pub struct FinalizationOutcome<H, N> {
removed: BTreeMap<Reverse<N>, Vec<H>>,
}

impl<H, N: Ord> FinalizationOutcome<H, N> {
impl<H: Copy, N: Ord> FinalizationOutcome<H, N> {
/// Merge with another. This should only be used for displaced items that
/// are produced within one transaction of each other.
pub fn merge(&mut self, mut other: Self) {
Expand All @@ -63,6 +63,16 @@ impl<H, N: Ord> FinalizationOutcome<H, N> {
pub fn leaves(&self) -> impl Iterator<Item = &H> {
self.removed.values().flatten()
}

/// Constructor
pub fn new(new_displaced: impl Iterator<Item = (H, N)>) -> Self {
let mut removed = BTreeMap::<Reverse<N>, Vec<H>>::new();
for (hash, number) in new_displaced {
removed.entry(Reverse(number)).or_default().push(hash);
}

FinalizationOutcome { removed }
}
}

/// list of leaf hashes ordered by number (descending).
Expand Down Expand Up @@ -151,39 +161,12 @@ where
Some(RemoveOutcome { inserted, removed: LeafSetItem { hash, number } })
}

/// Note a block height finalized, displacing all leaves with number less than the finalized
/// block's.
///
/// Although it would be more technically correct to also prune out leaves at the
/// same number as the finalized block, but with different hashes, the current behavior
/// is simpler and our assumptions about how finalization works means that those leaves
/// will be pruned soon afterwards anyway.
pub fn finalize_height(&mut self, number: N) -> FinalizationOutcome<H, N> {
let boundary = if number == N::zero() {
return FinalizationOutcome { removed: BTreeMap::new() }
} else {
number - N::one()
};

let below_boundary = self.storage.split_off(&Reverse(boundary));
FinalizationOutcome { removed: below_boundary }
}

/// The same as [`Self::finalize_height`], but it only simulates the operation.
///
/// This means that no changes are done.
///
/// Returns the leaves that would be displaced by finalizing the given block.
pub fn displaced_by_finalize_height(&self, number: N) -> FinalizationOutcome<H, N> {
let boundary = if number == N::zero() {
return FinalizationOutcome { removed: BTreeMap::new() }
} else {
number - N::one()
};

let below_boundary = self.storage.range(&Reverse(boundary)..);
FinalizationOutcome {
removed: below_boundary.map(|(k, v)| (k.clone(), v.clone())).collect(),
/// Remove all leaves displaced by the last block finalization.
pub fn remove_displaced_leaves(&mut self, displaced_leaves: &FinalizationOutcome<H, N>) {
for (number, hashes) in &displaced_leaves.removed {
for hash in hashes.iter() {
self.remove_leaf(number, hash);
}
}
}

Expand Down Expand Up @@ -420,32 +403,6 @@ mod tests {
assert!(set.contains(11, 11_2));
}

#[test]
fn finalization_works() {
let mut set = LeafSet::new();
set.import(9_1u32, 9u32, 0u32);
set.import(10_1, 10, 9_1);
set.import(10_2, 10, 9_1);
set.import(11_1, 11, 10_1);
set.import(11_2, 11, 10_1);
set.import(12_1, 12, 11_2);

let outcome = set.finalize_height(11);
assert_eq!(set.count(), 2);
assert!(set.contains(11, 11_1));
assert!(set.contains(12, 12_1));
assert_eq!(
outcome.removed,
[(Reverse(10), vec![10_2])].into_iter().collect::<BTreeMap<_, _>>(),
);

set.undo().undo_finalization(outcome);
assert_eq!(set.count(), 3);
assert!(set.contains(11, 11_1));
assert!(set.contains(12, 12_1));
assert!(set.contains(10, 10_2));
}

#[test]
fn flush_to_disk() {
const PREFIX: &[u8] = b"abcdefg";
Expand Down Expand Up @@ -479,35 +436,4 @@ mod tests {
assert!(set.contains(10, 1_2));
assert!(!set.contains(10, 1_3));
}

#[test]
fn finalization_consistent_with_disk() {
const PREFIX: &[u8] = b"prefix";
let db = Arc::new(sp_database::MemDb::default());

let mut set = LeafSet::new();
set.import(10_1u32, 10u32, 0u32);
set.import(11_1, 11, 10_2);
set.import(11_2, 11, 10_2);
set.import(12_1, 12, 11_123);

assert!(set.contains(10, 10_1));

let mut tx = Transaction::new();
set.prepare_transaction(&mut tx, 0, PREFIX);
db.commit(tx).unwrap();

let _ = set.finalize_height(11);
let mut tx = Transaction::new();
set.prepare_transaction(&mut tx, 0, PREFIX);
db.commit(tx).unwrap();

assert!(set.contains(11, 11_1));
assert!(set.contains(11, 11_2));
assert!(set.contains(12, 12_1));
assert!(!set.contains(10, 10_1));

let set2 = LeafSet::read_from_db(&*db, 0, PREFIX).unwrap();
assert_eq!(set, set2);
}
}
5 changes: 3 additions & 2 deletions substrate/client/consensus/babe/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -562,9 +562,10 @@ fn aux_storage_cleanup<C: HeaderMetadata<Block> + HeaderBackend<Block>, Block: B
// Cleans data for stale forks.
let stale_forks = match client.expand_forks(&notification.stale_heads) {
Ok(stale_forks) => stale_forks,
Err((stale_forks, e)) => {
Err(e) => {
warn!(target: LOG_TARGET, "{:?}", e);
stale_forks

Default::default()
},
};
hashes.extend(stale_forks.iter());
Expand Down
4 changes: 2 additions & 2 deletions substrate/client/consensus/babe/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1094,8 +1094,8 @@ async fn obsolete_blocks_aux_data_cleanup() {
assert!(aux_data_check(&fork1_hashes[2..3], false));
// Present: A4
assert!(aux_data_check(&fork1_hashes[3..], true));
// Present C4, C5
assert!(aux_data_check(&fork3_hashes, true));
// Wiped C4, C5
assert!(aux_data_check(&fork3_hashes, false));
}

#[tokio::test]
Expand Down
72 changes: 25 additions & 47 deletions substrate/client/db/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ use sc_client_api::{
use sc_state_db::{IsPruned, LastCanonicalized, StateDb};
use sp_arithmetic::traits::Saturating;
use sp_blockchain::{
Backend as _, CachedHeaderMetadata, Error as ClientError, HeaderBackend, HeaderMetadata,
HeaderMetadataCache, Result as ClientResult,
Backend as _, CachedHeaderMetadata, DisplacedLeavesAfterFinalization, Error as ClientError,
HeaderBackend, HeaderMetadata, HeaderMetadataCache, Result as ClientResult,
};
use sp_core::{
offchain::OffchainOverlayedChange,
Expand Down Expand Up @@ -747,19 +747,6 @@ impl<Block: BlockT> sc_client_api::blockchain::Backend<Block> for BlockchainDb<B
Ok(self.leaves.read().hashes())
}

fn displaced_leaves_after_finalizing(
&self,
block_number: NumberFor<Block>,
) -> ClientResult<Vec<Block::Hash>> {
Ok(self
.leaves
.read()
.displaced_by_finalize_height(block_number)
.leaves()
.cloned()
.collect::<Vec<_>>())
}

fn children(&self, parent_hash: Block::Hash) -> ClientResult<Vec<Block::Hash>> {
children::read_children(&*self.db, columns::META, meta_keys::CHILDREN_PREFIX, parent_hash)
}
Expand Down Expand Up @@ -1813,14 +1800,13 @@ impl<Block: BlockT> Backend<Block> {
apply_state_commit(transaction, commit);
}

let new_displaced = self.blockchain.leaves.write().finalize_height(f_num);
self.prune_blocks(
transaction,
f_num,
f_hash,
&new_displaced,
current_transaction_justifications,
)?;
let new_displaced = self.blockchain.displaced_leaves_after_finalizing(f_hash, f_num)?;
let finalization_outcome =
FinalizationOutcome::new(new_displaced.displaced_leaves.clone().into_iter());

self.blockchain.leaves.write().remove_displaced_leaves(&finalization_outcome);

self.prune_blocks(transaction, f_num, &new_displaced, current_transaction_justifications)?;

Ok(())
}
Expand All @@ -1829,8 +1815,7 @@ impl<Block: BlockT> Backend<Block> {
&self,
transaction: &mut Transaction<DbHash>,
finalized_number: NumberFor<Block>,
finalized_hash: Block::Hash,
displaced: &FinalizationOutcome<Block::Hash, NumberFor<Block>>,
displaced: &DisplacedLeavesAfterFinalization<Block>,
current_transaction_justifications: &mut HashMap<Block::Hash, Justification>,
) -> ClientResult<()> {
match self.blocks_pruning {
Expand Down Expand Up @@ -1858,10 +1843,10 @@ impl<Block: BlockT> Backend<Block> {

self.prune_block(transaction, BlockId::<Block>::number(number))?;
}
self.prune_displaced_branches(transaction, finalized_hash, displaced)?;
self.prune_displaced_branches(transaction, displaced)?;
},
BlocksPruning::KeepFinalized => {
self.prune_displaced_branches(transaction, finalized_hash, displaced)?;
self.prune_displaced_branches(transaction, displaced)?;
},
}
Ok(())
Expand All @@ -1870,21 +1855,13 @@ impl<Block: BlockT> Backend<Block> {
fn prune_displaced_branches(
&self,
transaction: &mut Transaction<DbHash>,
finalized: Block::Hash,
displaced: &FinalizationOutcome<Block::Hash, NumberFor<Block>>,
displaced: &DisplacedLeavesAfterFinalization<Block>,
) -> ClientResult<()> {
// Discard all blocks from displaced branches
for h in displaced.leaves() {
match sp_blockchain::tree_route(&self.blockchain, *h, finalized) {
Ok(tree_route) =>
for r in tree_route.retracted() {
self.blockchain.insert_persisted_body_if_pinned(r.hash)?;
self.prune_block(transaction, BlockId::<Block>::hash(r.hash))?;
},
Err(sp_blockchain::Error::UnknownBlock(_)) => {
// Sometimes routes can't be calculated. E.g. after warp sync.
},
Err(e) => Err(e)?,
for (_, tree_route) in displaced.tree_routes.iter() {
for r in tree_route.retracted() {
self.blockchain.insert_persisted_body_if_pinned(r.hash)?;
self.prune_block(transaction, BlockId::<Block>::hash(r.hash))?;
}
}
Ok(())
Expand Down Expand Up @@ -3190,6 +3167,9 @@ pub(crate) mod tests {

#[test]
fn test_leaves_pruned_on_finality() {
// / 1b - 2b - 3b
// 0 - 1a - 2a
// \ 1c
let backend: Backend<Block> = Backend::new_test(10, 10);
let block0 = insert_header(&backend, 0, Default::default(), None, Default::default());

Expand All @@ -3201,18 +3181,16 @@ pub(crate) mod tests {

let block2_a = insert_header(&backend, 2, block1_a, None, Default::default());
let block2_b = insert_header(&backend, 2, block1_b, None, Default::default());
let block2_c = insert_header(&backend, 2, block1_b, None, [1; 32].into());

assert_eq!(
backend.blockchain().leaves().unwrap(),
vec![block2_a, block2_b, block2_c, block1_c]
);
let block3_b = insert_header(&backend, 3, block2_b, None, [3; 32].into());

assert_eq!(backend.blockchain().leaves().unwrap(), vec![block3_b, block2_a, block1_c]);

backend.finalize_block(block1_a, None).unwrap();
backend.finalize_block(block2_a, None).unwrap();

// leaves at same height stay. Leaves at lower heights pruned.
assert_eq!(backend.blockchain().leaves().unwrap(), vec![block2_a, block2_b, block2_c]);
// All leaves are pruned that are known to not belong to canonical branch
assert_eq!(backend.blockchain().leaves().unwrap(), vec![block2_a]);
}

#[test]
Expand Down
13 changes: 6 additions & 7 deletions substrate/client/merkle-mountain-range/src/offchain_mmr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ use sp_runtime::{
traits::{Block, Header, NumberFor, One},
Saturating,
};
use std::{collections::VecDeque, sync::Arc};
use std::{collections::VecDeque, default::Default, sync::Arc};

/// `OffchainMMR` exposes MMR offchain canonicalization and pruning logic.
pub struct OffchainMmr<B: Block, BE: Backend<B>, C> {
Expand Down Expand Up @@ -273,12 +273,11 @@ where
self.write_gadget_state_or_log();

// Remove offchain MMR nodes for stale forks.
let stale_forks = self.client.expand_forks(&notification.stale_heads).unwrap_or_else(
|(stale_forks, e)| {
warn!(target: LOG_TARGET, "{:?}", e);
stale_forks
},
);
let stale_forks = self.client.expand_forks(&notification.stale_heads).unwrap_or_else(|e| {
warn!(target: LOG_TARGET, "{:?}", e);

Default::default()
});
for hash in stale_forks.iter() {
self.prune_branch(hash);
}
Expand Down
Loading

0 comments on commit 1d9a63d

Please sign in to comment.