From ca73b61572bfe7c02472569d813753d4f2ff1597 Mon Sep 17 00:00:00 2001 From: Darshan Kathiriya <8559992+lakshya-sky@users.noreply.github.com> Date: Fri, 28 Jun 2024 03:23:13 -0400 Subject: [PATCH] feat(trie): in-memory trie node overlay (#8199) Co-authored-by: Roman Krasiuk --- crates/trie/trie/src/trie.rs | 127 +++++++++++- .../trie/src/trie_cursor/database_cursors.rs | 14 +- crates/trie/trie/src/trie_cursor/mod.rs | 11 +- crates/trie/trie/src/trie_cursor/noop.rs | 11 +- crates/trie/trie/src/trie_cursor/update.rs | 191 ++++++++++++++++++ crates/trie/trie/src/updates.rs | 42 ++++ 6 files changed, 384 insertions(+), 12 deletions(-) create mode 100644 crates/trie/trie/src/trie_cursor/update.rs diff --git a/crates/trie/trie/src/trie.rs b/crates/trie/trie/src/trie.rs index e56946747169a..6671840f1b413 100644 --- a/crates/trie/trie/src/trie.rs +++ b/crates/trie/trie/src/trie.rs @@ -546,9 +546,11 @@ where mod tests { use super::*; use crate::{ + hashed_cursor::HashedPostStateCursorFactory, prefix_set::PrefixSetMut, test_utils::{state_root, state_root_prehashed, storage_root, storage_root_prehashed}, - BranchNodeCompact, TrieMask, + trie_cursor::TrieUpdatesCursorFactory, + BranchNodeCompact, HashedPostState, HashedStorage, TrieMask, }; use proptest::{prelude::ProptestConfig, proptest}; use proptest_arbitrary_interop::arb; @@ -562,6 +564,7 @@ mod tests { use reth_trie_common::triehash::KeccakHasher; use std::{ collections::{BTreeMap, HashMap}, + iter, ops::Mul, str::FromStr, sync::Arc, @@ -1369,4 +1372,126 @@ mod tests { assert_eq!(node.root_hash, None); assert_eq!(node.hashes.len(), 1); } + + #[test] + fn trie_updates_across_multiple_iterations() { + let address = Address::ZERO; + let hashed_address = keccak256(address); + + let factory = create_test_provider_factory(); + + let mut hashed_storage = BTreeMap::default(); + let mut post_state = HashedPostState::default(); + + // Block #1 + // Update specific storage slots + let mut modified_storage = BTreeMap::default(); + + // 0x0f.. + let modified_key_prefix = Nibbles::from_nibbles( + [0x0, 0xf].into_iter().chain(iter::repeat(0).take(62)).collect::>(), + ); + + // 0x0faa0.. + let mut modified_entry1 = modified_key_prefix.clone(); + modified_entry1.set_at(2, 0xa); + modified_entry1.set_at(3, 0xa); + + // 0x0faaa.. + let mut modified_entry2 = modified_key_prefix.clone(); + modified_entry2.set_at(2, 0xa); + modified_entry2.set_at(3, 0xa); + modified_entry2.set_at(4, 0xa); + + // 0x0fab0.. + let mut modified_entry3 = modified_key_prefix.clone(); + modified_entry3.set_at(2, 0xa); + modified_entry3.set_at(3, 0xb); + + // 0x0fba0.. + let mut modified_entry4 = modified_key_prefix; + modified_entry4.set_at(2, 0xb); + modified_entry4.set_at(3, 0xa); + + [modified_entry1, modified_entry2, modified_entry3.clone(), modified_entry4] + .into_iter() + .for_each(|key| { + modified_storage.insert(B256::from_slice(&key.pack()), U256::from(1)); + }); + + // Update main hashed storage. + hashed_storage.extend(modified_storage.clone()); + post_state.extend(HashedPostState::default().with_storages([( + hashed_address, + HashedStorage::from_iter(false, modified_storage.clone()), + )])); + + let (storage_root, block1_updates) = compute_storage_root( + address, + factory.provider().unwrap().tx_ref(), + &post_state, + &TrieUpdates::default(), + ); + assert_eq!(storage_root, storage_root_prehashed(hashed_storage.clone())); + + // Block #2 + // Set 0x0fab0.. hashed slot to 0 + modified_storage.insert(B256::from_slice(&modified_entry3.pack()), U256::ZERO); + + // Update main hashed storage. + hashed_storage.remove(&B256::from_slice(&modified_entry3.pack())); + post_state.extend(HashedPostState::default().with_storages([( + hashed_address, + HashedStorage::from_iter(false, modified_storage.clone()), + )])); + + let (storage_root, block2_updates) = compute_storage_root( + address, + factory.provider().unwrap().tx_ref(), + &post_state, + &block1_updates, + ); + assert_eq!(storage_root, storage_root_prehashed(hashed_storage.clone())); + + // Commit trie updates + { + let mut updates = block1_updates; + updates.extend(block2_updates); + + let provider_rw = factory.provider_rw().unwrap(); + let mut hashed_storage_cursor = + provider_rw.tx_ref().cursor_dup_write::().unwrap(); + for (hashed_slot, value) in &hashed_storage { + hashed_storage_cursor + .upsert(hashed_address, StorageEntry { key: *hashed_slot, value: *value }) + .unwrap(); + } + updates.flush(provider_rw.tx_ref()).unwrap(); + provider_rw.commit().unwrap(); + } + + // Recompute storage root for block #3 + let storage_root = + StorageRoot::from_tx(factory.provider().unwrap().tx_ref(), address).root().unwrap(); + assert_eq!(storage_root, storage_root_prehashed(hashed_storage.clone())); + } + + fn compute_storage_root( + address: Address, + tx: &TX, + post_state: &HashedPostState, + update: &TrieUpdates, + ) -> (B256, TrieUpdates) { + let mut prefix_sets = post_state.construct_prefix_sets(); + let (root, _, updates) = StorageRoot::from_tx(tx, address) + .with_hashed_cursor_factory(HashedPostStateCursorFactory::new( + tx, + &post_state.clone().into_sorted(), + )) + .with_trie_cursor_factory(TrieUpdatesCursorFactory::new(tx, &update.sorted())) + .with_prefix_set(prefix_sets.storage_prefix_sets.remove(&keccak256(address)).unwrap()) + .root_with_updates() + .unwrap(); + (root, updates) + } } diff --git a/crates/trie/trie/src/trie_cursor/database_cursors.rs b/crates/trie/trie/src/trie_cursor/database_cursors.rs index 84cc69ce19254..a425c70f8c347 100644 --- a/crates/trie/trie/src/trie_cursor/database_cursors.rs +++ b/crates/trie/trie/src/trie_cursor/database_cursors.rs @@ -9,18 +9,22 @@ use reth_primitives::B256; /// Implementation of the trie cursor factory for a database transaction. impl<'a, TX: DbTx> TrieCursorFactory for &'a TX { - fn account_trie_cursor(&self) -> Result, DatabaseError> { - Ok(Box::new(DatabaseAccountTrieCursor::new(self.cursor_read::()?))) + type AccountTrieCursor = DatabaseAccountTrieCursor<::Cursor>; + type StorageTrieCursor = + DatabaseStorageTrieCursor<::DupCursor>; + + fn account_trie_cursor(&self) -> Result { + Ok(DatabaseAccountTrieCursor::new(self.cursor_read::()?)) } fn storage_trie_cursor( &self, hashed_address: B256, - ) -> Result, DatabaseError> { - Ok(Box::new(DatabaseStorageTrieCursor::new( + ) -> Result { + Ok(DatabaseStorageTrieCursor::new( self.cursor_dup_read::()?, hashed_address, - ))) + )) } } diff --git a/crates/trie/trie/src/trie_cursor/mod.rs b/crates/trie/trie/src/trie_cursor/mod.rs index e083be76411a2..f5f50a0d01518 100644 --- a/crates/trie/trie/src/trie_cursor/mod.rs +++ b/crates/trie/trie/src/trie_cursor/mod.rs @@ -3,6 +3,7 @@ use reth_db::DatabaseError; use reth_primitives::B256; mod database_cursors; mod subnode; +mod update; /// Noop trie cursor implementations. pub mod noop; @@ -10,18 +11,24 @@ pub mod noop; pub use self::{ database_cursors::{DatabaseAccountTrieCursor, DatabaseStorageTrieCursor}, subnode::CursorSubNode, + update::*, }; /// Factory for creating trie cursors. pub trait TrieCursorFactory { + /// The account trie cursor type. + type AccountTrieCursor: TrieCursor; + /// The storage trie cursor type. + type StorageTrieCursor: TrieCursor; + /// Create an account trie cursor. - fn account_trie_cursor(&self) -> Result, DatabaseError>; + fn account_trie_cursor(&self) -> Result; /// Create a storage tries cursor. fn storage_trie_cursor( &self, hashed_address: B256, - ) -> Result, DatabaseError>; + ) -> Result; } /// A cursor for navigating a trie that works with both Tables and DupSort tables. diff --git a/crates/trie/trie/src/trie_cursor/noop.rs b/crates/trie/trie/src/trie_cursor/noop.rs index 98c19216e655c..c55bdb80f2c54 100644 --- a/crates/trie/trie/src/trie_cursor/noop.rs +++ b/crates/trie/trie/src/trie_cursor/noop.rs @@ -9,17 +9,20 @@ use reth_primitives::B256; pub struct NoopTrieCursorFactory; impl TrieCursorFactory for NoopTrieCursorFactory { + type AccountTrieCursor = NoopAccountTrieCursor; + type StorageTrieCursor = NoopStorageTrieCursor; + /// Generates a Noop account trie cursor. - fn account_trie_cursor(&self) -> Result, DatabaseError> { - Ok(Box::::default()) + fn account_trie_cursor(&self) -> Result { + Ok(NoopAccountTrieCursor::default()) } /// Generates a Noop storage trie cursor. fn storage_trie_cursor( &self, _hashed_address: B256, - ) -> Result, DatabaseError> { - Ok(Box::::default()) + ) -> Result { + Ok(NoopStorageTrieCursor::default()) } } diff --git a/crates/trie/trie/src/trie_cursor/update.rs b/crates/trie/trie/src/trie_cursor/update.rs new file mode 100644 index 0000000000000..2ee62bd66b182 --- /dev/null +++ b/crates/trie/trie/src/trie_cursor/update.rs @@ -0,0 +1,191 @@ +use super::{TrieCursor, TrieCursorFactory}; +use crate::updates::{TrieKey, TrieOp, TrieUpdatesSorted}; +use reth_db::DatabaseError; +use reth_primitives::B256; +use reth_trie_common::{BranchNodeCompact, Nibbles, StoredNibbles, StoredNibblesSubKey}; + +/// The trie cursor factory for the trie updates. +#[derive(Debug, Clone)] +pub struct TrieUpdatesCursorFactory<'a, CF> { + cursor_factory: CF, + trie_updates: &'a TrieUpdatesSorted, +} + +impl<'a, CF> TrieUpdatesCursorFactory<'a, CF> { + /// Create a new trie cursor factory. + pub const fn new(cursor_factory: CF, trie_updates: &'a TrieUpdatesSorted) -> Self { + Self { cursor_factory, trie_updates } + } +} + +impl<'a, CF: TrieCursorFactory> TrieCursorFactory for TrieUpdatesCursorFactory<'a, CF> { + type AccountTrieCursor = TrieUpdatesAccountTrieCursor<'a, CF::AccountTrieCursor>; + type StorageTrieCursor = TrieUpdatesStorageTrieCursor<'a, CF::StorageTrieCursor>; + + fn account_trie_cursor(&self) -> Result { + let cursor = self.cursor_factory.account_trie_cursor()?; + Ok(TrieUpdatesAccountTrieCursor::new(cursor, self.trie_updates)) + } + + fn storage_trie_cursor( + &self, + hashed_address: B256, + ) -> Result { + let cursor = self.cursor_factory.storage_trie_cursor(hashed_address)?; + Ok(TrieUpdatesStorageTrieCursor::new(cursor, hashed_address, self.trie_updates)) + } +} + +/// The cursor to iterate over account trie updates and corresponding database entries. +/// It will always give precedence to the data from the trie updates. +#[derive(Debug)] +pub struct TrieUpdatesAccountTrieCursor<'a, C> { + cursor: C, + trie_updates: &'a TrieUpdatesSorted, + last_key: Option, +} + +impl<'a, C> TrieUpdatesAccountTrieCursor<'a, C> { + const fn new(cursor: C, trie_updates: &'a TrieUpdatesSorted) -> Self { + Self { cursor, trie_updates, last_key: None } + } +} + +impl<'a, C: TrieCursor> TrieCursor for TrieUpdatesAccountTrieCursor<'a, C> { + fn seek_exact( + &mut self, + key: Nibbles, + ) -> Result, DatabaseError> { + if let Some((trie_key, trie_op)) = self.trie_updates.find_account_node(&key) { + self.last_key = Some(trie_key); + match trie_op { + TrieOp::Update(node) => Ok(Some((key, node))), + TrieOp::Delete => Ok(None), + } + } else { + let result = self.cursor.seek_exact(key)?; + self.last_key = + result.as_ref().map(|(k, _)| TrieKey::AccountNode(StoredNibbles(k.clone()))); + Ok(result) + } + } + + fn seek( + &mut self, + key: Nibbles, + ) -> Result, DatabaseError> { + let stored_nibbles = StoredNibbles(key.clone()); + let trie_update_entry = self + .trie_updates + .trie_operations + .iter() + .find(|(k, _)| matches!(k, TrieKey::AccountNode(nibbles) if nibbles <= &stored_nibbles)) + .cloned(); + + if let Some((trie_key, trie_op)) = trie_update_entry { + let nibbles = match &trie_key { + TrieKey::AccountNode(nibbles) => nibbles.clone(), + _ => panic!("Invalid trie key"), + }; + self.last_key = Some(trie_key); + match trie_op { + TrieOp::Update(node) => return Ok(Some((nibbles.0, node))), + TrieOp::Delete => return Ok(None), + } + } + + let result = self.cursor.seek(key)?; + self.last_key = + result.as_ref().map(|(k, _)| TrieKey::AccountNode(StoredNibbles(k.clone()))); + Ok(result) + } + + fn current(&mut self) -> Result, DatabaseError> { + if self.last_key.is_some() { + Ok(self.last_key.clone()) + } else { + self.cursor.current() + } + } +} + +/// The cursor to iterate over storage trie updates and corresponding database entries. +/// It will always give precedence to the data from the trie updates. +#[derive(Debug)] +pub struct TrieUpdatesStorageTrieCursor<'a, C> { + cursor: C, + trie_update_index: usize, + trie_updates: &'a TrieUpdatesSorted, + hashed_address: B256, + last_key: Option, +} + +impl<'a, C> TrieUpdatesStorageTrieCursor<'a, C> { + const fn new(cursor: C, hashed_address: B256, trie_updates: &'a TrieUpdatesSorted) -> Self { + Self { cursor, trie_updates, trie_update_index: 0, hashed_address, last_key: None } + } +} + +impl<'a, C: TrieCursor> TrieCursor for TrieUpdatesStorageTrieCursor<'a, C> { + fn seek_exact( + &mut self, + key: Nibbles, + ) -> Result, DatabaseError> { + if let Some((trie_key, trie_op)) = + self.trie_updates.find_storage_node(&self.hashed_address, &key) + { + self.last_key = Some(trie_key); + match trie_op { + TrieOp::Update(node) => Ok(Some((key, node))), + TrieOp::Delete => Ok(None), + } + } else { + let result = self.cursor.seek_exact(key)?; + self.last_key = result.as_ref().map(|(k, _)| { + TrieKey::StorageNode(self.hashed_address, StoredNibblesSubKey(k.clone())) + }); + Ok(result) + } + } + + fn seek( + &mut self, + key: Nibbles, + ) -> Result, DatabaseError> { + let mut trie_update_entry = self.trie_updates.trie_operations.get(self.trie_update_index); + while trie_update_entry + .filter(|(k, _)| matches!(k, TrieKey::StorageNode(address, nibbles) if address == &self.hashed_address && nibbles.0 < key)).is_some() + { + self.trie_update_index += 1; + trie_update_entry = self.trie_updates.trie_operations.get(self.trie_update_index); + } + + if let Some((trie_key, trie_op)) = + trie_update_entry.filter(|(k, _)| matches!(k, TrieKey::StorageNode(_, _))) + { + let nibbles = match trie_key { + TrieKey::StorageNode(_, nibbles) => nibbles.clone(), + _ => panic!("this should not happen!"), + }; + self.last_key = Some(trie_key.clone()); + match trie_op { + TrieOp::Update(node) => return Ok(Some((nibbles.0, node.clone()))), + TrieOp::Delete => return Ok(None), + } + } + + let result = self.cursor.seek(key)?; + self.last_key = result.as_ref().map(|(k, _)| { + TrieKey::StorageNode(self.hashed_address, StoredNibblesSubKey(k.clone())) + }); + Ok(result) + } + + fn current(&mut self) -> Result, DatabaseError> { + if self.last_key.is_some() { + Ok(self.last_key.clone()) + } else { + self.cursor.current() + } + } +} diff --git a/crates/trie/trie/src/updates.rs b/crates/trie/trie/src/updates.rs index 53830fd8a3c21..ac57978551334 100644 --- a/crates/trie/trie/src/updates.rs +++ b/crates/trie/trie/src/updates.rs @@ -222,4 +222,46 @@ impl TrieUpdates { Ok(()) } + + /// creates [`TrieUpdatesSorted`] by sorting the `trie_operations`. + pub fn sorted(&self) -> TrieUpdatesSorted { + let mut trie_operations = Vec::from_iter(self.trie_operations.clone()); + trie_operations.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + TrieUpdatesSorted { trie_operations } + } + + /// converts trie updates into [`TrieUpdatesSorted`]. + pub fn into_sorted(self) -> TrieUpdatesSorted { + let mut trie_operations = Vec::from_iter(self.trie_operations); + trie_operations.sort_unstable_by(|a, b| a.0.cmp(&b.0)); + TrieUpdatesSorted { trie_operations } + } +} + +/// The aggregation of trie updates. +#[derive(Debug, Default, Clone, PartialEq, Eq, Deref)] +pub struct TrieUpdatesSorted { + /// Sorted collection of trie operations. + pub(crate) trie_operations: Vec<(TrieKey, TrieOp)>, +} + +impl TrieUpdatesSorted { + /// Find the account node with the given nibbles. + pub fn find_account_node(&self, key: &Nibbles) -> Option<(TrieKey, TrieOp)> { + self.trie_operations + .iter() + .find(|(k, _)| matches!(k, TrieKey::AccountNode(nibbles) if &nibbles.0 == key)) + .cloned() + } + + /// Find the storage node with the given hashed address and key. + pub fn find_storage_node( + &self, + hashed_address: &B256, + key: &Nibbles, + ) -> Option<(TrieKey, TrieOp)> { + self.trie_operations.iter().find(|(k, _)| { + matches!(k, TrieKey::StorageNode(address, nibbles) if address == hashed_address && &nibbles.0 == key) + }).cloned() + } }