Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Trie (non)-membership proofs #7509

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions core/primitives/src/views.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,6 @@ impl From<AccessKeyView> for AccessKey {
}
}

/// Set of serialized TrieNodes that are encoded in base64. Represent proof of inclusion of some TrieNode in the MerkleTrie.
pub type TrieProofPath = Vec<String>;

/// Item of the state, key and value are serialized in base64 and proof for inclusion of given state item.
#[cfg_attr(feature = "deepsize_feature", derive(deepsize::DeepSizeOf))]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
Expand All @@ -204,14 +201,18 @@ pub struct StateItem {
pub key: Vec<u8>,
#[serde(with = "base64_format")]
pub value: Vec<u8>,
pub proof: TrieProofPath,
/// Deprecated, always empty, eventually will be deleted.
// TODO(mina86): This was deprecated in 1.30. Get rid of the field
// altogether at 1.33 or something.
#[serde(default)]
pub proof: Vec<()>,
}

#[cfg_attr(feature = "deepsize_feature", derive(deepsize::DeepSizeOf))]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
pub struct ViewStateResult {
pub values: Vec<StateItem>,
pub proof: TrieProofPath,
pub proof: Vec<Arc<[u8]>>,
blasrodri marked this conversation as resolved.
Show resolved Hide resolved
}

#[cfg_attr(feature = "deepsize_feature", derive(deepsize::DeepSizeOf))]
Expand Down
6 changes: 3 additions & 3 deletions core/store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ use crate::db::{
pub use crate::trie::iterator::TrieIterator;
pub use crate::trie::update::{TrieUpdate, TrieUpdateIterator, TrieUpdateValuePtr};
pub use crate::trie::{
estimator, split_state, ApplyStatePartResult, KeyForStateChanges, PartialStorage, ShardTries,
Trie, TrieAccess, TrieCache, TrieCacheFactory, TrieCachingStorage, TrieChanges, TrieStorage,
WrappedTrieChanges,
estimator, split_state, ApplyStatePartResult, KeyForStateChanges, NibbleSlice, PartialStorage,
RawTrieNode, RawTrieNodeWithSize, ShardTries, Trie, TrieAccess, TrieCache, TrieCacheFactory,
TrieCachingStorage, TrieChanges, TrieNodeWithSize, TrieStorage, WrappedTrieChanges,
};

mod columns;
Expand Down
31 changes: 29 additions & 2 deletions core/store/src/trie/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,9 @@ pub struct TrieIterator<'a> {
trie: &'a Trie,
trail: Vec<Crumb>,
pub(crate) key_nibbles: Vec<u8>,

/// If not `None`, a list of all nodes that the iterator has visited.
visited_nodes: Option<Vec<std::sync::Arc<[u8]>>>,
}

pub type TrieItem = (Vec<u8>, Vec<u8>);
Expand All @@ -56,6 +59,7 @@ impl<'a> TrieIterator<'a> {
trie,
trail: Vec::with_capacity(8),
key_nibbles: Vec::with_capacity(64),
visited_nodes: None,
};
r.descend_into_node(&trie.root)?;
Ok(r)
Expand All @@ -65,6 +69,23 @@ impl<'a> TrieIterator<'a> {
pub fn seek<K: AsRef<[u8]>>(&mut self, key: K) -> Result<(), StorageError> {
self.seek_nibble_slice(NibbleSlice::new(key.as_ref())).map(drop)
}
/// Configures whether the iterator should remember all the nodes its
/// visiting.
///
/// Use [`Self::into_visited_nodes`] to retrieve the list.
pub fn remember_visited_nodes(&mut self, remember: bool) {
self.visited_nodes = remember.then(|| Vec::new());
}

/// Consumes iterator and returns list of nodes it’s visited.
///
/// By default the iterator *doesn’t* remember nodes it visits. To enable
/// that feature use [`Self::remember_visited_nodes`] method. If the
/// feature is disabled, this method returns an empty list. Otherwise
/// it returns list of nodes visited since the feature was enabled.
pub fn into_visited_nodes(self) -> Vec<std::sync::Arc<[u8]>> {
self.visited_nodes.unwrap_or(Vec::new())
}

/// Returns the hash of the last node
pub(crate) fn seek_nibble_slice(
Expand Down Expand Up @@ -124,9 +145,15 @@ impl<'a> TrieIterator<'a> {

/// Fetches block by its hash and adds it to the trail.
///
/// The node is stored as the last [`Crumb`] in the trail.
/// The node is stored as the last [`Crumb`] in the trail. If iterator is
/// configured to remember all the nodes its visiting (which can be enabled
/// with [`Self::remember_visited_nodes`]), the node will be added to the
/// list.
fn descend_into_node(&mut self, hash: &CryptoHash) -> Result<(), StorageError> {
let node = self.trie.retrieve_node(hash)?;
let (bytes, node) = self.trie.retrieve_node(hash)?;
if let (Some(bytes), Some(nodes)) = (bytes, &mut self.visited_nodes) {
nodes.push(bytes);
}
self.trail.push(Crumb { status: CrumbStatus::Entering, node });
Ok(())
}
Expand Down
29 changes: 19 additions & 10 deletions core/store/src/trie/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use near_primitives::types::{StateRoot, StateRootNode};
use crate::flat_state::FlatState;
use crate::trie::insert_delete::NodesStorage;
use crate::trie::iterator::TrieIterator;
use crate::trie::nibble_slice::NibbleSlice;
pub use crate::trie::nibble_slice::NibbleSlice;
pub use crate::trie::shard_tries::{
KeyForStateChanges, ShardTries, TrieCacheFactory, WrappedTrieChanges,
};
Expand Down Expand Up @@ -257,7 +257,7 @@ impl TrieNode {

#[derive(Debug, Eq, PartialEq)]
#[allow(clippy::large_enum_variant)]
enum RawTrieNode {
pub enum RawTrieNode {
Leaf(Vec<u8>, u32, CryptoHash),
Branch([Option<CryptoHash>; 16], Option<(u32, CryptoHash)>),
Extension(Vec<u8>, CryptoHash),
Expand All @@ -266,8 +266,8 @@ enum RawTrieNode {
/// Trie node + memory cost of its subtree
/// memory_usage is serialized, stored, and contributes to hash
#[derive(Debug, Eq, PartialEq)]
struct RawTrieNodeWithSize {
node: RawTrieNode,
pub struct RawTrieNodeWithSize {
pub node: RawTrieNode,
memory_usage: u64,
}

Expand Down Expand Up @@ -344,7 +344,7 @@ impl RawTrieNode {
out
}

fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
pub fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
let mut cursor = Cursor::new(bytes);
match cursor.read_u8()? {
LEAF_NODE => {
Expand Down Expand Up @@ -394,7 +394,7 @@ impl RawTrieNodeWithSize {
out
}

fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
pub fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
if bytes.len() < 8 {
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Wrong type"));
}
Expand Down Expand Up @@ -533,7 +533,7 @@ impl Trie {
}
let TrieNodeWithSize { node, memory_usage } = match handle {
NodeHandle::InMemory(h) => memory.node_ref(h).clone(),
NodeHandle::Hash(h) => self.retrieve_node(&h).expect("storage failure"),
NodeHandle::Hash(h) => self.retrieve_node(&h).expect("storage failure").1,
};

let mut memory_usage_naive = node.memory_usage_direct(memory);
Expand Down Expand Up @@ -615,10 +615,19 @@ impl Trie {
}
}

fn retrieve_node(&self, hash: &CryptoHash) -> Result<TrieNodeWithSize, StorageError> {
/// Retrieves decoded node alongside with its raw bytes representation.
///
/// Note that because Empty nodes (those which are referenced by
/// [`Self::EMPTY_ROOT`] hash) aren’t stored in the database, they don’t
/// have a bytes representation. For those nodes the first return value
/// will be `None`.
fn retrieve_node(
&self,
hash: &CryptoHash,
) -> Result<(Option<std::sync::Arc<[u8]>>, TrieNodeWithSize), StorageError> {
match self.retrieve_raw_node(hash)? {
None => Ok(TrieNodeWithSize::empty()),
Some((_bytes, node)) => Ok(TrieNodeWithSize::from_raw(node)),
None => Ok((None, TrieNodeWithSize::empty())),
Some((bytes, node)) => Ok((Some(bytes), TrieNodeWithSize::from_raw(node))),
}
}
Comment on lines -618 to 632
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be best to push this change as separate PR. That would make this PR smaller and easier to understand.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm unsure of what to do here.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It’s just a matter of separating this diff hunk and all the places where retrieve_node is called into a new commit and submitting as separate PR. It will get this PR smaller.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it's not super trivial as far as I see. Need to also modify descend_into_node. Can't we just keep it as it is?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be rather simple mechanical change. In that separate PR, the change to desdend_into_node will be change from self.trie.retrieve_node(hash)? to self.trie.retrieve_node(hash)?.1.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That will happen in this PR.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sorry @mina86 but I'm truly not following you on this one

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are three separate PRs here:

  1. PR changing retrieve_node to return (bytes, TrieNodeWithSize). All that PR does is changes the return type and updates all the call sites to do retrieve_node(...)?.1. This is quite mechanical and boring change but thanks to that it should be easy to write, review and merge. This PR is not concerned with the overall feature.
  2. PR changing TrieIterator::seek to TrieIterator::seek_prefix and makes changes the iterator such that it stops once everything within the prefix has been traversed. This can be done by adding a prefix_boundary: bool to Crumb and changing Crumb::increment such that if that flag is set, it always moves to Exiting status. The purpose of this change is to make sure that when iterating over a prefix, the iterator does not fetch spurious nodes. Currently I believe that returned proof includes unnecessary nodes. There may be some smarter way of dealing with this but I’m not seeing it just now.
  3. Once 1 and 2 are done, this PR can be rebased and will be ready to go.

1 and 2 can be done in parallel.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For (2) I've created ComposableFi#11 Can you please review it there?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For (1) #7535


Expand Down
16 changes: 8 additions & 8 deletions core/store/src/trie/state_parts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ impl Trie {
if part_id == num_parts {
return Ok(vec![16]);
}
let root_node = self.retrieve_node(&self.root)?;
let (_bytes, root_node) = self.retrieve_node(&self.root)?;
let total_size = root_node.memory_usage;
let size_start = (total_size + num_parts - 1) / num_parts * part_id;
self.find_path(&root_node, size_start)
Expand Down Expand Up @@ -106,7 +106,7 @@ impl Trie {
Some(NodeHandle::InMemory(_)) => {
unreachable!("only possible while mutating")
}
Some(NodeHandle::Hash(h)) => self.retrieve_node(h)?,
Some(NodeHandle::Hash(h)) => self.retrieve_node(h)?.1,
};
if *size_skipped + child.memory_usage <= size_start {
*size_skipped += child.memory_usage;
Expand Down Expand Up @@ -136,7 +136,7 @@ impl Trie {
TrieNode::Extension(key, child_handle) => {
let child = match child_handle {
NodeHandle::InMemory(_) => unreachable!("only possible while mutating"),
NodeHandle::Hash(h) => self.retrieve_node(h)?,
NodeHandle::Hash(h) => self.retrieve_node(h)?.1,
};
let (slice, _is_leaf) = NibbleSlice::from_encoded(key);
key_nibbles.extend(slice.iter());
Expand Down Expand Up @@ -312,7 +312,7 @@ mod tests {
return Ok(());
}
let mut stack: Vec<(CryptoHash, TrieNodeWithSize, CrumbStatus)> = Vec::new();
let root_node = self.retrieve_node(&self.root)?;
let (_bytes, root_node) = self.retrieve_node(&self.root)?;
stack.push((self.root.clone(), root_node, CrumbStatus::Entering));
while let Some((hash, node, position)) = stack.pop() {
if let CrumbStatus::Entering = position {
Expand Down Expand Up @@ -353,7 +353,7 @@ mod tests {
}
if i < 16 {
if let Some(NodeHandle::Hash(h)) = children[i].clone() {
let child = self.retrieve_node(&h)?;
let (_bytes, child) = self.retrieve_node(&h)?;
stack.push((hash, node, CrumbStatus::AtChild(i + 1)));
stack.push((h, child, CrumbStatus::Entering));
} else {
Expand All @@ -377,7 +377,7 @@ mod tests {
unreachable!("only possible while mutating")
}
NodeHandle::Hash(h) => {
let child = self.retrieve_node(&h)?;
let (_bytes, child) = self.retrieve_node(&h)?;
stack.push((hash, node, CrumbStatus::Exiting));
stack.push((h, child, CrumbStatus::Entering));
}
Expand All @@ -394,7 +394,7 @@ mod tests {
size_start: u64,
size_end: u64,
) -> Result<(), StorageError> {
let root_node = self.retrieve_node(&self.root)?;
let (_bytes, root_node) = self.retrieve_node(&self.root)?;
let path_begin = self.find_path(&root_node, size_start)?;
let path_end = self.find_path(&root_node, size_end)?;

Expand Down Expand Up @@ -424,7 +424,7 @@ mod tests {
part_id: PartId,
) -> Result<PartialState, StorageError> {
assert!(self.storage.as_caching_storage().is_some());
let root_node = self.retrieve_node(&self.root)?;
let (_bytes, root_node) = self.retrieve_node(&self.root)?;
let total_size = root_node.memory_usage;
let size_start = (total_size + part_id.total - 1) / part_id.total * part_id.idx;
let size_end = std::cmp::min(
Expand Down
Loading