Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

update seek_prefix #7579

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions core/primitives/src/views.rs
Original file line number Diff line number Diff line change
Expand Up @@ -194,9 +194,8 @@ impl From<AccessKeyView> for AccessKey {
}
}

/// Set of serialized TrieNodes that are encoded in base64. Represent proof of inclusion of some TrieNode in the MerkleTrie.
pub type TrieProofPath = Vec<String>;

/// Set of serialized, encoded TrieNodes. Represents a proof of inclusion of some TrieNode in the MerkleTrie.
pub type TrieProofPath = Vec<Arc<[u8]>>;
/// Item of the state: the key and value are serialized in base64, together with a proof of inclusion of the given state item.
#[cfg_attr(feature = "deepsize_feature", derive(deepsize::DeepSizeOf))]
#[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)]
Expand Down
6 changes: 3 additions & 3 deletions core/store/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ use crate::db::{
pub use crate::trie::iterator::TrieIterator;
pub use crate::trie::update::{TrieUpdate, TrieUpdateIterator, TrieUpdateValuePtr};
pub use crate::trie::{
estimator, split_state, ApplyStatePartResult, KeyForStateChanges, PartialStorage, ShardTries,
Trie, TrieAccess, TrieCache, TrieCachingStorage, TrieChanges, TrieConfig, TrieStorage,
WrappedTrieChanges,
estimator, split_state, ApplyStatePartResult, KeyForStateChanges, NibbleSlice, PartialStorage,
RawTrieNode, RawTrieNodeWithSize, ShardTries, Trie, TrieAccess, TrieCache, TrieCachingStorage,
TrieChanges, TrieNodeWithSize, TrieStorage, WrappedTrieChanges,
};

mod columns;
Expand Down
95 changes: 77 additions & 18 deletions core/store/src/trie/iterator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ use crate::{StorageError, Trie};
struct Crumb {
node: TrieNodeWithSize,
status: CrumbStatus,
prefix_boundary: bool,
}

#[derive(Clone, Copy, Eq, PartialEq, Debug)]
Expand All @@ -20,6 +21,10 @@ pub(crate) enum CrumbStatus {

impl Crumb {
fn increment(&mut self) {
if self.prefix_boundary {
self.status = CrumbStatus::Exiting;
return;
}
self.status = match (&self.status, &self.node.node) {
(_, &TrieNode::Empty) => CrumbStatus::Exiting,
(&CrumbStatus::Entering, _) => CrumbStatus::At,
Expand All @@ -36,6 +41,9 @@ pub struct TrieIterator<'a> {
trie: &'a Trie,
trail: Vec<Crumb>,
pub(crate) key_nibbles: Vec<u8>,

/// If not `None`, a list of all nodes that the iterator has visited.
visited_nodes: Option<Vec<std::sync::Arc<[u8]>>>,
}

pub type TrieItem = (Vec<u8>, Vec<u8>);
Expand All @@ -56,32 +64,68 @@ impl<'a> TrieIterator<'a> {
trie,
trail: Vec::with_capacity(8),
key_nibbles: Vec::with_capacity(64),
visited_nodes: None,
};
r.descend_into_node(&trie.root)?;
Ok(r)
}

/// Position the iterator on the first element with key >= `key`.
pub fn seek<K: AsRef<[u8]>>(&mut self, key: K) -> Result<(), StorageError> {
self.seek_nibble_slice(NibbleSlice::new(key.as_ref())).map(drop)
pub fn seek_prefix<K: AsRef<[u8]>>(&mut self, key: K) -> Result<(), StorageError> {
self.seek_nibble_slice(NibbleSlice::new(key.as_ref()), true).map(drop)
}
/// Configures whether the iterator should remember all the nodes it's
/// visiting.
///
/// Passing `true` installs a fresh, empty list, discarding any nodes that
/// were remembered before this call. Passing `false` disables the feature
/// and drops the list.
///
/// Use [`Self::into_visited_nodes`] to retrieve the list.
pub fn remember_visited_nodes(&mut self, remember: bool) {
    // `Vec::new` is passed directly; wrapping it in a closure is redundant.
    self.visited_nodes = remember.then(Vec::new);
}

/// Consumes the iterator and returns the list of nodes it has visited.
///
/// By default the iterator *doesn't* remember nodes it visits. To enable
/// that feature use the [`Self::remember_visited_nodes`] method. If the
/// feature is disabled, this method returns an empty list. Otherwise
/// it returns the list of nodes visited since the feature was enabled.
pub fn into_visited_nodes(self) -> Vec<std::sync::Arc<[u8]>> {
    // `None` (feature disabled) maps to the default value, an empty `Vec`.
    self.visited_nodes.unwrap_or_default()
}

/// Returns the hash of the last node
pub(crate) fn seek_nibble_slice(
&mut self,
mut key: NibbleSlice<'_>,
is_prefix_seek: bool,
) -> Result<CryptoHash, StorageError> {
self.trail.clear();
self.key_nibbles.clear();
// Checks if a key in an extension or leaf matches our search query.
//
// When doing prefix seek, this checks whether `key` is a prefix of
// `ext_key`. When doing regular range seek, this checks whether `key`
// is no greater than `ext_key`. If those conditions aren’t met, the
// node with `ext_key` should not match our query.
let check_ext_key = |key: &NibbleSlice, ext_key: &NibbleSlice| {
if is_prefix_seek {
ext_key.starts_with(key)
} else {
ext_key >= key
}
};

let mut hash = self.trie.root;
let mut prev_prefix_boundary = &mut false;
loop {
*prev_prefix_boundary = is_prefix_seek;
self.descend_into_node(&hash)?;
let Crumb { status, node } = self.trail.last_mut().unwrap();
let Crumb { status, node, prefix_boundary } = self.trail.last_mut().unwrap();
prev_prefix_boundary = prefix_boundary;
match &node.node {
TrieNode::Empty => break,
TrieNode::Leaf(leaf_key, _) => {
let existing_key = NibbleSlice::from_encoded(leaf_key).0;
if existing_key < key {
if !check_ext_key(&key, &existing_key) {
self.key_nibbles.extend(existing_key.iter());
*status = CrumbStatus::Exiting;
}
Expand All @@ -98,6 +142,7 @@ impl<'a> TrieIterator<'a> {
hash = *child.unwrap_hash();
key = key.mid(1);
} else {
*prefix_boundary = is_prefix_seek;
break;
}
}
Expand All @@ -110,7 +155,7 @@ impl<'a> TrieIterator<'a> {
*status = CrumbStatus::At;
self.key_nibbles.extend(existing_key.iter());
} else {
if existing_key < key {
if !check_ext_key(&key, &existing_key) {
*status = CrumbStatus::Exiting;
self.key_nibbles.extend(existing_key.iter());
}
Expand All @@ -124,10 +169,16 @@ impl<'a> TrieIterator<'a> {

/// Fetches node by its hash and adds it to the trail.
///
/// The node is stored as the last [`Crumb`] in the trail.
/// The node is stored as the last [`Crumb`] in the trail. If iterator is
/// configured to remember all the nodes it's visiting (which can be enabled
/// with [`Self::remember_visited_nodes`]), the node will be added to the
/// list.
fn descend_into_node(&mut self, hash: &CryptoHash) -> Result<(), StorageError> {
let node = self.trie.retrieve_node(hash)?.1;
self.trail.push(Crumb { status: CrumbStatus::Entering, node });
let (bytes, node) = self.trie.retrieve_node(hash)?;
if let (Some(bytes), Some(nodes)) = (bytes, &mut self.visited_nodes) {
nodes.push(bytes);
}
self.trail.push(Crumb { status: CrumbStatus::Entering, node, prefix_boundary: false });
Ok(())
}

Expand Down Expand Up @@ -227,7 +278,7 @@ impl<'a> TrieIterator<'a> {
path_end: &[u8],
) -> Result<Vec<TrieItem>, StorageError> {
let path_begin_encoded = NibbleSlice::encode_nibbles(path_begin, false);
self.seek_nibble_slice(NibbleSlice::from_encoded(&path_begin_encoded).0)?;
self.seek_nibble_slice(NibbleSlice::from_encoded(&path_begin_encoded).0, false)?;

let mut trie_items = vec![];
for item in self {
Expand All @@ -250,7 +301,8 @@ impl<'a> TrieIterator<'a> {
path_end: &[u8],
) -> Result<Vec<TrieTraversalItem>, StorageError> {
let path_begin_encoded = NibbleSlice::encode_nibbles(path_begin, true);
let last_hash = self.seek_nibble_slice(NibbleSlice::from_encoded(&path_begin_encoded).0)?;
let last_hash =
self.seek_nibble_slice(NibbleSlice::from_encoded(&path_begin_encoded).0, false)?;
let mut prefix = Self::common_prefix(path_end, &self.key_nibbles);
if self.key_nibbles[prefix..] >= path_end[prefix..] {
return Ok(vec![]);
Expand Down Expand Up @@ -371,15 +423,15 @@ mod tests {
let result2: Vec<_> = map.iter().map(|(k, v)| (k.clone(), v.clone())).collect();
assert_eq!(result1, result2);
}
test_seek(&trie, &map, &[]);
test_seek_prefix(&trie, &map, &[]);

let empty_vec = vec![];
let max_key = map.keys().max().unwrap_or(&empty_vec);
let min_key = map.keys().min().unwrap_or(&empty_vec);
test_get_trie_items(&trie, &map, &[], &[]);
test_get_trie_items(&trie, &map, min_key, max_key);
for (seek_key, _) in trie_changes.iter() {
test_seek(&trie, &map, seek_key);
test_seek_prefix(&trie, &map, seek_key);
test_get_trie_items(&trie, &map, min_key, seek_key);
test_get_trie_items(&trie, &map, seek_key, max_key);
}
Expand All @@ -388,7 +440,7 @@ mod tests {
let key_length = rng.gen_range(1, 8);
let seek_key: Vec<u8> =
(0..key_length).map(|_| *alphabet.choose(&mut rng).unwrap()).collect();
test_seek(&trie, &map, &seek_key);
test_seek_prefix(&trie, &map, &seek_key);

let seek_key2: Vec<u8> =
(0..key_length).map(|_| *alphabet.choose(&mut rng).unwrap()).collect();
Expand Down Expand Up @@ -422,12 +474,19 @@ mod tests {
assert_eq!(result1, result2);
}

fn test_seek(trie: &Trie, map: &BTreeMap<Vec<u8>, Vec<u8>>, seek_key: &[u8]) {
fn test_seek_prefix(trie: &Trie, map: &BTreeMap<Vec<u8>, Vec<u8>>, seek_key: &[u8]) {
let mut iterator = trie.iter().unwrap();
iterator.seek(&seek_key).unwrap();
let result1: Vec<_> = iterator.map(Result::unwrap).take(5).collect();
let result2: Vec<_> =
map.range(seek_key.to_vec()..).map(|(k, v)| (k.clone(), v.clone())).take(5).collect();
iterator.seek_prefix(&seek_key).unwrap();
let iterator = iterator.map(Result::unwrap).inspect(|(key, _)| {
assert!(key.starts_with(seek_key), "‘{key:x?}’ does not start with ‘{seek_key:x?}’");
});
let result1: Vec<_> = iterator.take(5).collect();
let result2: Vec<_> = map
.range(seek_key.to_vec()..)
.map(|(k, v)| (k.clone(), v.clone()))
.take(5)
.filter(|(x, _)| x.starts_with(seek_key))
.collect();
assert_eq!(result1, result2);
}

Expand Down
37 changes: 23 additions & 14 deletions core/store/src/trie/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ use crate::flat_state::FlatState;
pub use crate::trie::config::TrieConfig;
use crate::trie::insert_delete::NodesStorage;
use crate::trie::iterator::TrieIterator;
use crate::trie::nibble_slice::NibbleSlice;
pub use crate::trie::nibble_slice::NibbleSlice;
pub use crate::trie::shard_tries::{KeyForStateChanges, ShardTries, WrappedTrieChanges};
pub use crate::trie::trie_storage::{TrieCache, TrieCachingStorage, TrieStorage};
use crate::trie::trie_storage::{TrieMemoryPartialStorage, TrieRecordingStorage};
Expand Down Expand Up @@ -309,7 +309,7 @@ impl std::fmt::Debug for TrieNode {

#[derive(Debug, Eq, PartialEq)]
#[allow(clippy::large_enum_variant)]
enum RawTrieNode {
pub enum RawTrieNode {
Leaf(Vec<u8>, u32, CryptoHash),
Branch([Option<CryptoHash>; 16], Option<(u32, CryptoHash)>),
Extension(Vec<u8>, CryptoHash),
Expand All @@ -318,8 +318,8 @@ enum RawTrieNode {
/// Trie node + memory cost of its subtree
/// memory_usage is serialized, stored, and contributes to hash
#[derive(Debug, Eq, PartialEq)]
struct RawTrieNodeWithSize {
node: RawTrieNode,
pub struct RawTrieNodeWithSize {
pub node: RawTrieNode,
memory_usage: u64,
}

Expand Down Expand Up @@ -396,7 +396,7 @@ impl RawTrieNode {
out
}

fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
pub fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
let mut cursor = Cursor::new(bytes);
match cursor.read_u8()? {
LEAF_NODE => {
Expand Down Expand Up @@ -446,7 +446,7 @@ impl RawTrieNodeWithSize {
out
}

fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
pub fn decode(bytes: &[u8]) -> Result<Self, std::io::Error> {
if bytes.len() < 8 {
return Err(std::io::Error::new(std::io::ErrorKind::Other, "Wrong type"));
}
Expand Down Expand Up @@ -946,9 +946,18 @@ mod tests {
}
assert_eq!(pairs, iter_pairs);

let assert_has_next = |want, other_iter: &mut TrieIterator| {
assert_eq!(Some(want), other_iter.next().map(|item| item.unwrap().0).as_deref());
};

let mut other_iter = trie.iter().unwrap();
other_iter.seek(b"r").unwrap();
assert_eq!(other_iter.next().unwrap().unwrap().0, b"x".to_vec());
other_iter.seek_prefix(b"r").unwrap();
assert_eq!(other_iter.next(), None);
other_iter.seek_prefix(b"x").unwrap();
assert_has_next(b"x", &mut other_iter);
assert_eq!(other_iter.next(), None);
other_iter.seek_prefix(b"y").unwrap();
assert_has_next(b"y", &mut other_iter);
}

#[test]
Expand Down Expand Up @@ -1000,13 +1009,13 @@ mod tests {
let root = test_populate_trie(&tries, &Trie::EMPTY_ROOT, ShardUId::single_shard(), changes);
let trie = tries.get_trie_for_shard(ShardUId::single_shard(), root.clone());
let mut iter = trie.iter().unwrap();
iter.seek(&vec![0, 116, 101, 115, 116, 44]).unwrap();
iter.seek_prefix(&vec![0, 116, 101, 115, 116, 44]).unwrap();
let mut pairs = vec![];
for pair in iter {
pairs.push(pair.unwrap().0);
}
assert_eq!(
pairs[..2],
pairs,
[
vec![
0, 116, 101, 115, 116, 44, 98, 97, 108, 97, 110, 99, 101, 115, 58, 98, 111, 98,
Expand Down Expand Up @@ -1088,7 +1097,7 @@ mod tests {
}

#[test]
fn test_iterator_seek() {
fn test_iterator_seek_prefix() {
let mut rng = rand::thread_rng();
for _test_run in 0..10 {
let tries = create_tries();
Expand All @@ -1106,7 +1115,7 @@ mod tests {
if let Some(value) = value {
let want = Some(Ok((key.clone(), value)));
let mut iterator = trie.iter().unwrap();
iterator.seek(&key).unwrap();
iterator.seek_prefix(&key).unwrap();
assert_eq!(want, iterator.next(), "key: {key:x?}");
}
}
Expand All @@ -1115,9 +1124,9 @@ mod tests {
let queries = gen_changes(&mut rng, 500).into_iter().map(|(key, _)| key);
for query in queries {
let mut iterator = trie.iter().unwrap();
iterator.seek(&query).unwrap();
iterator.seek_prefix(&query).unwrap();
if let Some(Ok((key, _))) = iterator.next() {
assert!(key >= query);
assert!(key.starts_with(&query), "‘{key:x?}’ does not start with ‘{query:x?}’");
}
}
}
Expand Down
Loading