From 0f247ebd94a6b6a634aa2a1883abb87b34caf8d4 Mon Sep 17 00:00:00 2001 From: creativcoder <5155745+creativcoder@users.noreply.github.com> Date: Thu, 3 Aug 2023 05:10:47 +0530 Subject: [PATCH] Add support for HamtV0 from forest(commit hash: b622af5a6) (#1808) More context: https://github.com/ChainSafe/forest/pull/3060#pullrequestreview-1503177969 --- ipld/hamt/src/hamt.rs | 22 +++++-- ipld/hamt/src/lib.rs | 2 +- ipld/hamt/src/node.rs | 34 ++++++----- ipld/hamt/src/pointer.rs | 112 +++++++++++++++++++++++++++++----- ipld/hamt/tests/hamt_tests.rs | 18 +++++- 5 files changed, 151 insertions(+), 37 deletions(-) diff --git a/ipld/hamt/src/hamt.rs b/ipld/hamt/src/hamt.rs index fbe3332d1..8fbc5a586 100644 --- a/ipld/hamt/src/hamt.rs +++ b/ipld/hamt/src/hamt.rs @@ -15,7 +15,8 @@ use serde::{Serialize, Serializer}; use crate::hash_bits::HashBits; use crate::node::Node; -use crate::{Config, Error, Hash, HashAlgorithm, Sha256}; +use crate::pointer::version::Version; +use crate::{pointer::version, Config, Error, Hash, HashAlgorithm, Sha256}; /// Implementation of the HAMT data structure for IPLD. /// @@ -33,9 +34,14 @@ use crate::{Config, Error, Hash, HashAlgorithm, Sha256}; /// assert_eq!(map.get::<_>(&1).unwrap(), None); /// let cid = map.flush().unwrap(); /// ``` +pub type Hamt = HamtImpl; +/// Legacy amt V0 +pub type Hamtv0 = HamtImpl; + #[derive(Debug)] -pub struct Hamt { - root: Node, +#[doc(hidden)] +pub struct HamtImpl { + root: Node, store: BS, conf: Config, hash: PhantomData, @@ -43,11 +49,12 @@ pub struct Hamt { flushed_cid: Option, } -impl Serialize for Hamt +impl Serialize for HamtImpl where K: Serialize, V: Serialize, H: HashAlgorithm, + Ver: Version, { fn serialize(&self, serializer: S) -> Result where @@ -57,17 +64,20 @@ where } } -impl PartialEq for Hamt { +impl PartialEq + for HamtImpl +{ fn eq(&self, other: &Self) -> bool { self.root == other.root } } -impl Hamt +impl HamtImpl where K: Hash + Eq + PartialOrd + Serialize + DeserializeOwned, V: Serialize + DeserializeOwned, BS: Blockstore, + Ver: Version, H: HashAlgorithm, { pub fn new(store: BS) -> Self { diff --git a/ipld/hamt/src/lib.rs b/ipld/hamt/src/lib.rs index 2e8241bd8..2bb6567c5 100644 --- a/ipld/hamt/src/lib.rs +++ b/ipld/hamt/src/lib.rs @@ -23,7 +23,7 @@ pub use forest_hash_utils::{BytesKey, Hash}; use serde::{Deserialize, Serialize}; pub use self::error::Error; -pub use self::hamt::Hamt; +pub use self::hamt::{Hamt, Hamtv0}; pub use self::hash::*; pub use self::hash_algorithm::*; diff --git a/ipld/hamt/src/node.rs b/ipld/hamt/src/node.rs index 81bad7d67..4ae5fc020 100644 --- a/ipld/hamt/src/node.rs +++ b/ipld/hamt/src/node.rs @@ -17,26 +17,28 @@ use super::bitfield::Bitfield; use super::hash_bits::HashBits; use super::pointer::Pointer; use super::{Error, Hash, HashAlgorithm, KeyValuePair}; +use crate::pointer::version::Version; use crate::Config; /// Node in Hamt tree which contains bitfield of set indexes and pointers to nodes #[derive(Debug)] -pub(crate) struct Node { +pub(crate) struct Node { pub(crate) bitfield: Bitfield, - pub(crate) pointers: Vec>, + pub(crate) pointers: Vec>, hash: PhantomData, } -impl PartialEq for Node { +impl PartialEq for Node { fn eq(&self, other: &Self) -> bool { (self.bitfield == other.bitfield) && (self.pointers == other.pointers) } } -impl Serialize for Node +impl Serialize for Node where K: Serialize, V: Serialize, + Ver: self::Version, { fn serialize(&self, serializer: S) -> Result where @@ -46,10 +48,11 @@ where } } -impl<'de, K, V, H> Deserialize<'de> for Node +impl<'de, K, V, Ver, H> Deserialize<'de> for Node where K: DeserializeOwned, V: DeserializeOwned, + Ver: Version, { fn deserialize(deserializer: D) -> Result where @@ -64,7 +67,7 @@ where } } -impl Default for Node { +impl Default for Node { fn default() -> Self { Node { bitfield: Bitfield::zero(), @@ -74,11 +77,12 @@ impl Default for Node { } } -impl Node +impl Node where K: Hash + Eq + PartialOrd + Serialize + DeserializeOwned, H: HashAlgorithm, V: Serialize + DeserializeOwned, + Ver: Version, { pub fn set( &mut self, @@ -318,7 +322,7 @@ where // Link node is cached cached_node } else { - let node: Box> = if let Some(node) = store.get_cbor(cid)? { + let node: Box> = if let Some(node) = store.get_cbor(cid)? { node } else { #[cfg(not(feature = "ignore-dead-links"))] @@ -367,7 +371,7 @@ where self.insert_child(idx, key, value); } else { // Need to insert some empty nodes reserved for links. - let mut sub = Node::::default(); + let mut sub = Node::::default(); sub.modify_value(hashed_key, conf, depth + 1, key, value, store, overwrite)?; self.insert_child_dirty(idx, Box::new(sub)); } @@ -433,7 +437,7 @@ where }); let consumed = hashed_key.consumed; - let mut sub = Node::::default(); + let mut sub = Node::::default(); let modified = sub.modify_value( hashed_key, conf, @@ -568,7 +572,7 @@ where Ok(()) } - fn rm_child(&mut self, i: usize, idx: u32) -> Pointer { + fn rm_child(&mut self, i: usize, idx: u32) -> Pointer { self.bitfield.clear_bit(idx); self.pointers.remove(i) } @@ -579,7 +583,7 @@ where self.pointers.insert(i, Pointer::from_key_value(key, value)) } - fn insert_child_dirty(&mut self, idx: u32, node: Box>) { + fn insert_child_dirty(&mut self, idx: u32, node: Box>) { let i = self.index_for_bit_pos(idx); self.bitfield.set_bit(idx); self.pointers.insert(i, Pointer::Dirty(node)) @@ -591,11 +595,11 @@ where mask.and(&self.bitfield).count_ones() } - fn get_child_mut(&mut self, i: usize) -> &mut Pointer { + fn get_child_mut(&mut self, i: usize) -> &mut Pointer { &mut self.pointers[i] } - fn get_child(&self, i: usize) -> &Pointer { + fn get_child(&self, i: usize) -> &Pointer { &self.pointers[i] } @@ -603,7 +607,7 @@ where /// /// Returns true if the child pointer is completely empty and can be removed, /// which can happen if we artificially inserted nodes during insertion. - fn clean(child: &mut Pointer, conf: &Config, depth: u32) -> Result { + fn clean(child: &mut Pointer, conf: &Config, depth: u32) -> Result { match child.clean(conf, depth) { Ok(()) => Ok(false), Err(Error::ZeroPointers) if depth < conf.min_data_depth => Ok(true), diff --git a/ipld/hamt/src/pointer.rs b/ipld/hamt/src/pointer.rs index ef4444619..bce841c5a 100644 --- a/ipld/hamt/src/pointer.rs +++ b/ipld/hamt/src/pointer.rs @@ -15,18 +15,38 @@ use super::node::Node; use super::{Error, Hash, HashAlgorithm, KeyValuePair}; use crate::Config; +#[doc(hidden)] +pub mod version { + #[derive(PartialEq, Eq, Debug)] + pub struct V0; + #[derive(PartialEq, Eq, Debug)] + pub struct V3; + + pub trait Version { + const NUMBER: usize; + } + + impl Version for V0 { + const NUMBER: usize = 0; + } + + impl Version for V3 { + const NUMBER: usize = 3; + } +} + /// Pointer to index values or a link to another child node. #[derive(Debug)] -pub(crate) enum Pointer { +pub(crate) enum Pointer { Values(Vec>), Link { cid: Cid, - cache: OnceCell>>, + cache: OnceCell>>, }, - Dirty(Box>), + Dirty(Box>), } -impl PartialEq for Pointer { +impl PartialEq for Pointer { fn eq(&self, other: &Self) -> bool { match (self, other) { (Pointer::Values(a), Pointer::Values(b)) => a == b, @@ -37,25 +57,80 @@ impl PartialEq for Pointer { } } +mod pointer_v0 { + use cid::Cid; + use serde::{Deserialize, Serialize}; + + use crate::KeyValuePair; + + use super::Pointer; + + #[derive(Serialize)] + pub(super) enum PointerSer<'a, K, V> { + #[serde(rename = "0")] + Link(&'a Cid), + #[serde(rename = "1")] + Vals(&'a [KeyValuePair]), + } + + #[derive(Deserialize, Serialize)] + pub(super) enum PointerDe { + #[serde(rename = "0")] + Link(Cid), + #[serde(rename = "1")] + Vals(Vec>), + } + + impl<'a, K, V, Ver, H> TryFrom<&'a Pointer> for PointerSer<'a, K, V> { + type Error = &'static str; + + fn try_from(pointer: &'a Pointer) -> Result { + match pointer { + Pointer::Values(vals) => Ok(PointerSer::Vals(vals.as_ref())), + Pointer::Link { cid, .. } => Ok(PointerSer::Link(cid)), + Pointer::Dirty(_) => Err("Cannot serialize cached values"), + } + } + } + + impl From> for Pointer { + fn from(pointer: PointerDe) -> Self { + match pointer { + PointerDe::Link(cid) => Pointer::Link { + cid, + cache: Default::default(), + }, + PointerDe::Vals(vals) => Pointer::Values(vals), + } + } + } +} + /// Serialize the Pointer like an untagged enum. -impl Serialize for Pointer +impl Serialize for Pointer where K: Serialize, V: Serialize, + Ver: self::version::Version, { fn serialize(&self, serializer: S) -> Result where S: Serializer, { - match self { - Pointer::Values(vals) => vals.serialize(serializer), - Pointer::Link { cid, .. } => cid.serialize(serializer), - Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), + match Ver::NUMBER { + 0 => pointer_v0::PointerSer::try_from(self) + .map_err(ser::Error::custom)? + .serialize(serializer), + _ => match self { + Pointer::Values(vals) => vals.serialize(serializer), + Pointer::Link { cid, .. } => cid.serialize(serializer), + Pointer::Dirty(_) => Err(ser::Error::custom("Cannot serialize cached values")), + }, } } } -impl TryFrom for Pointer +impl TryFrom for Pointer where K: DeserializeOwned, V: DeserializeOwned, @@ -81,26 +156,35 @@ where } /// Deserialize the Pointer like an untagged enum. -impl<'de, K, V, H> Deserialize<'de> for Pointer +impl<'de, K, V, Ver, H> Deserialize<'de> for Pointer where K: DeserializeOwned, V: DeserializeOwned, + Ver: self::version::Version, { fn deserialize(deserializer: D) -> Result where D: Deserializer<'de>, { - Ipld::deserialize(deserializer).and_then(|ipld| ipld.try_into().map_err(de::Error::custom)) + match Ver::NUMBER { + 0 => { + let pointer_de: pointer_v0::PointerDe = + Deserialize::deserialize(deserializer)?; + Ok(Pointer::from(pointer_de)) + } + _ => Ipld::deserialize(deserializer) + .and_then(|ipld| ipld.try_into().map_err(de::Error::custom)), + } } } -impl Default for Pointer { +impl Default for Pointer { fn default() -> Self { Pointer::Values(Vec::new()) } } -impl Pointer +impl Pointer where K: Serialize + DeserializeOwned + Hash + PartialOrd, V: Serialize + DeserializeOwned, diff --git a/ipld/hamt/tests/hamt_tests.rs b/ipld/hamt/tests/hamt_tests.rs index 43b84eaa6..6bb40dcfc 100644 --- a/ipld/hamt/tests/hamt_tests.rs +++ b/ipld/hamt/tests/hamt_tests.rs @@ -893,7 +893,8 @@ fn tstring(v: impl Display) -> BytesKey { } mod test_default { - use fvm_ipld_blockstore::tracking::BSStats; + use fvm_ipld_blockstore::{tracking::BSStats, MemoryBlockstore}; + use fvm_ipld_hamt::{Config, Hamtv0}; use quickcheck_macros::quickcheck; use crate::{CidChecker, HamtFactory, LimitedKeyOps, UniqueKeyValuePairs}; @@ -1007,6 +1008,21 @@ mod test_default { super::clean_child_ordering(HamtFactory::default(), Some(stats), cids); } + #[test] + fn test_hamtv0() { + let config = Config { + bit_width: 5, + ..Default::default() + }; + let store = MemoryBlockstore::default(); + let mut hamtv0: Hamtv0<_, _, usize> = Hamtv0::new_with_config(&store, config.clone()); + hamtv0.set(1, "world".to_string()).unwrap(); + assert_eq!(hamtv0.get(&1).unwrap(), Some(&"world".to_string())); + let c = hamtv0.flush().unwrap(); + let new_hamt = Hamtv0::load_with_config(&c, &store, config).unwrap(); + assert_eq!(hamtv0, new_hamt); + } + #[quickcheck] fn prop_cid_indep_of_insert_order(kvs: UniqueKeyValuePairs, seed: u64) -> bool { super::prop_cid_indep_of_insert_order(HamtFactory::default(), kvs, seed)