From 04dbbf9bcaf863ae3dfc3588e77d4820991a2755 Mon Sep 17 00:00:00 2001 From: Qyriad Date: Tue, 22 Oct 2024 17:16:38 -0600 Subject: [PATCH 1/4] merkle: add parent() helper function on NodeIndex --- src/merkle/index.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/merkle/index.rs b/src/merkle/index.rs index 104ceb44..24b38089 100644 --- a/src/merkle/index.rs +++ b/src/merkle/index.rs @@ -97,6 +97,14 @@ impl NodeIndex { self } + /// Returns the parent of the current node. This is the same as [`Self::move_up()`], but returns + /// a new value instead of mutating `self`. + pub const fn parent(mut self) -> Self { + self.depth = self.depth.saturating_sub(1); + self.value >>= 1; + self + } + // PROVIDERS // -------------------------------------------------------------------------------------------- From 96389691e6c8d94974c71875a5167d95ce145682 Mon Sep 17 00:00:00 2001 From: Qyriad Date: Wed, 23 Oct 2024 19:26:42 -0600 Subject: [PATCH 2/4] WIP(smt): impl simple subtree8 hashing and benchmarks for it --- Cargo.toml | 4 + benches/smt-subtree.rs | 137 +++++++++++++++++++++++++++++ src/merkle/smt/full/mod.rs | 7 ++ src/merkle/smt/mod.rs | 171 ++++++++++++++++++++++++++++++++++++- 4 files changed, 318 insertions(+), 1 deletion(-) create mode 100644 benches/smt-subtree.rs diff --git a/Cargo.toml b/Cargo.toml index 2616341c..62d7a0d2 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,10 @@ harness = false name = "smt" harness = false +[[bench]] +name = "smt-subtree" +harness = false + [[bench]] name = "store" harness = false diff --git a/benches/smt-subtree.rs b/benches/smt-subtree.rs new file mode 100644 index 00000000..d690a665 --- /dev/null +++ b/benches/smt-subtree.rs @@ -0,0 +1,137 @@ +use std::{fmt::Debug, hint, mem, time::Duration}; + +use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion}; +use miden_crypto::{ + hash::rpo::RpoDigest, + merkle::{NodeIndex, Smt, SmtLeaf, SMT_DEPTH}, + Felt, Word, ONE, +}; +use 
rand_utils::prng_array; +use winter_utils::Randomizable; + +fn smt_subtree_even(c: &mut Criterion) { + let mut seed = [0u8; 32]; + + let mut group = c.benchmark_group("subtree8-even"); + + for pair_count in (64..=256).step_by(64) { + let bench_id = BenchmarkId::from_parameter(pair_count); + group.bench_with_input(bench_id, &pair_count, |b, &pair_count| { + b.iter_batched( + || { + // Setup. + let entries: Vec<(RpoDigest, Word)> = (0..pair_count) + .map(|n| { + // A depth-8 subtree has at most 256 leaves, so leaf indices are in 0..=255. + let leaf_index = (n / pair_count) * 255; + let key = RpoDigest::new([ + generate_value(&mut seed), + ONE, + Felt::new(n), + Felt::new(leaf_index), + ]); + let value = generate_word(&mut seed); + (key, value) + }) + .collect(); + + let mut leaves: Vec<_> = entries + .iter() + .map(|(key, value)| { + let leaf = SmtLeaf::new_single(*key, *value); + let col = NodeIndex::from(leaf.index()).value(); + let hash = leaf.hash(); + (col, hash) + }) + .collect(); + leaves.sort(); + leaves + }, + |leaves| { + // Benchmarked function. + let subtree = + Smt::build_subtree(hint::black_box(leaves), hint::black_box(SMT_DEPTH)); + assert!(!subtree.is_empty()); + }, + BatchSize::SmallInput, + ); + }); + } +} + +fn smt_subtree_random(c: &mut Criterion) { + let mut seed = [0u8; 32]; + + let mut group = c.benchmark_group("subtree8-rand"); + + for pair_count in (64..=256).step_by(64) { + let bench_id = BenchmarkId::from_parameter(pair_count); + group.bench_with_input(bench_id, &pair_count, |b, &pair_count| { + b.iter_batched( + || { + // Setup. 
+ let entries: Vec<(RpoDigest, Word)> = (0..pair_count) + .map(|i| { + let leaf_index: u8 = generate_value(&mut seed); + let key = RpoDigest::new([ + ONE, + ONE, + Felt::new(i), + Felt::new(leaf_index as u64), + ]); + let value = generate_word(&mut seed); + (key, value) + }) + .collect(); + + let mut leaves: Vec<_> = entries + .iter() + .map(|(key, value)| { + let leaf = SmtLeaf::new_single(*key, *value); + let col = NodeIndex::from(leaf.index()).value(); + let hash = leaf.hash(); + (col, hash) + }) + .collect(); + leaves.sort(); + let before = leaves.len(); + leaves.dedup(); + let after = leaves.len(); + assert_eq!(before, after); + leaves + }, + |leaves| { + let subtree = + Smt::build_subtree(hint::black_box(leaves), hint::black_box(SMT_DEPTH)); + assert!(!subtree.is_empty()); + }, + BatchSize::SmallInput, + ); + }); + } +} + +criterion_group! { + name = smt_subtree_group; + config = Criterion::default() + .measurement_time(Duration::from_secs(40)) + .sample_size(60) + .configure_from_args(); + targets = smt_subtree_even, smt_subtree_random +} +criterion_main!(smt_subtree_group); + +// HELPER FUNCTIONS +// -------------------------------------------------------------------------------------------- + +fn generate_value<T: Copy + Debug + Randomizable>(seed: &mut [u8; 32]) -> T { + mem::swap(seed, &mut prng_array(*seed)); + let value: [T; 1] = rand_utils::prng_array(*seed); + value[0] +} + +fn generate_word(seed: &mut [u8; 32]) -> Word { + mem::swap(seed, &mut prng_array(*seed)); + let nums: [u64; 4] = prng_array(*seed); + [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])] +} diff --git a/src/merkle/smt/full/mod.rs b/src/merkle/smt/full/mod.rs index 9c640021..7101a03e 100644 --- a/src/merkle/smt/full/mod.rs +++ b/src/merkle/smt/full/mod.rs @@ -243,6 +243,13 @@ impl Smt { None } } + + pub fn build_subtree( + leaves: Vec<(u64, RpoDigest)>, + bottom_depth: u8, + ) -> BTreeMap<NodeIndex, InnerNode> { + <Self as SparseMerkleTree<SMT_DEPTH>>::build_subtree(leaves, bottom_depth) + } } impl SparseMerkleTree<SMT_DEPTH> for Smt { diff --git 
a/src/merkle/smt/mod.rs b/src/merkle/smt/mod.rs index 0b7ceb95..14613bd5 100644 --- a/src/merkle/smt/mod.rs +++ b/src/merkle/smt/mod.rs @@ -1,4 +1,7 @@ use alloc::{collections::BTreeMap, vec::Vec}; +use core::mem; + +use num::Integer; use super::{EmptySubtreeRoots, InnerNodeInfo, MerkleError, MerklePath, NodeIndex}; use crate::{ @@ -339,6 +342,108 @@ pub(crate) trait SparseMerkleTree<const DEPTH: u8> { /// /// The length `path` is guaranteed to be equal to `DEPTH` fn path_and_leaf_to_opening(path: MerklePath, leaf: Self::Leaf) -> Self::Opening; + + /// Builds Merkle nodes from a bottom layer of tuples of horizontal indices and their hashes, + /// sorted by their position. + /// + /// The leaves are 'conceptual' leaves, simply being entities at the bottom of some subtree, not + /// [`Self::Leaf`]. + /// + /// # Panics + /// With debug assertions on, this function panics under invalid inputs: if `leaves` contains + /// more entries than can fit in a depth-8 subtree (more than 256), if `bottom_depth` is + /// lower in the tree than the specified maximum depth (`DEPTH`), or if `leaves` is not sorted. + // FIXME: more complete docstring. + #[cfg_attr(not(test), allow(dead_code))] + fn build_subtree( + mut leaves: Vec<(u64, RpoDigest)>, + bottom_depth: u8, + ) -> BTreeMap<NodeIndex, InnerNode> { + debug_assert!(bottom_depth <= DEPTH); + debug_assert!(bottom_depth.is_multiple_of(&8)); + debug_assert!(leaves.len() <= usize::pow(2, 8)); + + let subtree_root = bottom_depth - 8; + + let mut inner_nodes: BTreeMap<NodeIndex, InnerNode> = Default::default(); + + let mut next_leaves: Vec<(u64, RpoDigest)> = Vec::with_capacity(leaves.len() / 2); + + for next_depth in (subtree_root..bottom_depth).rev() { + debug_assert!(next_depth <= bottom_depth); + + // `next_depth` is the stuff we're making. + // `current_depth` is the stuff we have. 
+ let current_depth = next_depth + 1; + + let mut iter = leaves.drain(..).map(SubtreeLeaf::from_tuple).peekable(); + while let Some(first) = iter.next() { + // On non-continuous iterations, including the first iteration, `first` may + // be a left or right node. On subsequent continuous iterations, we will always call + // `iter.next()` twice. + + // On non-continuous iterations (including the very first iteration), this column + // could be either on the left or the right. If the next iteration is continuous + // with our right node, then the next iteration's first node is a left node. + + let is_right = first.col.is_odd(); + let (left, right) = if is_right { + // Discontinuous iteration: we have no left node, so it must be empty. + + let left = SubtreeLeaf { + col: first.col - 1, + hash: *EmptySubtreeRoots::entry(DEPTH, current_depth), + }; + let right = first; + + (left, right) + } else { + let left = first; + + let right_col = first.col + 1; + let right = match iter.peek().copied() { + Some(SubtreeLeaf { col, .. }) if col == right_col => { + // Our inputs must be sorted. + debug_assert!(left.col <= col); + // The next leaf in the iterator is our sibling. Use it and consume it! + iter.next().unwrap() + }, + // Otherwise, the leaves don't contain our sibling, so our sibling must be + // empty. + _ => SubtreeLeaf { + col: right_col, + hash: *EmptySubtreeRoots::entry(DEPTH, current_depth), + }, + }; + + (left, right) + }; + + let index = NodeIndex::new_unchecked(current_depth, left.col).parent(); + let node = InnerNode { left: left.hash, right: right.hash }; + let hash = node.hash(); + + let &equivalent_empty_hash = EmptySubtreeRoots::entry(DEPTH, next_depth); + // If this hash is empty, then it doesn't become a new inner node, nor does it count + // as a leaf for the next depth. + if hash != equivalent_empty_hash { + inner_nodes.insert(index, node); + // FIXME: is it possible for this to end up not being sorted? I don't think so. 
+ next_leaves.push((index.value(), hash)); + } + } + + // Stop borrowing `leaves`, so we can swap it. + // The iterator is empty at this point anyway. + drop(iter); + + // After each depth, consider the stuff we just made the new "leaves", and empty the + // other collection. + mem::swap(&mut leaves, &mut next_leaves); + } + + inner_nodes + } } // INNER NODE @@ -346,7 +451,7 @@ pub(crate) trait SparseMerkleTree { #[derive(Debug, Default, Clone, PartialEq, Eq)] #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))] -pub(crate) struct InnerNode { +pub struct InnerNode { pub left: RpoDigest, pub right: RpoDigest, } @@ -456,3 +561,67 @@ impl MutationSet { self.new_root } } + +// HELPERS +// ================================================================================================ +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Default)] +struct SubtreeLeaf { + col: u64, + hash: RpoDigest, +} +impl SubtreeLeaf { + const fn from_tuple((col, hash): (u64, RpoDigest)) -> Self { + Self { col, hash } + } +} + +// TESTS +// ================================================================================================ +#[cfg(test)] +mod test { + use alloc::vec::Vec; + + use super::SparseMerkleTree; + use crate::{ + hash::rpo::RpoDigest, + merkle::{Smt, SmtLeaf, SMT_DEPTH}, + Felt, Word, ONE, + }; + + #[test] + fn test_build_subtree_from_leaves() { + const PAIR_COUNT: u64 = u64::pow(2, 8); + + let entries: Vec<(RpoDigest, Word)> = (0..PAIR_COUNT) + .map(|i| { + let leaf_index = u64::MAX / (i + 1); + let key = RpoDigest::new([ONE, ONE, Felt::new(i), Felt::new(leaf_index)]); + let value = [ONE, ONE, ONE, Felt::new(i)]; + (key, value) + }) + .collect(); + + let control = Smt::with_entries(entries.clone()).unwrap(); + + let leaves: Vec<(u64, RpoDigest)> = entries + .iter() + .map(|(key, value)| { + let leaf = SmtLeaf::new_single(*key, *value); + let col = leaf.index().index.value(); + let hash = leaf.hash(); + (col, hash) + }) + 
.collect(); + + let first_subtree = Smt::build_subtree(leaves, SMT_DEPTH); + assert!(!first_subtree.is_empty()); + + for (index, node) in first_subtree.into_iter() { + let control = control.get_inner_node(index); + assert_eq!( + control, node, + "subtree-computed node at index {index:?} does not match control", + ); + } + } +} From fb0ff729f088d81f0cdd71646e43aa06dc99a471 Mon Sep 17 00:00:00 2001 From: Qyriad Date: Fri, 25 Oct 2024 12:45:40 -0600 Subject: [PATCH 3/4] bench(smt-subtree): add a benchmark for single-leaf subtrees --- benches/smt-subtree.rs | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/benches/smt-subtree.rs b/benches/smt-subtree.rs index d690a665..c469b144 100644 --- a/benches/smt-subtree.rs +++ b/benches/smt-subtree.rs @@ -9,12 +9,14 @@ use miden_crypto::{ use rand_utils::prng_array; use winter_utils::Randomizable; +const PAIR_COUNTS: [u64; 5] = [1, 64, 128, 192, 256]; + fn smt_subtree_even(c: &mut Criterion) { let mut seed = [0u8; 32]; let mut group = c.benchmark_group("subtree8-even"); - for pair_count in (64..=256).step_by(64) { + for pair_count in PAIR_COUNTS { let bench_id = BenchmarkId::from_parameter(pair_count); group.bench_with_input(bench_id, &pair_count, |b, &pair_count| { b.iter_batched( @@ -64,7 +66,7 @@ fn smt_subtree_random(c: &mut Criterion) { let mut group = c.benchmark_group("subtree8-rand"); - for pair_count in (64..=256).step_by(64) { + for pair_count in PAIR_COUNTS { let bench_id = BenchmarkId::from_parameter(pair_count); group.bench_with_input(bench_id, &pair_count, |b, &pair_count| { b.iter_batched( @@ -94,10 +96,6 @@ fn smt_subtree_random(c: &mut Criterion) { }) .collect(); leaves.sort(); - let before = leaves.len(); - leaves.dedup(); - let after = leaves.len(); - assert_eq!(before, after); leaves }, |leaves| { From 64a2d7ae1c5bb96a2e231791a63f2c77583218ac Mon Sep 17 00:00:00 2001 From: Qyriad Date: Fri, 25 Oct 2024 13:31:48 -0600 Subject: [PATCH 4/4] merkle: add a benchmark for constructing 
256-leaf balanced trees --- Cargo.toml | 4 ++++ benches/merkle.rs | 59 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 63 insertions(+) create mode 100644 benches/merkle.rs diff --git a/Cargo.toml b/Cargo.toml index 62d7a0d2..b940acc5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,10 @@ harness = false name = "smt-subtree" harness = false +[[bench]] +name = "merkle" +harness = false + [[bench]] name = "store" harness = false diff --git a/benches/merkle.rs b/benches/merkle.rs new file mode 100644 index 00000000..5bd434bb --- /dev/null +++ b/benches/merkle.rs @@ -0,0 +1,59 @@ +use std::{hint, mem, time::Duration}; + +use criterion::{criterion_group, criterion_main, BatchSize, Criterion}; +use miden_crypto::{merkle::MerkleTree, Felt, Word, ONE}; +use rand_utils::prng_array; + +fn balanced_merkle_even(c: &mut Criterion) { + c.bench_function("balanced-merkle-even", |b| { + b.iter_batched( + || { + let entries: Vec<Word> = + (0..256).map(|i| [Felt::new(i), ONE, ONE, Felt::new(i)]).collect(); + assert_eq!(entries.len(), 256); + entries + }, + |leaves| { + let tree = MerkleTree::new(hint::black_box(leaves)).unwrap(); + assert_eq!(tree.depth(), 8); + }, + BatchSize::SmallInput, + ); + }); +} + +fn balanced_merkle_rand(c: &mut Criterion) { + let mut seed = [0u8; 32]; + c.bench_function("balanced-merkle-rand", |b| { + b.iter_batched( + || { + let entries: Vec<Word> = (0..256).map(|_| generate_word(&mut seed)).collect(); + assert_eq!(entries.len(), 256); + entries + }, + |leaves| { + let tree = MerkleTree::new(hint::black_box(leaves)).unwrap(); + assert_eq!(tree.depth(), 8); + }, + BatchSize::SmallInput, + ); + }); +} + +criterion_group! 
{ + name = smt_subtree_group; + config = Criterion::default() + .measurement_time(Duration::from_secs(20)) + .configure_from_args(); + targets = balanced_merkle_even, balanced_merkle_rand +} +criterion_main!(smt_subtree_group); + +// HELPER FUNCTIONS +// -------------------------------------------------------------------------------------------- + +fn generate_word(seed: &mut [u8; 32]) -> Word { + mem::swap(seed, &mut prng_array(*seed)); + let nums: [u64; 4] = prng_array(*seed); + [Felt::new(nums[0]), Felt::new(nums[1]), Felt::new(nums[2]), Felt::new(nums[3])] +}