From 04dbbf9bcaf863ae3dfc3588e77d4820991a2755 Mon Sep 17 00:00:00 2001
From: Qyriad <qyriad@qyriad.me>
Date: Tue, 22 Oct 2024 17:16:38 -0600
Subject: [PATCH 1/4] merkle: add parent() helper function on NodeIndex

---
 src/merkle/index.rs | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/merkle/index.rs b/src/merkle/index.rs
index 104ceb44..24b38089 100644
--- a/src/merkle/index.rs
+++ b/src/merkle/index.rs
@@ -97,6 +97,14 @@ impl NodeIndex {
         self
     }
 
+    /// Returns the parent of the current node (the depth saturates at 0). This is the same as
+    /// [`Self::move_up()`], but returns a new value instead of mutating `self`.
+    pub const fn parent(mut self) -> Self {
+        self.depth = self.depth.saturating_sub(1);
+        self.value >>= 1;
+        self
+    }
+
     // PROVIDERS
     // --------------------------------------------------------------------------------------------
 

From 96389691e6c8d94974c71875a5167d95ce145682 Mon Sep 17 00:00:00 2001
From: Qyriad <qyriad@qyriad.me>
Date: Wed, 23 Oct 2024 19:26:42 -0600
Subject: [PATCH 2/4] WIP(smt): impl simple subtree8 hashing and benchmarks for
 it

---
 Cargo.toml                 |   4 +
 benches/smt-subtree.rs     | 137 +++++++++++++++++++++++++++++
 src/merkle/smt/full/mod.rs |   7 ++
 src/merkle/smt/mod.rs      | 171 ++++++++++++++++++++++++++++++++++++-
 4 files changed, 318 insertions(+), 1 deletion(-)
 create mode 100644 benches/smt-subtree.rs

diff --git a/Cargo.toml b/Cargo.toml
index 2616341c..62d7a0d2 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -27,6 +27,10 @@ harness = false
 name = "smt"
 harness = false
 
+[[bench]]
+name = "smt-subtree"
+harness = false
+
 [[bench]]
 name = "store"
 harness = false
diff --git a/benches/smt-subtree.rs b/benches/smt-subtree.rs
new file mode 100644
index 00000000..d690a665
--- /dev/null
+++ b/benches/smt-subtree.rs
@@ -0,0 +1,137 @@
+use std::{fmt::Debug, hint, mem, time::Duration};
+
+use criterion::{criterion_group, criterion_main, BatchSize, BenchmarkId, Criterion};
+use miden_crypto::{
+    hash::rpo::RpoDigest,
+    merkle::{NodeIndex, Smt, SmtLeaf, SMT_DEPTH},
+    Felt, Word, ONE,
+};
+use rand_utils::prng_array;
+use winter_utils::Randomizable;
+
+fn smt_subtree_even(c: &mut Criterion) {
+    let mut seed = [0u8; 32];
+
+    let mut group = c.benchmark_group("subtree8-even");
+
+    for pair_count in (64..=256).step_by(64) {
+        let bench_id = BenchmarkId::from_parameter(pair_count);
+        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
+            b.iter_batched(
+                || {
+                    // Setup: build the sorted `(column, leaf hash)` input for `build_subtree`.
+                    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
+                        .map(|n| {
+                            // Spread entries evenly over 0..=255, the width of a depth-8 subtree.
+                            let leaf_index = n * 256 / pair_count;
+                            let key = RpoDigest::new([
+                                generate_value(&mut seed),
+                                ONE,
+                                Felt::new(n),
+                                Felt::new(leaf_index),
+                            ]);
+                            let value = generate_word(&mut seed);
+                            (key, value)
+                        })
+                        .collect();
+
+                    let mut leaves: Vec<_> = entries
+                        .iter()
+                        .map(|(key, value)| {
+                            let leaf = SmtLeaf::new_single(*key, *value);
+                            let col = NodeIndex::from(leaf.index()).value();
+                            let hash = leaf.hash();
+                            (col, hash)
+                        })
+                        .collect();
+                    leaves.sort();
+                    leaves
+                },
+                |leaves| {
+                    // Benchmarked function.
+                    let subtree =
+                        Smt::build_subtree(hint::black_box(leaves), hint::black_box(SMT_DEPTH));
+                    assert!(!subtree.is_empty());
+                },
+                BatchSize::SmallInput,
+            );
+        });
+    }
+}
+
+fn smt_subtree_random(c: &mut Criterion) {
+    let mut seed = [0u8; 32];
+
+    let mut group = c.benchmark_group("subtree8-rand");
+
+    for pair_count in (64..=256).step_by(64) {
+        let bench_id = BenchmarkId::from_parameter(pair_count);
+        group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
+            b.iter_batched(
+                || {
+                    // Setup. NOTE(review): random `u8` indices may repeat; confirm that is intended.
+                    let entries: Vec<(RpoDigest, Word)> = (0..pair_count)
+                        .map(|i| {
+                            let leaf_index: u8 = generate_value(&mut seed);
+                            let key = RpoDigest::new([
+                                ONE,
+                                ONE,
+                                Felt::new(i),
+                                Felt::new(leaf_index as u64),
+                            ]);
+                            let value = generate_word(&mut seed);
+                            (key, value)
+                        })
+                        .collect();
+
+                    let mut leaves: Vec<_> = entries
+                        .iter()
+                        .map(|(key, value)| {
+                            let leaf = SmtLeaf::new_single(*key, *value);
+                            let col = NodeIndex::from(leaf.index()).value();
+                            let hash = leaf.hash();
+                            (col, hash)
+                        })
+                        .collect();
+                    leaves.sort();
+                    let before = leaves.len();
+                    leaves.dedup();
+                    let after = leaves.len();
+                    assert_eq!(before, after);
+                    leaves
+                },
+                |leaves| {
+                    let subtree =
+                        Smt::build_subtree(hint::black_box(leaves), hint::black_box(SMT_DEPTH));
+                    assert!(!subtree.is_empty());
+                },
+                BatchSize::SmallInput,
+            );
+        });
+    }
+}
+
+criterion_group! {
+    name = smt_subtree_group;
+    config = Criterion::default()
+        .measurement_time(Duration::from_secs(40))
+        .sample_size(60)
+        .configure_from_args();
+    targets = smt_subtree_even, smt_subtree_random
+}
+criterion_main!(smt_subtree_group);
+
+// HELPER FUNCTIONS
+// --------------------------------------------------------------------------------------------
+
+fn generate_value<T: Copy + Debug + Randomizable>(seed: &mut [u8; 32]) -> T {
+    mem::swap(seed, &mut prng_array(*seed)); // Step the seed so successive calls differ.
+    let [value]: [T; 1] = prng_array(*seed);
+    value
+}
+
+fn generate_word(seed: &mut [u8; 32]) -> Word {
+    mem::swap(seed, &mut prng_array(*seed)); // Step the seed so successive calls differ.
+    // Draw four raw `u64`s from the stepped seed and wrap each one in a field element.
+    let raw: [u64; 4] = prng_array(*seed);
+    raw.map(Felt::new)
+}
diff --git a/src/merkle/smt/full/mod.rs b/src/merkle/smt/full/mod.rs
index 9c640021..7101a03e 100644
--- a/src/merkle/smt/full/mod.rs
+++ b/src/merkle/smt/full/mod.rs
@@ -243,6 +243,13 @@ impl Smt {
             None
         }
     }
+
+    pub fn build_subtree(
+        leaves: Vec<(u64, RpoDigest)>,
+        bottom_depth: u8,
+    ) -> BTreeMap<NodeIndex, InnerNode> {
+        <Self as SparseMerkleTree<SMT_DEPTH>>::build_subtree(leaves, bottom_depth)
+    }
 }
 
 impl SparseMerkleTree<SMT_DEPTH> for Smt {
diff --git a/src/merkle/smt/mod.rs b/src/merkle/smt/mod.rs
index 0b7ceb95..14613bd5 100644
--- a/src/merkle/smt/mod.rs
+++ b/src/merkle/smt/mod.rs
@@ -1,4 +1,7 @@
 use alloc::{collections::BTreeMap, vec::Vec};
+use core::mem;
+
+use num::Integer;
 
 use super::{EmptySubtreeRoots, InnerNodeInfo, MerkleError, MerklePath, NodeIndex};
 use crate::{
@@ -339,6 +342,108 @@ pub(crate) trait SparseMerkleTree<const DEPTH: u8> {
     ///
     /// The length `path` is guaranteed to be equal to `DEPTH`
     fn path_and_leaf_to_opening(path: MerklePath, leaf: Self::Leaf) -> Self::Opening;
+
+    /// Builds the inner nodes of the depth-8 subtree whose bottom row sits at `bottom_depth`, from
+    /// `leaves`: `(column, hash)` pairs for that row, which must be sorted by column. The leaves
+    /// are 'conceptual' leaves, i.e. entities at the bottom of some subtree, not [`Self::Leaf`].
+    ///
+    /// Returns the created inner nodes keyed by [`NodeIndex`], omitting any node whose hash equals
+    /// the empty subtree root for its depth.
+    ///
+    /// # Panics
+    /// With debug assertions on, panics if `bottom_depth` is above `DEPTH` or not a multiple of 8,
+    /// or if `leaves` has more than 256 entries. Sortedness of `leaves` is assumed, not checked.
+    // FIXME: more complete docstring; `bottom_depth == 0` underflows `bottom_depth - 8` below.
+    #[cfg_attr(not(test), allow(dead_code))]
+    fn build_subtree(
+        mut leaves: Vec<(u64, RpoDigest)>,
+        bottom_depth: u8,
+    ) -> BTreeMap<NodeIndex, InnerNode> {
+        debug_assert!(bottom_depth <= DEPTH);
+        debug_assert!(bottom_depth.is_multiple_of(&8));
+        debug_assert!(leaves.len() <= usize::pow(2, 8));
+
+        let subtree_root = bottom_depth - 8;
+
+        let mut inner_nodes: BTreeMap<NodeIndex, InnerNode> = Default::default();
+
+        let mut next_leaves: Vec<(u64, RpoDigest)> = Vec::with_capacity(leaves.len() / 2);
+
+        for next_depth in (subtree_root..bottom_depth).rev() {
+            debug_assert!(next_depth <= bottom_depth);
+
+            // `next_depth` is the stuff we're making.
+            // `current_depth` is the stuff we have.
+            let current_depth = next_depth + 1;
+
+            let mut iter = leaves.drain(..).map(SubtreeLeaf::from_tuple).peekable();
+            while let Some(first) = iter.next() {
+                // `first` may be a left child (even column) or a right child (odd column).
+                //
+                // A right child reaching this point has no left sibling in `leaves`: with sorted
+                // input, a present left sibling would already have consumed it via `peek()` below.
+                // Its left sibling is therefore the empty subtree root for `current_depth`.
+                //
+                // A left child takes the next item as its right sibling only if that item's column
+                // is exactly `first.col + 1`; otherwise its right sibling is empty as well.
+                let is_right = first.col.is_odd();
+                let (left, right) = if is_right {
+                    // Discontinuous iteration: we have no left node, so it must be empty.
+
+                    let left = SubtreeLeaf {
+                        col: first.col - 1,
+                        hash: *EmptySubtreeRoots::entry(DEPTH, current_depth),
+                    };
+                    let right = first;
+
+                    (left, right)
+                } else {
+                    let left = first;
+
+                    let right_col = first.col + 1;
+                    let right = match iter.peek().copied() {
+                        Some(SubtreeLeaf { col, .. }) if col == right_col => {
+                            // Our inputs must be sorted.
+                            debug_assert!(left.col <= col);
+                            // The next leaf in the iterator is our sibling. Use it and consume it!
+                            iter.next().unwrap()
+                        },
+                        // Otherwise, the leaves don't contain our sibling, so our sibling must be
+                        // empty.
+                        _ => SubtreeLeaf {
+                            col: right_col,
+                            hash: *EmptySubtreeRoots::entry(DEPTH, current_depth),
+                        },
+                    };
+
+                    (left, right)
+                };
+
+                let index = NodeIndex::new_unchecked(current_depth, left.col).parent();
+                let node = InnerNode { left: left.hash, right: right.hash };
+                let hash = node.hash();
+
+                let &equivalent_empty_hash = EmptySubtreeRoots::entry(DEPTH, next_depth);
+                // If this hash is empty, then it doesn't become a new inner node, nor does it count
+                // as a leaf for the next depth.
+                if hash != equivalent_empty_hash {
+                    inner_nodes.insert(index, node);
+                    // FIXME: is it possible for this to end up not being sorted? I don't think so.
+                    next_leaves.push((index.value(), hash));
+                }
+            }
+
+            // Stop borrowing `leaves`, so we can swap it.
+            // The iterator is empty at this point anyway.
+            drop(iter);
+
+            // After each depth, consider the stuff we just made the new "leaves", and empty the
+            // other collection.
+            mem::swap(&mut leaves, &mut next_leaves);
+        }
+
+        inner_nodes
+    }
 }
 
 // INNER NODE
@@ -346,7 +451,7 @@ pub(crate) trait SparseMerkleTree<const DEPTH: u8> {
 
 #[derive(Debug, Default, Clone, PartialEq, Eq)]
 #[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
-pub(crate) struct InnerNode {
+pub struct InnerNode {
     pub left: RpoDigest,
     pub right: RpoDigest,
 }
@@ -456,3 +561,67 @@ impl<const DEPTH: u8, K, V> MutationSet<DEPTH, K, V> {
         self.new_root
     }
 }
+
+// HELPERS
+// ================================================================================================
+#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Default)]
+struct SubtreeLeaf {
+    col: u64, // Horizontal index (column) of this entry within its row.
+    hash: RpoDigest, // Hash stored at that column.
+}
+impl SubtreeLeaf {
+    const fn from_tuple((col, hash): (u64, RpoDigest)) -> Self {
+        Self { col, hash }
+    }
+}
+
+// TESTS
+// ================================================================================================
+#[cfg(test)]
+mod test {
+    use alloc::vec::Vec;
+
+    use super::SparseMerkleTree;
+    use crate::{
+        hash::rpo::RpoDigest,
+        merkle::{Smt, SmtLeaf, SMT_DEPTH},
+        Felt, Word, ONE,
+    };
+
+    #[test]
+    fn test_build_subtree_from_leaves() {
+        const PAIR_COUNT: u64 = u64::pow(2, 8);
+
+        let entries: Vec<(RpoDigest, Word)> = (0..PAIR_COUNT)
+            .map(|i| {
+                let leaf_index = u64::MAX / (i + 1);
+                let key = RpoDigest::new([ONE, ONE, Felt::new(i), Felt::new(leaf_index)]);
+                let value = [ONE, ONE, ONE, Felt::new(i)];
+                (key, value)
+            })
+            .collect();
+
+        let control = Smt::with_entries(entries.clone()).unwrap();
+
+        let mut leaves: Vec<(u64, RpoDigest)> = entries
+            .iter()
+            .map(|(key, value)| {
+                let leaf = SmtLeaf::new_single(*key, *value);
+                (leaf.index().index.value(), leaf.hash())
+            })
+            .collect();
+        // `build_subtree` requires its input to be sorted by column.
+        leaves.sort();
+
+        let first_subtree = Smt::build_subtree(leaves, SMT_DEPTH);
+        assert!(!first_subtree.is_empty());
+
+        for (index, node) in first_subtree.into_iter() {
+            let control = control.get_inner_node(index);
+            assert_eq!(
+                control, node,
+                "subtree-computed node at index {index:?} does not match control",
+            );
+        }
+    }
+}

From fb0ff729f088d81f0cdd71646e43aa06dc99a471 Mon Sep 17 00:00:00 2001
From: Qyriad <qyriad@qyriad.me>
Date: Fri, 25 Oct 2024 12:45:40 -0600
Subject: [PATCH 3/4] bench(smt-subtree): add a benchmark for single-leaf
 subtrees

---
 benches/smt-subtree.rs | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/benches/smt-subtree.rs b/benches/smt-subtree.rs
index d690a665..c469b144 100644
--- a/benches/smt-subtree.rs
+++ b/benches/smt-subtree.rs
@@ -9,12 +9,14 @@ use miden_crypto::{
 use rand_utils::prng_array;
 use winter_utils::Randomizable;
 
+const PAIR_COUNTS: [u64; 5] = [1, 64, 128, 192, 256];
+
 fn smt_subtree_even(c: &mut Criterion) {
     let mut seed = [0u8; 32];
 
     let mut group = c.benchmark_group("subtree8-even");
 
-    for pair_count in (64..=256).step_by(64) {
+    for pair_count in PAIR_COUNTS {
         let bench_id = BenchmarkId::from_parameter(pair_count);
         group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
             b.iter_batched(
@@ -64,7 +66,7 @@ fn smt_subtree_random(c: &mut Criterion) {
 
     let mut group = c.benchmark_group("subtree8-rand");
 
-    for pair_count in (64..=256).step_by(64) {
+    for pair_count in PAIR_COUNTS {
         let bench_id = BenchmarkId::from_parameter(pair_count);
         group.bench_with_input(bench_id, &pair_count, |b, &pair_count| {
             b.iter_batched(
@@ -94,10 +96,6 @@ fn smt_subtree_random(c: &mut Criterion) {
                         })
                         .collect();
                     leaves.sort();
-                    let before = leaves.len();
-                    leaves.dedup();
-                    let after = leaves.len();
-                    assert_eq!(before, after);
                     leaves
                 },
                 |leaves| {

From 64a2d7ae1c5bb96a2e231791a63f2c77583218ac Mon Sep 17 00:00:00 2001
From: Qyriad <qyriad@qyriad.me>
Date: Fri, 25 Oct 2024 13:31:48 -0600
Subject: [PATCH 4/4] merkle: add a benchmark for constructing 256-leaf
 balanced trees

---
 Cargo.toml        |  4 ++++
 benches/merkle.rs | 59 +++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 benches/merkle.rs

diff --git a/Cargo.toml b/Cargo.toml
index 62d7a0d2..b940acc5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -31,6 +31,10 @@ harness = false
 name = "smt-subtree"
 harness = false
 
+[[bench]]
+name = "merkle"
+harness = false
+
 [[bench]]
 name = "store"
 harness = false
diff --git a/benches/merkle.rs b/benches/merkle.rs
new file mode 100644
index 00000000..5bd434bb
--- /dev/null
+++ b/benches/merkle.rs
@@ -0,0 +1,59 @@
+use std::{hint, mem, time::Duration};
+
+use criterion::{criterion_group, criterion_main, BatchSize, Criterion};
+use miden_crypto::{merkle::MerkleTree, Felt, Word, ONE};
+use rand_utils::prng_array;
+
+fn balanced_merkle_even(c: &mut Criterion) {
+    c.bench_function("balanced-merkle-even", |b| {
+        b.iter_batched(
+            // Setup: 256 leaves whose first and last elements are the leaf's position.
+            || {
+                let leaves: Vec<Word> =
+                    (0..256).map(|n| [Felt::new(n), ONE, ONE, Felt::new(n)]).collect();
+                assert_eq!(leaves.len(), 256);
+                leaves
+            },
+            |leaves| {
+                assert_eq!(MerkleTree::new(hint::black_box(leaves)).unwrap().depth(), 8);
+            },
+            BatchSize::SmallInput,
+        );
+    });
+}
+
+fn balanced_merkle_rand(c: &mut Criterion) {
+    let mut seed = [0u8; 32];
+    c.bench_function("balanced-merkle-rand", |b| {
+        b.iter_batched(
+            // Setup: 256 pseudo-random leaves; `seed` advances with every generated word.
+            || {
+                let leaves: Vec<Word> = (0..256).map(|_| generate_word(&mut seed)).collect();
+                assert_eq!(leaves.len(), 256);
+                leaves
+            },
+            |leaves| {
+                assert_eq!(MerkleTree::new(hint::black_box(leaves)).unwrap().depth(), 8);
+            },
+            BatchSize::SmallInput,
+        );
+    });
+}
+
+criterion_group! {
+    name = merkle_group;
+    config = Criterion::default()
+        .measurement_time(Duration::from_secs(20))
+        .configure_from_args();
+    targets = balanced_merkle_even, balanced_merkle_rand
+}
+criterion_main!(merkle_group);
+
+// HELPER FUNCTIONS
+// --------------------------------------------------------------------------------------------
+
+fn generate_word(seed: &mut [u8; 32]) -> Word {
+    mem::swap(seed, &mut prng_array(*seed)); // Step the seed so successive calls differ.
+    let raw: [u64; 4] = prng_array(*seed);
+    raw.map(Felt::new)
+}