Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Compute expected ancestry proof size #6

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
[package]
name = "polkadot-ckb-merkle-mountain-range"
version = "0.6.0"
version = "0.8.0"
authors = [
"Nervos Core Dev <dev@nervos.org>",
"Parity Technologies <admin@parity.io>",
"Robert Hambrock <roberthambrock@gmail.com>"
"Nervos Core Dev <dev@nervos.org>",
"Parity Technologies <admin@parity.io>",
"Robert Hambrock <roberthambrock@gmail.com>"
]

edition = "2018"
Expand All @@ -18,7 +18,7 @@ std = []

[dependencies]
cfg-if = "1.0"
itertools = {version = "0.10.5", default-features = false, features = ["use_alloc"]}
itertools = { version = "0.10.5", default-features = false, features = ["use_alloc"] }

[dev-dependencies]
faster-hex = "0.8.0"
Expand Down
173 changes: 88 additions & 85 deletions src/ancestry_proof.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use crate::collections::VecDeque;
use crate::helper::{
get_peak_map, get_peaks, is_descendant_pos, leaf_index_to_pos, parent_offset,
pos_height_in_tree, sibling_offset,
get_peak_map, get_peaks, leaf_index_to_pos, parent_offset, pos_height_in_tree, sibling_offset,
};
use crate::mmr::{bagging_peaks_hashes, take_while_vec};
pub use crate::mmr::bagging_peaks_hashes;
use crate::mmr::take_while_vec;
use crate::util::BTreeMapExt;
use crate::vec::Vec;
use crate::BTreeMap;
use crate::{Error, Merge, Result};
use core::fmt::Debug;
use core::marker::PhantomData;
Expand All @@ -31,15 +32,12 @@ impl<T: PartialEq + Debug + Clone, M: Merge<Item = T>> AncestryProof<T, M> {
if current_leaves_count <= self.prev_peaks.len() as u64 {
return Err(Error::CorruptedProof);
}
// Test if previous root is correct.
let prev_peaks_positions = {
let prev_peaks_positions = get_peaks(self.prev_mmr_size);
if prev_peaks_positions.len() != self.prev_peaks.len() {
return Err(Error::CorruptedProof);
}
prev_peaks_positions
};

// Test if previous root is correct.
let prev_peaks_positions = get_peaks(self.prev_mmr_size);
if prev_peaks_positions.len() != self.prev_peaks.len() {
return Err(Error::CorruptedProof);
}
let calculated_prev_root = bagging_peaks_hashes::<T, M>(self.prev_peaks.clone())?;
if calculated_prev_root != prev_root {
return Ok(false);
Expand Down Expand Up @@ -74,8 +72,8 @@ impl<T: Clone + PartialEq, M: Merge<Item = T>> NodeMerkleProof<T, M> {
&self.proof
}

pub fn calculate_root(&self, leaves: Vec<(u64, T)>) -> Result<T> {
calculate_root::<_, M, _>(leaves, self.mmr_size, self.proof.iter())
pub fn calculate_root(&self, nodes: Vec<(u64, T)>) -> Result<T> {
calculate_root::<_, M, _>(nodes, self.mmr_size, self.proof.iter())
}

/// from merkle proof of leaf n to calculate merkle root of n + 1 leaves.
Expand Down Expand Up @@ -140,105 +138,71 @@ fn calculate_peak_root<
'a,
T: 'a + PartialEq,
M: Merge<Item = T>,
// I: Iterator<Item = &'a T>
// I: Iterator<Item = &'a (u64, T)>,
>(
nodes: Vec<(u64, T)>,
peak_pos: u64,
// proof_iter: &mut I,
) -> Result<T> {
debug_assert!(!nodes.is_empty(), "can't be empty");
// (position, hash, height)

let mut queue: VecDeque<_> = nodes
.into_iter()
.map(|(pos, item)| (pos, item, pos_height_in_tree(pos)))
.collect();

let mut sibs_processed_from_back = Vec::new();
let mut queue = BTreeMap::new();
for (pos, item) in nodes.into_iter() {
if !queue.checked_insert((pos_height_in_tree(pos), pos), item) {
return Err(Error::CorruptedProof);
}
}

// calculate tree root from each items
while let Some((pos, item, height)) = queue.pop_front() {
while let Some(((height, pos), item)) = queue.pop_first() {
if pos == peak_pos {
if queue.is_empty() {
// return root once queue is consumed
return Ok(item);
}
if queue
.iter()
.any(|entry| entry.0 == peak_pos && entry.1 != item)
{
} else {
return Err(Error::CorruptedProof);
}
if queue
.iter()
.all(|entry| entry.0 == peak_pos && &entry.1 == &item && entry.2 == height)
{
// return root if remaining queue consists only of duplicate root entries
return Ok(item);
}
// if queue not empty, push peak back to the end
queue.push_back((pos, item, height));
continue;
}
// calculate sibling
let next_height = pos_height_in_tree(pos + 1);
let (parent_pos, parent_item) = {
let sibling_offset = sibling_offset(height);
if next_height > height {
// implies pos is right sibling
let (sib_pos, parent_pos) = (pos - sibling_offset, pos + 1);
let parent_item = if Some(&sib_pos) == queue.front().map(|(pos, _, _)| pos) {
let sibling_item = queue.pop_front().map(|(_, item, _)| item).unwrap();
M::merge(&sibling_item, &item)?
} else if Some(&sib_pos) == queue.back().map(|(pos, _, _)| pos) {
let sibling_item = queue.pop_back().map(|(_, item, _)| item).unwrap();
M::merge(&sibling_item, &item)?
}
// handle special if next queue item is descendant of sibling
else if let Some(&(front_pos, ..)) = queue.front() {
if height > 0 && is_descendant_pos(sib_pos, front_pos) {
queue.push_back((pos, item, height));
continue;
let sib_pos = pos - sibling_offset;
let parent_pos = pos + 1;
let parent_item =
if Some(&sib_pos) == queue.first_key_value().map(|((_, pos), _)| pos) {
let sibling_item = queue.pop_first().map(|((_, _), item)| item).unwrap();
M::merge(&sibling_item, &item)?
} else {
return Err(Error::CorruptedProof);
}
} else {
return Err(Error::CorruptedProof);
};
// Old `mmr.rs` code. It's not needed anymore since now we merge the `proof_iter`
// items with the nodes.
// let sibling_item = &proof_iter.next().ok_or(Error::CorruptedProof)?.1;
// M::merge(sibling_item, &item)?
};
(parent_pos, parent_item)
} else {
// pos is left sibling
let (sib_pos, parent_pos) = (pos + sibling_offset, pos + parent_offset(height));
let parent_item = if Some(&sib_pos) == queue.front().map(|(pos, _, _)| pos) {
let sibling_item = queue.pop_front().map(|(_, item, _)| item).unwrap();
M::merge(&item, &sibling_item)?
} else if Some(&sib_pos) == queue.back().map(|(pos, _, _)| pos) {
let sibling_item = queue.pop_back().map(|(_, item, _)| item).unwrap();
let parent = M::merge(&item, &sibling_item)?;
sibs_processed_from_back.push((sib_pos, sibling_item, height));
parent
} else if let Some(&(front_pos, ..)) = queue.front() {
if height > 0 && is_descendant_pos(sib_pos, front_pos) {
queue.push_back((pos, item, height));
continue;
let sib_pos = pos + sibling_offset;
let parent_pos = pos + parent_offset(height);
let parent_item =
if Some(&sib_pos) == queue.first_key_value().map(|((_, pos), _)| pos) {
let sibling_item = queue.pop_first().map(|((_, _), item)| item).unwrap();
M::merge(&item, &sibling_item)?
} else {
return Err(Error::CorruptedProof);
}
} else {
return Err(Error::CorruptedProof);
};
// Old `mmr.rs` code. It's not needed anymore since now we merge the `proof_iter`
// items with the nodes.
// let sibling_item = &proof_iter.next().ok_or(Error::CorruptedProof)?.1;
// M::merge(&item, sibling_item)?
};
(parent_pos, parent_item)
}
};

if parent_pos <= peak_pos {
let parent = (parent_pos, parent_item, height + 1);
if peak_pos == parent_pos
|| queue.front() != Some(&parent)
&& !sibs_processed_from_back.iter().any(|item| item == &parent)
{
queue.push_front(parent)
};
if !queue.checked_insert((height + 1, parent_pos), parent_item) {
return Err(Error::CorruptedProof);
}
} else {
return Err(Error::CorruptedProof);
}
Expand Down Expand Up @@ -266,7 +230,6 @@ fn calculate_peaks_hashes<
.into_iter()
.chain(proof_iter.cloned())
.sorted_by_key(|(pos, _)| *pos)
.dedup_by(|a, b| a.0 == b.0)
.collect();

let peaks = get_peaks(mmr_size);
Expand All @@ -293,11 +256,13 @@ fn calculate_peaks_hashes<
return Err(Error::CorruptedProof);
}

// check rhs peaks
// Old `mmr.rs` code. It's not needed anymore since now we merge the `proof_iter`
// items with the nodes.
// // check rhs peaks
// if let Some((_, rhs_peaks_hashes)) = proof_iter.next() {
// peaks_hashes.push(rhs_peaks_hashes.clone());
// }
// ensure nothing left in proof_iter
// // ensure nothing left in proof_iter
// if proof_iter.next().is_some() {
// return Err(Error::CorruptedProof);
// }
Expand All @@ -321,3 +286,41 @@ fn calculate_root<
let peaks_hashes = calculate_peaks_hashes::<_, M, _>(nodes, mmr_size, proof_iter)?;
bagging_peaks_hashes::<_, M>(peaks_hashes)
}

pub fn expected_ancestry_proof_size(prev_mmr_size: u64, mmr_size: u64) -> usize {
acatangiu marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

given the title of the PR, this seems like the main change in the PR. however, the rest seems like a totally unrelated rewriting of algorithm/refactoring. any reason to mix the two in one PR? or is it not a refactoring and actually this function depends on it?

also, what is the complexity of this function (seems non-trivial to guesstimate given nested loops)

Copy link
Author

@serban300 serban300 Aug 26, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

given the title of the PR, this seems like the main change in the PR.

Yes, exactly.

however, the rest seems like a totally unrelated rewriting of algorithm/refactoring. any reason to mix the two in one PR? or is it not a refactoring and actually this function depends on it?

Yes, it's just a refactoring. No particular reason. Just did it for readability. I tried to follow the previous implementation to understand how to compute the expected proof size and it was just very hard. So I thought it might be helpful to refactor that as well. Also if we're doing a security audit for this PR, maybe it would be good to cover both changes. Also we'll need to upstream ancestry proofs at some point, and this change would help there, because it makes the code more similar to the upstream.

also, what is the complexity of this function (seems non-trivial to guesstimate given nested loops)

Worst-case scenario, for each peak we need to walk from a node inside it up to the peak's height, so the complexity should be something like O(num_peaks * log(num_leaves)).

// Running count of proof items the ancestry proof is expected to contain.
let mut expected_proof_size: usize = 0;
// Peaks of the previous (smaller) MMR and of the current MMR.
let mut prev_peaks = get_peaks(prev_mmr_size);
let peaks = get_peaks(mmr_size);

for (peak_idx, peak) in peaks.iter().enumerate() {
    // Previous peaks that fall under (at positions <= ) the current peak.
    let mut local_prev_peaks: Vec<u64> = take_while_vec(&mut prev_peaks, |pos| *pos <= *peak);

    // Pop lowest local prev peak under the current peak
    match local_prev_peaks.pop() {
        Some(mut node) => {
            let mut height = pos_height_in_tree(node);
            // Climb from the popped prev peak up to the current peak. Each time the
            // climber is a left sibling, its right sibling must be supplied by the
            // proof, contributing one proof item.
            while node < *peak {
                if pos_height_in_tree(node + 1) > height {
                    // Node is right sibling
                    node += 1;
                    height += 1;
                } else {
                    // Node is left sibling
                    expected_proof_size += 1;
                    node += parent_offset(height);
                    height += 1;
                }
            }
        }
        None => {
            // No previous peaks remain at or under this peak: every peak from here
            // on is entirely new, and they are bagged into a single rhs proof item.
            // NOTE(review): `peak_idx <= peaks.len()` is always true for an
            // enumerate index (peak_idx < peaks.len() by construction) — confirm
            // whether a stricter bound was intended here, otherwise the `if` can
            // be dropped.
            if peak_idx <= peaks.len() {
                // Account for rhs bagging peaks
                expected_proof_size += 1;
            }
            break;
        }
    }
}

expected_proof_size
Comment on lines +291 to +325

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This works by checking the frequency of left-sibling-hood when iterating up from the last prev_peak.
In some sense, it's a myopic climber ascending a dark mountain and feeling out its shape from relative location :P
The mountain peaks here are the co-path items for proving said last prev_peak.
We can also determine the exact shape of the mountain from just knowing its width (i.e. the number of leaves beneath it) though, and skip the climbing step.

I've opened up #7 to show the changes, but feel free to cherry pick the new algorithm over here instead.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you ! Sounds good ! I'll take a look.

}
10 changes: 0 additions & 10 deletions src/helper.rs
Original file line number Diff line number Diff line change
Expand Up @@ -75,16 +75,6 @@ pub fn get_peak_map(mmr_size: u64) -> u64 {
peak_map
}

/// Tells whether `descendant_contender` lies within the subtree rooted at
/// `ancestor_contender` in a tree of the MMR.
///
/// "Ancestry" here refers to positional hierarchy inside the MMR tree, not
/// temporal hierarchy: a descendant is always appended to the MMR before its
/// ancestor, so its position is never greater than the ancestor's.
pub fn is_descendant_pos(ancestor_contender: u64, descendant_contender: u64) -> bool {
    // A node's subtree occupies a contiguous run of positions ending at the node
    // itself; the leftmost position of that run is `ancestor + 1 - sibling_offset(h)`.
    let ancestor_height = pos_height_in_tree(ancestor_contender);
    let subtree_first_pos = ancestor_contender + 1 - sibling_offset(ancestor_height);
    (subtree_first_pos..=ancestor_contender).contains(&descendant_contender)
}

/// Returns the pos of the peaks in the mmr.
/// for example, for a mmr with 11 leaves, the mmr_size is 19, it will return [14, 17, 18].
/// 14
Expand Down
6 changes: 6 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ cfg_if::cfg_if! {
use std::collections;
use std::vec;
use std::string;
use std::collections::BTreeSet;
use std::collections::BTreeMap;
use std::collections::btree_map::Entry as BTreeMapEntry;
} else {
extern crate alloc;
use alloc::borrow;
use alloc::collections;
use alloc::vec;
use alloc::string;
use alloc::collections::btree_set::BTreeSet;
use alloc::collections::btree_map::BTreeMap;
use alloc::collections::btree_map::Entry as BTreeMapEntry;
}
}
17 changes: 8 additions & 9 deletions src/mmr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@ use crate::helper::{
pos_height_in_tree, sibling_offset,
};
use crate::mmr_store::{MMRBatch, MMRStoreReadOps, MMRStoreWriteOps};
use crate::util::VeqDequeExt;
use crate::vec;
use crate::vec::Vec;
use crate::BTreeSet;
use crate::{Error, Merge, Result};
use core::fmt::Debug;
use core::marker::PhantomData;

#[allow(clippy::upper_case_acronyms)]
pub struct MMR<T, M, S> {
mmr_size: u64,
Expand Down Expand Up @@ -241,13 +240,13 @@ impl<T: Clone + PartialEq, M: Merge<Item = T>, S: MMRStoreReadOps<T>> MMR<T, M,
return Ok(());
}

let mut queue: VecDeque<_> = VecDeque::new();
let mut queue = BTreeSet::new();
for value in pos_list.iter().map(|pos| (pos_height_in_tree(*pos), *pos)) {
queue.insert_sorted(value);
queue.insert(value);
}

// Generate sub-tree merkle proof for positions
while let Some((height, pos)) = queue.pop_front() {
while let Some((height, pos)) = queue.pop_first() {
debug_assert!(pos <= peak_pos);
if pos == peak_pos {
if queue.is_empty() {
Expand All @@ -270,9 +269,9 @@ impl<T: Clone + PartialEq, M: Merge<Item = T>, S: MMRStoreReadOps<T>> MMR<T, M,
}
};

if Some(&sib_pos) == queue.front().map(|(_, pos)| pos) {
if Some(&sib_pos) == queue.first().map(|(_, pos)| pos) {
// drop sibling
queue.pop_front();
queue.pop_first();
} else {
let sibling = (
sib_pos,
Expand All @@ -285,7 +284,7 @@ impl<T: Clone + PartialEq, M: Merge<Item = T>, S: MMRStoreReadOps<T>> MMR<T, M,
}
if parent_pos < peak_pos {
// save pos to tree buf
queue.insert_sorted((height + 1, parent_pos));
queue.insert((height + 1, parent_pos));
}
}
Ok(())
Expand Down Expand Up @@ -692,7 +691,7 @@ fn calculate_peaks_hashes<'a, T: 'a + Clone, M: Merge<Item = T>, I: Iterator<Ite
Ok(peaks_hashes)
}

pub(crate) fn bagging_peaks_hashes<T, M: Merge<Item = T>>(mut peaks_hashes: Vec<T>) -> Result<T> {
pub fn bagging_peaks_hashes<T, M: Merge<Item = T>>(mut peaks_hashes: Vec<T>) -> Result<T> {
// bagging peaks
// bagging from right to left via hash(right, left).
while peaks_hashes.len() > 1 {
Expand Down
7 changes: 6 additions & 1 deletion src/tests/test_ancestry.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use super::{MergeNumberHash, NumberHash};
use crate::ancestry_proof::expected_ancestry_proof_size;
use crate::leaf_index_to_mmr_size;
use crate::util::{MemMMR, MemStore};

Expand All @@ -7,7 +8,7 @@ fn test_ancestry() {
let store = MemStore::default();
let mut mmr = MemMMR::<_, MergeNumberHash>::new(0, &store);

let mmr_size = 300;
let mmr_size = 5000;
let mut prev_roots = Vec::new();
for i in 0..mmr_size {
mmr.push(NumberHash::from(i)).unwrap();
Expand All @@ -21,5 +22,9 @@ fn test_ancestry() {
assert!(ancestry_proof
.verify_ancestor(root.clone(), prev_roots[i as usize].clone())
.unwrap());
assert_eq!(
expected_ancestry_proof_size(ancestry_proof.prev_mmr_size, mmr.mmr_size()),
ancestry_proof.prev_peaks_proof.proof_items().len()
);
}
}
Loading