Skip to content

Commit

Permalink
start slots witnesses with known capacities (#889)
Browse files Browse the repository at this point in the history
  • Loading branch information
arthurpaulino authored Nov 15, 2023
1 parent 4543009 commit 030fc4f
Show file tree
Hide file tree
Showing 4 changed files with 107 additions and 8 deletions.
6 changes: 5 additions & 1 deletion src/field.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
//! relating this field to the expressions of the language.
use clap::ValueEnum;
use ff::{PrimeField, PrimeFieldBits};
use nova::provider::bn256_grumpkin::bn256;
use nova::provider::bn256_grumpkin::{bn256, grumpkin};
use serde::{Deserialize, Serialize};
use std::convert::TryFrom;
use std::hash::Hash;
Expand Down Expand Up @@ -272,6 +272,10 @@ impl LurkField for bn256::Scalar {
const FIELD: LanguageField = LanguageField::BN256;
}

/// Makes grumpkin's scalar field usable as a `LurkField`, identified by
/// `LanguageField::Grumpkin`.
impl LurkField for grumpkin::Scalar {
const FIELD: LanguageField = LanguageField::Grumpkin;
}

// NOTE: the impl LurkField for grumpkin::Scalar used to be voluntarily omitted to avoid confusion; it is now provided above.

// For working around the orphan trait impl rule
Expand Down
8 changes: 4 additions & 4 deletions src/lem/circuit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1594,10 +1594,10 @@ impl Func {
};

// fixed cost for each slot
let slot_constraints = 289 * self.slots_count.hash4
+ 337 * self.slots_count.hash6
+ 388 * self.slots_count.hash8
+ 265 * self.slots_count.commitment
let slot_constraints = store.hash4_cost() * self.slots_count.hash4
+ store.hash6_cost() * self.slots_count.hash6
+ store.hash8_cost() * self.slots_count.hash8
+ store.hash3_cost() * self.slots_count.commitment
+ bit_decomp_cost * self.slots_count.bit_decomp;
let num_constraints = recurse(&self.body, globals, store, false);
slot_constraints + num_constraints + globals.len()
Expand Down
75 changes: 72 additions & 3 deletions src/lem/multiframe.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::{
coprocessor::Coprocessor,
error::{ProofError, ReductionError},
eval::{lang::Lang, Meta},
field::LurkField,
field::{LanguageField, LurkField},
proof::{
nova::{CurveCycleEquipped, G1, G2},
supernova::{FoldingConfig, C2},
Expand Down Expand Up @@ -140,6 +140,30 @@ fn assert_eq_ptrs_aptrs<F: LurkField>(
Ok(())
}

// Hardcoded slot witness sizes, empirically collected.
// These are the bit decomposition witness sizes, one per `LanguageField`
// variant, since that size depends on the field we're in.

/// Bit decomposition witness size used when the field is `LanguageField::Pallas`.
const BIT_DECOMP_PALLAS_WITNESS_SIZE: usize = 298;
/// Bit decomposition witness size used when the field is `LanguageField::Vesta`.
const BIT_DECOMP_VESTA_WITNESS_SIZE: usize = 301;
/// Bit decomposition witness size used when the field is `LanguageField::BN256`.
const BIT_DECOMP_BN256_WITNESS_SIZE: usize = 354;
/// Bit decomposition witness size used when the field is `LanguageField::Grumpkin`.
const BIT_DECOMP_GRUMPKIN_WITNESS_SIZE: usize = 364;

/// Returns how many witness elements a slot of type `slot_type` occupies.
///
/// Hash-based slots (`Hash4`, `Hash6`, `Hash8`, `Commitment`) are sized from the
/// hash cost reported by `store` plus the number of preimage elements. Bit
/// decomposition slots use a hardcoded size that depends on the field `F`.
#[inline]
fn compute_witness_size<F: LurkField>(slot_type: &SlotType, store: &Store<F>) -> usize {
    let (hash_cost, preimage_len) = match slot_type {
        // Bit decomposition has no hash; its size is looked up per field.
        SlotType::BitDecomp => {
            return match F::FIELD {
                LanguageField::Pallas => BIT_DECOMP_PALLAS_WITNESS_SIZE,
                LanguageField::Vesta => BIT_DECOMP_VESTA_WITNESS_SIZE,
                LanguageField::BN256 => BIT_DECOMP_BN256_WITNESS_SIZE,
                LanguageField::Grumpkin => BIT_DECOMP_GRUMPKIN_WITNESS_SIZE,
            };
        }
        SlotType::Hash4 => (store.hash4_cost(), 4),
        SlotType::Hash6 => (store.hash6_cost(), 6),
        SlotType::Hash8 => (store.hash8_cost(), 8),
        SlotType::Commitment => (store.hash3_cost(), 3),
    };
    // Hash cost plus one element per preimage entry.
    hash_cost + preimage_len
}

/// Generates the witnesses for all slots in `frames`. Since many slots are fed
/// with dummy data, we cache their (dummy) witnesses for extra speed
fn generate_slots_witnesses<F: LurkField>(
Expand All @@ -160,11 +184,25 @@ fn generate_slots_witnesses<F: LurkField>(
.into_iter()
.for_each(|(sd_vec, st)| sd_vec.iter().for_each(|sd| slots_data.push((sd, st))));
});
// precompute these values
let hash4_witness_size = compute_witness_size(&SlotType::Hash4, store);
let hash6_witness_size = compute_witness_size(&SlotType::Hash6, store);
let hash8_witness_size = compute_witness_size(&SlotType::Hash8, store);
let commitment_witness_size = compute_witness_size(&SlotType::Commitment, store);
let bit_decomp_witness_size = compute_witness_size(&SlotType::BitDecomp, store);
// fast getter for the precomputed values
let get_witness_size = |slot_type| match slot_type {
SlotType::Hash4 => hash4_witness_size,
SlotType::Hash6 => hash6_witness_size,
SlotType::Hash8 => hash8_witness_size,
SlotType::Commitment => commitment_witness_size,
SlotType::BitDecomp => bit_decomp_witness_size,
};
// cache dummy slots witnesses with `Arc` for speedy clones
let dummy_witnesses_cache: FrozenMap<_, Box<Arc<SlotWitness<F>>>> = FrozenMap::default();
let gen_slot_witness = |(slot_idx, (slot_data, slot_type))| {
let mk_witness = || {
let mut witness = WitnessCS::new();
let mut witness = WitnessCS::with_capacity(1, get_witness_size(slot_type));
let allocations = allocate_slot(&mut witness, slot_data, slot_idx, slot_type, store)
.expect("slot allocations failed");
Arc::new(SlotWitness {
Expand Down Expand Up @@ -896,7 +934,8 @@ where
#[cfg(test)]
mod tests {
use bellpepper_core::test_cs::TestConstraintSystem;
use pasta_curves::Fq;
use nova::provider::bn256_grumpkin::{bn256::Scalar as Bn, grumpkin::Scalar as Gr};
use pasta_curves::{Fp, Fq};

use crate::{
eval::lang::Coproc,
Expand All @@ -908,6 +947,36 @@ mod tests {

use super::*;

/// Asserts that the computed witness sizes are correct across all slot types
/// and fields used in Lurk
#[test]
fn test_get_witness_size() {
    /// For every slot type, allocates a dummy slot (no slot data) over the
    /// field `F` and checks that the number of aux assignments produced
    /// matches what `compute_witness_size` predicts.
    fn assert_sizes<F: LurkField>() {
        [
            SlotType::Hash4,
            SlotType::Hash6,
            SlotType::Hash8,
            SlotType::Commitment,
            SlotType::BitDecomp,
        ]
        .into_par_iter()
        .for_each(|slot_type| {
            let store = Store::<F>::default();
            let mut w = WitnessCS::<F>::new();
            let computed_size = compute_witness_size::<F>(&slot_type, &store);
            allocate_slot(&mut w, &None, 0, slot_type, &store).unwrap();
            assert_eq!(w.aux_assignment().len(), computed_size);
        });
    }
    // Four fields are dispatched below, so the (exclusive) range must end at 4;
    // with `0..3` the `Bn` arm was never reached and BN256 went untested.
    (0..4).into_par_iter().for_each(|i| match i {
        0 => assert_sizes::<Fp>(),
        1 => assert_sizes::<Fq>(),
        2 => assert_sizes::<Gr>(),
        3 => assert_sizes::<Bn>(),
        _ => unreachable!(),
    });
}

#[test]
fn test_sequential_and_parallel_witnesses_equivalences() {
let lurk_step = eval_step();
Expand Down
26 changes: 26 additions & 0 deletions src/lem/store.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use anyhow::{bail, Result};
use arc_swap::ArcSwap;
use bellpepper::util_cs::witness_cs::SizedWitness;
use elsa::{
sync::index_set::FrozenIndexSet,
sync::{FrozenMap, FrozenVec},
};
use indexmap::IndexSet;
use neptune::Poseidon;
use nom::{sequence::preceded, Parser};
use rayon::prelude::{IntoParallelRefIterator, ParallelIterator};
use std::{cell::RefCell, rc::Rc, sync::Arc};
Expand Down Expand Up @@ -102,6 +104,30 @@ impl<F: LurkField> Default for Store<F> {
}

impl<F: LurkField> Store<F> {
/// Cost of an arity-3 poseidon hash, including the input: the `num_aux()` of a
/// hasher built from the cached arity-3 constants, plus one.
#[inline]
pub fn hash3_cost(&self) -> usize {
    let constants = self.poseidon_cache.constants.c3();
    let hasher = Poseidon::new(constants);
    hasher.num_aux() + 1
}

/// Cost of an arity-4 poseidon hash, including the input: the `num_aux()` of a
/// hasher built from the cached arity-4 constants, plus one.
#[inline]
pub fn hash4_cost(&self) -> usize {
    let constants = self.poseidon_cache.constants.c4();
    let hasher = Poseidon::new(constants);
    hasher.num_aux() + 1
}

/// Cost of an arity-6 poseidon hash, including the input: the `num_aux()` of a
/// hasher built from the cached arity-6 constants, plus one.
#[inline]
pub fn hash6_cost(&self) -> usize {
    let constants = self.poseidon_cache.constants.c6();
    let hasher = Poseidon::new(constants);
    hasher.num_aux() + 1
}

/// Cost of an arity-8 poseidon hash, including the input: the `num_aux()` of a
/// hasher built from the cached arity-8 constants, plus one.
#[inline]
pub fn hash8_cost(&self) -> usize {
    let constants = self.poseidon_cache.constants.c8();
    let hasher = Poseidon::new(constants);
    hasher.num_aux() + 1
}

/// Creates a `Ptr` that's a parent of two children
pub fn intern_2_ptrs(&self, tag: Tag, a: Ptr<F>, b: Ptr<F>) -> Ptr<F> {
let (idx, inserted) = self.tuple2.insert_probe(Box::new((a, b)));
Expand Down

1 comment on commit 030fc4f

@github-actions
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmarks

Table of Contents

Overview

This benchmark report shows the Fibonacci GPU benchmark.
Tesla T4
Intel(R) Xeon(R) CPU @ 2.30GHz
117.905 GB RAM

Benchmark Results

LEM Fibonacci Prove - rc = 100

fib-ref=b65ded07d57e2df78c83a39e5ee9dabdd74fad2b fib-ref=030fc4fc64538dfd693fc4d05d7619e378d8aa63
num-100 5.91 s (✅ 1.00x) 5.93 s (✅ 1.00x slower)
num-200 14.63 s (✅ 1.00x) 12.39 s (✅ 1.18x faster)

LEM Fibonacci Prove - rc = 600

fib-ref=b65ded07d57e2df78c83a39e5ee9dabdd74fad2b fib-ref=030fc4fc64538dfd693fc4d05d7619e378d8aa63
num-100 5.54 s (✅ 1.00x) 5.53 s (✅ 1.00x faster)
num-200 12.44 s (✅ 1.00x) 12.37 s (✅ 1.01x faster)

Made with criterion-table

Please sign in to comment.