feat: simple sparse commitment #7488

Merged Jul 23, 2024 (23 commits; changes shown from 18 commits)
7811d58  Add commitment timing to bench (ledwards2225, Jul 10, 2024)
958a682  constructing an srs view and computing commitments on reduced inputs (ledwards2225, Jul 11, 2024)
c62842b  add commit bench from my other branch (ledwards2225, Jul 15, 2024)
6e940fd  clean up commit bench a bit (ledwards2225, Jul 15, 2024)
170ac63  commit_sparse method with test and benchmark (ledwards2225, Jul 15, 2024)
c8055e0  multithreaded commit sparse (ledwards2225, Jul 15, 2024)
2cdd476  use comit sparse in oink (ledwards2225, Jul 15, 2024)
e8c3002  Merge branch 'master' into lde/analyze_commitments (ledwards2225, Jul 15, 2024)
5b3926a  cleanup (ledwards2225, Jul 16, 2024)
df99d1d  clean out comit key test suite (ledwards2225, Jul 16, 2024)
0696f08  fix gcc (ledwards2225, Jul 16, 2024)
75fbea8  improve commit sparse and add comments (ledwards2225, Jul 16, 2024)
a339ff5  clean up commit bench (ledwards2225, Jul 16, 2024)
9fecf59  Merge branch 'master' into lde/analyze_commitments (ledwards2225, Jul 16, 2024)
d89771f  clean and comment (ledwards2225, Jul 16, 2024)
ff801fe  fix endo point from debugging (ledwards2225, Jul 16, 2024)
6139b4a  update test (ledwards2225, Jul 16, 2024)
cd3ed2f  Merge branch 'master' into lde/analyze_commitments (ledwards2225, Jul 16, 2024)
e503e55  cleanup and naming (ledwards2225, Jul 16, 2024)
1789be2  Merge branch 'master' into lde/analyze_commitments (ledwards2225, Jul 17, 2024)
982480d  update commit bench with larger polys (ledwards2225, Jul 18, 2024)
199faca  Merge branch 'master' into lde/analyze_commitments (ledwards2225, Jul 22, 2024)
3ee1bab  reserve space in vectors (ledwards2225, Jul 22, 2024)
67 changes: 44 additions & 23 deletions barretenberg/cpp/scripts/analyze_client_ivc_bench.py
@@ -71,10 +71,35 @@
print(f"{key:<{max_label_length}}{time_ms:>8.0f} {time_ms/total_time_ms:>8.2%}")


# Relations breakdown
# Note: The timings here are off likely because the tracking is occurring in a hot loop but
# they should be meaningful relative to one another
print('\nRelation contributions (times to be interpreted relatively):')
# Extract a set of components from the benchmark data and display timings and relative percentages
def print_contributions(prefix, ivc_bench_json, bench_name, components):
Comment (ledwards2225, author):
This was logic previously used only to process bench data for the Relations but I wanted to reuse it for commitments so I just made it a method


    # Read JSON file and extract benchmark
    try:
        with open(prefix / ivc_bench_json, "r") as read_file:
            read_result = json.load(read_file)
            bench = next((_bench for _bench in read_result["benchmarks"] if _bench["name"] == bench_name), None)
            if not bench:
                raise ValueError(f"Benchmark '{bench_name}' not found in the JSON file.")
    except FileNotFoundError:
        print(f"File not found: {prefix / ivc_bench_json}")
        return

    # Filter and sum up kept times
    bench_components = {key: bench[key] for key in components if key in bench}
    sum_of_kept_times_ms = sum(float(time) for time in bench_components.values()) / 1e6
    print(f"Total time accounted for (ms): {sum_of_kept_times_ms:>8.0f}")

    # Print results
    max_label_length = max(len(label) for label in components)
    column_headers = {"operation": "operation", "ms": "ms", "%": "% sum"}
    print(f"{column_headers['operation']:<{max_label_length}}{column_headers['ms']:>8} {column_headers['%']:>8}")

    for key in components:
        time_ms = bench_components.get(key, 0) / 1e6
        percentage = time_ms / sum_of_kept_times_ms if sum_of_kept_times_ms > 0 else 0
        print(f"{key:<{max_label_length}}{time_ms:>8.0f} {percentage:>8.2%}")

relations = [
    "Arithmetic::accumulate(t)",
    "Permutation::accumulate(t)",
@@ -87,23 +112,19 @@
    "PoseidonExt::accumulate(t)",
    "PoseidonInt::accumulate(t)",
]
with open(PREFIX/IVC_BENCH_JSON, "r") as read_file:
    read_result = json.load(read_file)
    for _bench in read_result["benchmarks"]:
        if _bench["name"] == BENCHMARK:
            bench = _bench
bench_components = dict(filter(lambda x: x[0] in relations, bench.items()))

# For each kept time, get the proportion over all kept times.
sum_of_kept_times_ms = sum(float(time)
                           for _, time in bench_components.items())/1e6
max_label_length = max(len(label) for label in relations)
column = {"function": "function", "ms": "ms", "%": "% sum"}
print(
    f"{column['function']:<{max_label_length}}{column['ms']:>8} {column['%']:>8}")
for key in relations:
    if key not in bench:
        time_ms = 0
    else:
        time_ms = bench[key]/1e6
    print(f"{key:<{max_label_length}}{time_ms:>8.0f} {time_ms/sum_of_kept_times_ms:>8.2%}")
print('\nRelation contributions (times to be interpreted relatively):')
print_contributions(PREFIX, IVC_BENCH_JSON, BENCHMARK, relations)

commitments = [
    "COMMIT::wires(t)",
    "COMMIT::z_perm(t)",
    "COMMIT::databus(t)",
    "COMMIT::ecc_op_wires(t)",
    "COMMIT::lookup_inverses(t)",
    "COMMIT::databus_inverses(t)",
    "COMMIT::lookup_counts_tags(t)",
]

print('\nCommitment contributions:')
print_contributions(PREFIX, IVC_BENCH_JSON, BENCHMARK, commitments)
124 changes: 118 additions & 6 deletions barretenberg/cpp/src/barretenberg/commitment_schemes/commit.bench.cpp
@@ -7,25 +7,137 @@ namespace bb {

Comment (ledwards2225, author):
This suite was previously just benchmarking committing to zero polynomials of various sizes. I'm assuming it was just a WiP and never used but who knows. I've updated it to include a number of different scenarios, including committing various types of sparse polynomials with the traditional commit() and the new commit_sparse()

template <typename Curve> std::shared_ptr<CommitmentKey<Curve>> create_commitment_key(const size_t num_points)
{
    bb::srs::init_crs_factory("../srs_db/ignition");
    std::string srs_path;
    return std::make_shared<CommitmentKey<Curve>>(num_points);
}

constexpr size_t MAX_LOG_NUM_POINTS = 24;
constexpr size_t MAX_NUM_POINTS = 1 << MAX_LOG_NUM_POINTS;
// Generate a polynomial with a specified number of nonzero random coefficients
template <typename FF> Polynomial<FF> sparse_random_poly(const size_t size, const size_t num_nonzero)
{
    auto& engine = numeric::get_debug_randomness();
    auto polynomial = Polynomial<FF>(size);

    for (size_t i = 0; i < num_nonzero; i++) {
        size_t idx = engine.get_random_uint32() % size;
        polynomial[idx] = FF::random_element();
    }

    auto key = create_commitment_key<curve::BN254>(MAX_NUM_POINTS);
    return polynomial;
}
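// (Editorial note, hedged: the random indices above can collide, so the result may have fewer
// than num_nonzero distinct nonzero positions; for num_nonzero = 5 at bench-scale sizes this
// is unlikely and immaterial to the measurements.)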

constexpr size_t MAX_LOG_NUM_POINTS = 18;
constexpr size_t MAX_NUM_POINTS = 1 << MAX_LOG_NUM_POINTS;
constexpr size_t SPARSE_NUM_NONZERO = 5;

template <typename Curve> void bench_commit(::benchmark::State& state)
// Commit to a zero polynomial
template <typename Curve> void bench_commit_zero(::benchmark::State& state)
{
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    const size_t num_points = 1 << state.range(0);
    const auto polynomial = Polynomial<typename Curve::ScalarField>(num_points);
    for (auto _ : state) {
        benchmark::DoNotOptimize(key->commit(polynomial));
        key->commit(polynomial);
    }
}

// Commit to a polynomial with sparse nonzero entries equal to 1
template <typename Curve> void bench_commit_sparse(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    const size_t num_points = 1 << state.range(0);
    const size_t num_nonzero = SPARSE_NUM_NONZERO;

    auto polynomial = Polynomial<Fr>(num_points);
    for (size_t i = 0; i < num_nonzero; i++) {
        polynomial[i] = 1;
    }

    for (auto _ : state) {
        key->commit(polynomial);
    }
}

// Commit to a polynomial with sparse nonzero entries equal to 1 using the commit_sparse method to preprocess the input
template <typename Curve> void bench_commit_sparse_preprocessed(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    const size_t num_points = 1 << state.range(0);
    const size_t num_nonzero = SPARSE_NUM_NONZERO;

    auto polynomial = Polynomial<Fr>(num_points);
    for (size_t i = 0; i < num_nonzero; i++) {
        polynomial[i] = 1;
Comment (Contributor):
Maybe a follow-on question: if these are at random locations, do we have the same performance?

Reply (ledwards2225, author):
For the random sparse polys (i.e. the ones for which I was showing bench results) the locations are randomized, but I did not see a difference before and after randomizing the locations.

    }

    for (auto _ : state) {
        key->commit_sparse(polynomial);
    }
}

// Commit to a polynomial with sparse random nonzero entries
template <typename Curve> void bench_commit_sparse_random(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    const size_t num_points = 1 << state.range(0);
    const size_t num_nonzero = SPARSE_NUM_NONZERO;

    auto polynomial = sparse_random_poly<Fr>(num_points, num_nonzero);

    for (auto _ : state) {
        key->commit(polynomial);
    }
}

// Commit to a polynomial with sparse random nonzero entries using the commit_sparse method to preprocess the input
template <typename Curve> void bench_commit_sparse_random_preprocessed(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    const size_t num_points = 1 << state.range(0);
    const size_t num_nonzero = SPARSE_NUM_NONZERO;

    auto polynomial = sparse_random_poly<Fr>(num_points, num_nonzero);

    for (auto _ : state) {
        key->commit_sparse(polynomial);
    }
}

// Commit to a polynomial with dense random nonzero entries
template <typename Curve> void bench_commit_random(::benchmark::State& state)
{
    using Fr = typename Curve::ScalarField;
    auto key = create_commitment_key<Curve>(MAX_NUM_POINTS);

    const size_t num_points = 1 << state.range(0);
    auto polynomial = Polynomial<Fr>(num_points);
    for (auto& coeff : polynomial) {
        coeff = Fr::random_element();
    }
    for (auto _ : state) {
        key->commit(polynomial);
    }
}

BENCHMARK(bench_commit<curve::BN254>)->DenseRange(10, MAX_LOG_NUM_POINTS)->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_zero<curve::BN254>)->DenseRange(14, MAX_LOG_NUM_POINTS)->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_sparse<curve::BN254>)->DenseRange(14, MAX_LOG_NUM_POINTS)->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_sparse_preprocessed<curve::BN254>)
    ->DenseRange(14, MAX_LOG_NUM_POINTS)
    ->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_sparse_random<curve::BN254>)->DenseRange(14, MAX_LOG_NUM_POINTS)->Unit(benchmark::kMillisecond);
Comment (Contributor):
NB the weird asymptotics including a sudden huge jump.

BENCHMARK(bench_commit_sparse_random_preprocessed<curve::BN254>)
    ->DenseRange(14, MAX_LOG_NUM_POINTS)
    ->Unit(benchmark::kMillisecond);
BENCHMARK(bench_commit_random<curve::BN254>)->DenseRange(14, MAX_LOG_NUM_POINTS)->Unit(benchmark::kMillisecond);

} // namespace bb
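
(Editorial note: to reproduce these measurements one would run the Google Benchmark binary built from this file and filter for the new cases, e.g. via the standard --benchmark_filter=bench_commit_sparse flag; the exact binary or build-target name is not given in the PR.)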

barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.hpp
@@ -18,6 +18,7 @@

#include <cstddef>
#include <memory>
#include <ranges>
#include <string_view>

namespace bb {
@@ -34,6 +35,7 @@ template <class Curve> class CommitmentKey {

    using Fr = typename Curve::ScalarField;
    using Commitment = typename Curve::AffineElement;
    using G1 = typename Curve::AffineElement;

  public:
    scalar_multiplication::pippenger_runtime_state<Curve> pippenger_runtime_state;
@@ -81,6 +83,65 @@
        return scalar_multiplication::pippenger_unsafe<Curve>(
            const_cast<Fr*>(polynomial.data()), srs->get_monomial_points(), degree, pippenger_runtime_state);
    };

    /**
     * @brief Efficiently commit to a sparse polynomial
     * @details Iterate through the {point, scalar} pairs that define the inputs to the commitment MSM, maintain (copy)
     * only those for which the scalar is nonzero, then perform the MSM on the reduced inputs.
     * @warning Method makes a copy of all {point, scalar} pairs that comprise the reduced input. Will not be efficient
     * in terms of memory or computation for polynomials beyond a certain sparseness threshold.
     *
     * @param polynomial
     * @return Commitment
     */
    Commitment commit_sparse(std::span<const Fr> polynomial)
    {
        // BB_OP_COUNT_TIME();
        const size_t degree = polynomial.size();
        ASSERT(degree <= srs->get_monomial_size());

        // Extract the precomputed point table (contains raw SRS points at even indices and the corresponding
        // endomorphism point (\beta*x, -y) at odd indices).
        G1* point_table = srs->get_monomial_points();
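        // (Editorial note, hedged: keeping the endomorphism image next to each SRS point is what
        // lets Pippenger apply GLV-style scalar splitting, and it is why the loop below copies
        // table entries in raw/endo pairs rather than as single points.)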

        // Define structures needed to multithread the extraction of non-zero inputs
        const size_t num_threads = degree >= get_num_cpus_pow2() ? get_num_cpus_pow2() : 1;
Comment (Contributor):
This is weird for small degree but we don't care in practice?

Comment (Contributor):
IIUC these shouldn't be (and aren't) the number of threads, just the number of things you need to iterate. If you look closely at the current implementation of parallel_for, the number of threads is not something the caller can choose; you can only choose the number of elements to iterate (which is right):
void parallel_for_mutex_pool(size_t num_iterations, const std::function<void(size_t)>& func)

Reply (ledwards2225, author):
I guess I'm a little unclear what your suggestion is here. Is your objection about the name num_threads? I guess I'm thinking of it as shorthand for "num threads over which to distribute the work", which is how this value is used if I'm not mistaken. Admittedly though I am being a bit sloppy here in that I don't really need a power-of-2 thread count and my condition for when not to multithread is a bit arbitrary. In practice though the only context that matters is degree ~2^18 and higher so not so important to get the small values right

Reply (Contributor):
What I mean is that the value passed to parallel_for is in principle not the number of threads. If you have 16 cores and do parallel_for(200, some_f), then the 200 iterations are chunked across the 16 threads, and f will be called for every index from 0 to 199.

Maybe the way you set it up does make it coincide with the number of threads though. @ludamad would be the best to ask. I'm commenting because I had to use parallel_for recently and noticed this subtlety.

Reply (ledwards2225, author):
ah I see what you mean. In my case the "num_iterations" input will always be <= actual num threads but your point stands. I don't really love that - seems like the parallel_for interface should allow you to specify how it should multithread.
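
To make the subtlety concrete, here is a minimal editorial sketch of chunking semantics like those described above. parallel_for_sketch is a hypothetical stand-in, not barretenberg's actual parallel_for, and the round-robin split is purely illustrative:

#include <algorithm>
#include <cstddef>
#include <functional>
#include <thread>
#include <vector>

// The first argument is the number of iteration indices, not a thread count: the indices
// are split over however many hardware threads exist, and func runs once per index.
void parallel_for_sketch(size_t num_iterations, const std::function<void(size_t)>& func)
{
    const size_t hw_threads = std::max<size_t>(1, std::thread::hardware_concurrency());
    const size_t num_threads = std::min(hw_threads, num_iterations);
    std::vector<std::thread> threads;
    threads.reserve(num_threads);
    for (size_t t = 0; t < num_threads; ++t) {
        threads.emplace_back([&, t] {
            // Thread t covers indices t, t + num_threads, t + 2 * num_threads, ...
            for (size_t i = t; i < num_iterations; i += num_threads) {
                func(i);
            }
        });
    }
    for (auto& thread : threads) {
        thread.join();
    }
}

Under this reading, commit_sparse caps its first argument at the core count, so each index happens to coincide with one worker's chunk, which is why the per-index scratch vectors below stay race-free.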

        const size_t block_size = degree / num_threads;
        std::vector<std::vector<Fr>> thread_scalars(num_threads);
Comment (Contributor):
My above comment also means that you might get fewer real threads than the number of elements. IIUC, this means that your division of vectors here is still thread-safe, but less efficient than you might think (i.e., not one "bucket" per thread).

        std::vector<std::vector<G1>> thread_points(num_threads);

        // Loop over all polynomial coefficients and keep {point, scalar} pairs for which scalar != 0
        parallel_for(num_threads, [&](size_t thread_idx) {
            const size_t start = thread_idx * block_size;
            const size_t end = (thread_idx + 1) * block_size;

            for (size_t idx = start; idx < end; ++idx) {

                const Fr& scalar = polynomial[idx];

                if (!scalar.is_zero()) {
                    thread_scalars[thread_idx].emplace_back(scalar);
                    // Save both the raw srs point and the precomputed endomorphism point from the point table
                    const G1& point = point_table[idx * 2];
                    const G1& endo_point = point_table[idx * 2 + 1];
                    thread_points[thread_idx].emplace_back(point);
                    thread_points[thread_idx].emplace_back(endo_point);
                }
            }
        });

        // Reconstruct the full input to the pippenger from the individual threads
        std::vector<Fr> scalars;
Comment (Contributor):
Probably worth reserving some space in these just in case / since it's easy.

Reply (ledwards2225, author):
added
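
        // (Editorial sketch, hedged: the follow-up commit 3ee1bab "reserve space in vectors"
        // presumably resolves the comment above with something along the lines of
        //     scalars.reserve(...) and points.reserve(...),
        // sized from the per-thread vector lengths, which are already known at this point.)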

        std::vector<G1> points;
        for (size_t idx = 0; idx < num_threads; ++idx) {
            scalars.insert(scalars.end(), thread_scalars[idx].begin(), thread_scalars[idx].end());
            points.insert(points.end(), thread_points[idx].begin(), thread_points[idx].end());
        }

        // Call the version of pippenger which assumes all points are distinct
        return scalar_multiplication::pippenger_unsafe<Curve>(
            scalars.data(), points.data(), scalars.size(), pippenger_runtime_state);
    }
};

} // namespace bb
barretenberg/cpp/src/barretenberg/commitment_schemes/commitment_key.test.cpp (new file)
@@ -0,0 +1,69 @@
#include "barretenberg/commitment_schemes/commitment_key.hpp"
#include "barretenberg/polynomials/polynomial.hpp"
#include "barretenberg/srs/factories/file_crs_factory.hpp"

#include <gtest/gtest.h>

namespace bb {

template <typename Curve> class CommitmentKeyTest : public ::testing::Test {
    using CK = CommitmentKey<Curve>;

    using Fr = typename Curve::ScalarField;
    using Commitment = typename Curve::AffineElement;
    using Polynomial = bb::Polynomial<Fr>;

  public:
    template <class CK> inline std::shared_ptr<CK> create_commitment_key(size_t num_points);
};

template <>
template <>
std::shared_ptr<CommitmentKey<curve::BN254>> CommitmentKeyTest<curve::BN254>::create_commitment_key<
    CommitmentKey<curve::BN254>>(const size_t num_points)
{
    srs::init_crs_factory("../srs_db/ignition");
    return std::make_shared<CommitmentKey<curve::BN254>>(num_points);
}

template <>
template <>
std::shared_ptr<CommitmentKey<curve::Grumpkin>> CommitmentKeyTest<curve::Grumpkin>::create_commitment_key<
    CommitmentKey<curve::Grumpkin>>(const size_t num_points)
{
    srs::init_grumpkin_crs_factory("../srs_db/grumpkin");
    return std::make_shared<CommitmentKey<curve::Grumpkin>>(num_points);
}

using Curves = ::testing::Types<curve::BN254, curve::Grumpkin>;

TYPED_TEST_SUITE(CommitmentKeyTest, Curves);

// Check that commit and commit_sparse return the same result for a random sparse polynomial
TYPED_TEST(CommitmentKeyTest, CommitSparse)
{
    using Curve = TypeParam;
    using CK = CommitmentKey<Curve>;
    using G1 = Curve::AffineElement;
    using Fr = Curve::ScalarField;
    using Polynomial = bb::Polynomial<Fr>;

    const size_t num_points = 1 << 12; // large enough to ensure normal pippenger logic is used
    const size_t num_nonzero = 7;

    // Construct a sparse random polynomial
    Polynomial poly{ num_points };
    for (size_t i = 0; i < num_nonzero; ++i) {
        size_t idx = (i + 1) * (i + 1) % num_points;
        poly[idx] = Fr::random_element();
    }

    // Commit to the polynomial using both the conventional commit method and the sparse commitment method
    auto key = TestFixture::template create_commitment_key<CK>(num_points);
    G1 commit_result = key->commit(poly);
    G1 sparse_commit_result = key->commit_sparse(poly);

    EXPECT_EQ(sparse_commit_result, commit_result);
}

} // namespace bb