From 6038f7592ce33b8c7dda5ce713fa45494a1a9bc8 Mon Sep 17 00:00:00 2001 From: Rumata888 Date: Wed, 21 Jun 2023 17:29:18 +0000 Subject: [PATCH 1/3] Paralellized folding in gemini --- .../barretenberg/honk/pcs/gemini/gemini.cpp | 409 ++++++++++++++++++ .../barretenberg/honk/pcs/gemini/gemini.hpp | 265 +----------- 2 files changed, 419 insertions(+), 255 deletions(-) create mode 100644 cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp diff --git a/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp b/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp new file mode 100644 index 0000000000..6ed2ffe2a6 --- /dev/null +++ b/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp @@ -0,0 +1,409 @@ + +#include "gemini.hpp" +#include "barretenberg/common/thread.hpp" + +#include +#include +#include + +/** + * @brief Protocol for opening several multi-linear polynomials at the same point. + * + * + * m = number of variables + * n = 2ᵐ + * u = (u₀,...,uₘ₋₁) + * f₀, …, fₖ₋₁ = multilinear polynomials, + * g₀, …, gₕ₋₁ = shifted multilinear polynomial, + * Each gⱼ is the left-shift of some f↺ᵢ, and gⱼ points to the same memory location as fᵢ. + * v₀, …, vₖ₋₁, v↺₀, …, v↺ₕ₋₁ = multilinear evalutions s.t. fⱼ(u) = vⱼ, and gⱼ(u) = f↺ⱼ(u) = v↺ⱼ + * + * We use a challenge ρ to create a random linear combination of all fⱼ, + * and actually define A₀ = F + G↺, where + * F = ∑ⱼ ρʲ fⱼ + * G = ∑ⱼ ρᵏ⁺ʲ gⱼ, + * G↺ = is the shift of G + * where fⱼ is normal, and gⱼ is shifted. + * The evaluations are also batched, and + * v = ∑ ρʲ⋅vⱼ + ∑ ρᵏ⁺ʲ⋅v↺ⱼ = F(u) + G↺(u) + * + * The prover then creates the folded polynomials A₀, ..., Aₘ₋₁, + * and opens them at different points, as univariates. + * + * We open A₀ as univariate at r and -r. + * Since A₀ = F + G↺, but the verifier only has commitments to the gⱼs, + * we need to partially evaluate A₀ at both evaluation points. + * As univariate, we have + * A₀(X) = F(X) + G↺(X) = F(X) + G(X)/X + * So we define + * - A₀₊(X) = F(X) + G(X)/r + * - A₀₋(X) = F(X) − G(X)/r + * So that A₀₊(r) = A₀(r) and A₀₋(-r) = A₀(-r). + * The verifier is able to computed the simulated commitments to A₀₊(X) and A₀₋(X) + * since they are linear-combinations of the commitments [fⱼ] and [gⱼ]. + */ +namespace proof_system::honk::pcs::gemini { + +/** + * @brief Computes d-1 fold polynomials Fold_i, i = 1, ..., d-1 + * + * @param mle_opening_point multilinear opening point 'u' + * @param batched_unshifted F(X) = ∑ⱼ ρʲ fⱼ(X) + * @param batched_to_be_shifted G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X) + * @return std::vector + */ +template +std::vector> MultilinearReductionScheme< + Params>::compute_fold_polynomials(std::span mle_opening_point, + Polynomial&& batched_unshifted, + Polynomial&& batched_to_be_shifted) +{ + + using Fr = typename Params::Fr; + using Polynomial = barretenberg::Polynomial; + + const size_t num_variables = mle_opening_point.size(); // m + + size_t num_threads = get_num_cpus_pow2(); + // Allocate space for m+1 Fold polynomials + // + // The first two are populated here with the batched unshifted and to-be-shifted polynomial respectively. + // They will eventually contain the full batched polynomial A₀ partially evaluated at the challenges r,-r. + // This function populates the other m-1 polynomials with the foldings of A₀. + std::vector fold_polynomials; + fold_polynomials.reserve(num_variables + 1); + + // F(X) = ∑ⱼ ρʲ fⱼ(X) and G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X) + Polynomial& batched_F = fold_polynomials.emplace_back(std::move(batched_unshifted)); + Polynomial& batched_G = fold_polynomials.emplace_back(std::move(batched_to_be_shifted)); + constexpr size_t offset_to_folded = 2; // Offset because of F an G + // A₀(X) = F(X) + G↺(X) = F(X) + G(X)/X. + Polynomial A_0(batched_F); + A_0 += batched_G.shifted(); + + size_t total_size = (1 << num_variables); // 1 more than the sum of sizes of all vectors + constexpr size_t efficient_operations_per_thread = 64; // A guess of the number of operation for which there + // would be a point in sending them to a separate thread + num_threads = std::min(total_size / efficient_operations_per_thread, num_threads); + + if (num_threads == 0) { + num_threads = 1; + } + size_t num_parts = + std::bit_ceil(num_threads); // If we don't have a power of 2 number of threads, we still need to split the + // workload into a power of 2 parts to avoid race conditions + // Allocate everything before parallel computation + for (size_t l = 0; l < num_variables - 1; ++l) { + // size of the previous polynomial/2 + const size_t n_l = 1 << (num_variables - l - 1); + + // A_l_fold = Aₗ₊₁(X) = (1-uₗ)⋅even(Aₗ)(X) + uₗ⋅odd(Aₗ)(X) + fold_polynomials.emplace_back(Polynomial(n_l)); + } + + // Compute folded polynomials in parallel. + parallel_for(num_threads, [&](size_t i) { + // Create the folded polynomials A₁(X),…,Aₘ₋₁(X) + // + // A_l = Aₗ(X) is the polynomial being folded + // in the first iteration, we take the batched polynomial + // in the next iteration, it is the previously folded one + auto A_l = A_0.data(); + + // The loop only goes from the largest vector the the vector of size num_parts, so individual threads can + // work completely independently + for (size_t l = 0; l < num_variables - 1 - (size_t)std::countr_zero(num_parts); ++l) { + const Fr u_l = mle_opening_point[l]; + + // size of the previous polynomial/2 + const size_t n_l = 1 << (num_variables - l - 1); + + // A_l_fold = Aₗ₊₁(X) = (1-uₗ)⋅even(Aₗ)(X) + uₗ⋅odd(Aₗ)(X) + auto A_l_fold = fold_polynomials[l + offset_to_folded].data(); + + // The size of the chunk 1 thread would process ideally + size_t chunk_size = n_l / num_parts; + for (std::ptrdiff_t j = (std::ptrdiff_t)(i * chunk_size); j < (std::ptrdiff_t)((i + 1) * chunk_size); j++) { + // fold(Aₗ)[j] = (1-uₗ)⋅even(Aₗ)[j] + uₗ⋅odd(Aₗ)[j] + // = (1-uₗ)⋅Aₗ[2j] + uₗ⋅Aₗ[2j+1] + // = Aₗ₊₁[j] + A_l_fold[j] = A_l[j << 1] + u_l * (A_l[(j << 1) + 1] - A_l[j << 1]); + } + + // Let's say we have 3 threads and 4 parts. We need some threads to pick up the slack, because we can't + // divide the workload into 3 parts + if (i < ((num_parts - num_threads))) { + for (std::ptrdiff_t j = (std::ptrdiff_t)((i + num_threads) * chunk_size); + j < (std::ptrdiff_t)((i + num_threads + 1) * chunk_size); + j++) { + // fold(Aₗ)[j] = (1-uₗ)⋅even(Aₗ)[j] + uₗ⋅odd(Aₗ)[j] + // = (1-uₗ)⋅Aₗ[2j] + uₗ⋅Aₗ[2j+1] + // = Aₗ₊₁[j] + A_l_fold[j] = A_l[j << 1] + u_l * (A_l[(j << 1) + 1] - A_l[j << 1]); + } + } + // set Aₗ₊₁ = Aₗ for the next iteration + A_l = A_l_fold; + } + }); + // If we used multithreading, we need to process the tail (num_parts, num_parts/2,...,2,1) which can not be + // safely parallellized. + if (num_threads != 1) { + // Get the previous folded polynomial + auto A_l = fold_polynomials[num_variables - (size_t)std::countr_zero(num_parts)].data(); + for (size_t l = num_variables - 1 - (size_t)std::countr_zero(num_parts); l < num_variables - 1; ++l) { + + const Fr u_l = mle_opening_point[l]; + + // size of the previous polynomial/2 + const size_t n_l = 1 << (num_variables - l - 1); + + // A_l_fold = Aₗ₊₁(X) = (1-uₗ)⋅even(Aₗ)(X) + uₗ⋅odd(Aₗ)(X) + auto A_l_fold = fold_polynomials[l + offset_to_folded].data(); + + // fold the previous polynomial with odd and even parts + // Go through elements and compute ζ powers + + for (std::ptrdiff_t j = 0; j < (std::ptrdiff_t)n_l; ++j) { + + A_l_fold[j] = A_l[j << 1] + u_l * (A_l[(j << 1) + 1] - A_l[j << 1]); + } + // set Aₗ₊₁ = Aₗ for the next iteration + A_l = A_l_fold; + } + } + + return fold_polynomials; +}; + +/** + * @brief Computes/aggragates d+1 Fold polynomials and their opening pairs (challenge, evaluation) + * + * @details This function assumes that, upon input, last d-1 entries in fold_polynomials are Fold_i. + * The first two entries are assumed to be, respectively, the batched unshifted and batched to-be-shifted + * polynomials F(X) = ∑ⱼ ρʲfⱼ(X) and G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X). This function completes the computation + * of the first two Fold polynomials as F + G/r and F - G/r. It then evaluates each of the d+1 + * fold polynomials at, respectively, the points r, rₗ = r^{2ˡ} for l = 0, 1, ..., d-1. + * + * @param mle_opening_point u = (u₀,...,uₘ₋₁) is the MLE opening point + * @param fold_polynomials vector of polynomials whose first two elements are F(X) = ∑ⱼ ρʲfⱼ(X) + * and G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X), and the next d-1 elements are Fold_i, i = 1, ..., d-1. + * @param r_challenge univariate opening challenge + */ +template +ProverOutput MultilinearReductionScheme::compute_fold_polynomial_evaluations( + std::span mle_opening_point, std::vector&& fold_polynomials, const Fr& r_challenge) +{ + + using Fr = typename Params::Fr; + using Polynomial = barretenberg::Polynomial; + + const size_t num_variables = mle_opening_point.size(); // m + + Polynomial& batched_F = fold_polynomials[0]; // F(X) = ∑ⱼ ρʲ fⱼ(X) + Polynomial& batched_G = fold_polynomials[1]; // G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X) + + // Compute univariate opening queries rₗ = r^{2ˡ} for l = 0, 1, ..., m-1 + std::vector r_squares = squares_of_r(r_challenge, num_variables); + + // Compute G/r + Fr r_inv = r_challenge.invert(); + batched_G *= r_inv; + + // Construct A₀₊ = F + G/r and A₀₋ = F - G/r in place in fold_polynomials + Polynomial tmp = batched_F; + Polynomial& A_0_pos = fold_polynomials[0]; + + // A₀₊(X) = F(X) + G(X)/r, s.t. A₀₊(r) = A₀(r) + A_0_pos += batched_G; + + // Perform a swap so that tmp = G(X)/r and A_0_neg = F(X) + std::swap(tmp, batched_G); + Polynomial& A_0_neg = fold_polynomials[1]; + + // A₀₋(X) = F(X) - G(X)/r, s.t. A₀₋(-r) = A₀(-r) + A_0_neg -= tmp; + + std::vector> fold_poly_opening_pairs; + fold_poly_opening_pairs.reserve(num_variables + 1); + + // Compute first opening pair {r, A₀(r)} + fold_poly_opening_pairs.emplace_back(OpeningPair{ r_challenge, fold_polynomials[0].evaluate(r_challenge) }); + + // Compute the remaining m opening pairs {−r^{2ˡ}, Aₗ(−r^{2ˡ})}, l = 0, ..., m-1. + for (size_t l = 0; l < num_variables; ++l) { + fold_poly_opening_pairs.emplace_back( + OpeningPair{ -r_squares[l], fold_polynomials[l + 1].evaluate(-r_squares[l]) }); + } + + return { fold_poly_opening_pairs, std::move(fold_polynomials) }; +}; + +/** + * @brief Checks that all MLE evaluations vⱼ contained in the list of m MLE opening claims + * is correct, and returns univariate polynomial opening claims to be checked later + * + * @param mle_opening_point the MLE evaluation point u + * @param batched_evaluation batched evaluation from multivariate evals at the point u + * @param batched_f batched commitment to unshifted polynomials + * @param batched_g batched commitment to to-be-shifted polynomials + * @param proof commitments to the m-1 folded polynomials, and alleged evaluations. + * @param transcript + * @return Fold polynomial opening claims: (r, A₀(r), C₀₊), (-r, A₀(-r), C₀₋), and + * (Cⱼ, Aⱼ(-r^{2ʲ}), -r^{2}), j = [1, ..., m-1] + */ + +template +std::vector> MultilinearReductionScheme::reduce_verify( + std::span mle_opening_point, /* u */ + const Fr batched_evaluation, /* all */ + GroupElement& batched_f, /* unshifted */ + GroupElement& batched_g, /* to-be-shifted */ + VerifierTranscript& transcript) +{ + + using Fr = typename Params::Fr; + using Commitment = typename Params::Commitment; + const size_t num_variables = mle_opening_point.size(); + + // Get polynomials Fold_i, i = 1,...,m-1 from transcript + std::vector commitments; + commitments.reserve(num_variables - 1); + for (size_t i = 0; i < num_variables - 1; ++i) { + auto commitment = transcript.template receive_from_prover("Gemini:FOLD_" + std::to_string(i + 1)); + commitments.emplace_back(commitment); + } + + // compute vector of powers of random evaluation point r + const Fr r = transcript.get_challenge("Gemini:r"); + std::vector r_squares = squares_of_r(r, num_variables); + + // Get evaluations a_i, i = 0,...,m-1 from transcript + std::vector evaluations; + evaluations.reserve(num_variables); + for (size_t i = 0; i < num_variables; ++i) { + auto eval = transcript.template receive_from_prover("Gemini:a_" + std::to_string(i)); + evaluations.emplace_back(eval); + } + + // Compute evaluation A₀(r) + auto a_0_pos = compute_eval_pos(batched_evaluation, mle_opening_point, r_squares, evaluations); + + // C₀_r_pos = ∑ⱼ ρʲ⋅[fⱼ] + r⁻¹⋅∑ⱼ ρᵏ⁺ʲ [gⱼ] + // C₀_r_pos = ∑ⱼ ρʲ⋅[fⱼ] - r⁻¹⋅∑ⱼ ρᵏ⁺ʲ [gⱼ] + auto [c0_r_pos, c0_r_neg] = compute_simulated_commitments(batched_f, batched_g, r); + + std::vector> fold_polynomial_opening_claims; + fold_polynomial_opening_claims.reserve(num_variables + 1); + + // ( [A₀₊], r, A₀(r) ) + fold_polynomial_opening_claims.emplace_back(OpeningClaim{ { r, a_0_pos }, c0_r_pos }); + // ( [A₀₋], -r, A₀(-r) ) + fold_polynomial_opening_claims.emplace_back(OpeningClaim{ { -r, evaluations[0] }, c0_r_neg }); + for (size_t l = 0; l < num_variables - 1; ++l) { + // ([A₀₋], −r^{2ˡ}, Aₗ(−r^{2ˡ}) ) + fold_polynomial_opening_claims.emplace_back( + OpeningClaim{ { -r_squares[l + 1], evaluations[l + 1] }, commitments[l] }); + } + + return fold_polynomial_opening_claims; +}; + +template +std::vector MultilinearReductionScheme::powers_of_rho(const Fr rho, + const size_t num_powers) +{ + + using Fr = typename Params::Fr; + std::vector rhos = { Fr(1), rho }; + rhos.reserve(num_powers); + for (size_t j = 2; j < num_powers; j++) { + rhos.emplace_back(rhos[j - 1] * rho); + } + return rhos; +}; +/** + * @brief Compute the expected evaluation of the univariate commitment to the batched polynomial. + * + * @param batched_mle_eval The evaluation of the folded polynomials + * @param mle_vars MLE opening point u + * @param r_squares squares of r, r², ..., r^{2ᵐ⁻¹} + * @param fold_polynomial_evals series of Aᵢ₋₁(−r^{2ⁱ⁻¹}) + * @return evaluation A₀(r) + */ +template +typename Params::Fr MultilinearReductionScheme::compute_eval_pos(const Fr batched_mle_eval, + std::span mle_vars, + std::span r_squares, + std::span fold_polynomial_evals) +{ + using Fr = typename Params::Fr; + const size_t num_variables = mle_vars.size(); + + const auto& evals = fold_polynomial_evals; + + // Initialize eval_pos with batched MLE eval v = ∑ⱼ ρʲ vⱼ + ∑ⱼ ρᵏ⁺ʲ v↺ⱼ + Fr eval_pos = batched_mle_eval; + for (size_t l = num_variables; l != 0; --l) { + const Fr r = r_squares[l - 1]; // = rₗ₋₁ = r^{2ˡ⁻¹} + const Fr eval_neg = evals[l - 1]; // = Aₗ₋₁(−r^{2ˡ⁻¹}) + const Fr u = mle_vars[l - 1]; // = uₗ₋₁ + + // The folding property ensures that + // Aₗ₋₁(r^{2ˡ⁻¹}) + Aₗ₋₁(−r^{2ˡ⁻¹}) Aₗ₋₁(r^{2ˡ⁻¹}) - Aₗ₋₁(−r^{2ˡ⁻¹}) + // Aₗ(r^{2ˡ}) = (1-uₗ₋₁) ----------------------------- + uₗ₋₁ ----------------------------- + // 2 2r^{2ˡ⁻¹} + // We solve the above equation in Aₗ₋₁(r^{2ˡ⁻¹}), using the previously computed Aₗ(r^{2ˡ}) in eval_pos + // and using Aₗ₋₁(−r^{2ˡ⁻¹}) sent by the prover in the proof. + eval_pos = ((r * eval_pos * 2) - eval_neg * (r * (Fr(1) - u) - u)) / (r * (Fr(1) - u) + u); + } + + return eval_pos; // return A₀(r) +}; + +/** + * @brief Compute squares of folding challenge r + * + * @tparam Params + * @param r + * @param num_squares The number of foldings + * @return std::vector + */ +template +std::vector MultilinearReductionScheme::squares_of_r(const Fr r, const size_t num_squares) +{ + std::vector squares = { r }; + squares.reserve(num_squares); + for (size_t j = 1; j < num_squares; j++) { + squares.emplace_back(squares[j - 1].sqr()); + } + return squares; +}; + +/** + * @brief Computes two commitments to A₀ partially evaluated in r and -r. + * + * @param batched_f batched commitment to non-shifted polynomials + * @param batched_g batched commitment to to-be-shifted polynomials + * @param r evaluation point at which we have partially evaluated A₀ at r and -r. + * @return std::pair c0_r_pos, c0_r_neg + */ +template +std::pair MultilinearReductionScheme< + Params>::compute_simulated_commitments(GroupElement& batched_f, GroupElement& batched_g, Fr r) +{ + // C₀ᵣ₊ = [F] + r⁻¹⋅[G] + GroupElement C0_r_pos = batched_f; + // C₀ᵣ₋ = [F] - r⁻¹⋅[G] + GroupElement C0_r_neg = batched_f; + Fr r_inv = r.invert(); + if (!batched_g.is_point_at_infinity()) { + batched_g *= r_inv; + C0_r_pos += batched_g; + C0_r_neg -= batched_g; + } + return { C0_r_pos, C0_r_neg }; +}; +template class MultilinearReductionScheme; +template class MultilinearReductionScheme; +}; // namespace proof_system::honk::pcs::gemini diff --git a/cpp/src/barretenberg/honk/pcs/gemini/gemini.hpp b/cpp/src/barretenberg/honk/pcs/gemini/gemini.hpp index d3e8112735..4f19c365d8 100644 --- a/cpp/src/barretenberg/honk/pcs/gemini/gemini.hpp +++ b/cpp/src/barretenberg/honk/pcs/gemini/gemini.hpp @@ -1,14 +1,10 @@ #pragma once #include "../claim.hpp" -#include "barretenberg/common/log.hpp" #include "barretenberg/honk/pcs/commitment_key.hpp" #include "barretenberg/polynomials/polynomial.hpp" #include "barretenberg/honk/transcript/transcript.hpp" -#include "barretenberg/common/assert.hpp" -#include -#include #include /** @@ -76,275 +72,34 @@ template class MultilinearReductionScheme { using Polynomial = barretenberg::Polynomial; public: - /** - * @brief Computes d-1 fold polynomials Fold_i, i = 1, ..., d-1 - * - * @param mle_opening_point multilinear opening point 'u' - * @param batched_unshifted F(X) = ∑ⱼ ρʲ fⱼ(X) - * @param batched_to_be_shifted G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X) - * @return std::vector - */ static std::vector compute_fold_polynomials(std::span mle_opening_point, Polynomial&& batched_unshifted, - Polynomial&& batched_to_be_shifted) - { - const size_t num_variables = mle_opening_point.size(); // m + Polynomial&& batched_to_be_shifted); - // Allocate space for m+1 Fold polynomials - // - // The first two are populated here with the batched unshifted and to-be-shifted polynomial respectively. - // They will eventually contain the full batched polynomial A₀ partially evaluated at the challenges r,-r. - // This function populates the other m-1 polynomials with the foldings of A₀. - std::vector fold_polynomials; - fold_polynomials.reserve(num_variables + 1); - - // F(X) = ∑ⱼ ρʲ fⱼ(X) and G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X) - Polynomial& batched_F = fold_polynomials.emplace_back(std::move(batched_unshifted)); - Polynomial& batched_G = fold_polynomials.emplace_back(std::move(batched_to_be_shifted)); - - // A₀(X) = F(X) + G↺(X) = F(X) + G(X)/X. - Polynomial A_0(batched_F); - A_0 += batched_G.shifted(); - - // Create the folded polynomials A₁(X),…,Aₘ₋₁(X) - // - // A_l = Aₗ(X) is the polynomial being folded - // in the first iteration, we take the batched polynomial - // in the next iteration, it is the previously folded one - auto A_l = A_0.data(); - for (size_t l = 0; l < num_variables - 1; ++l) { - const Fr u_l = mle_opening_point[l]; - - // size of the previous polynomial/2 - const size_t n_l = 1 << (num_variables - l - 1); - - // A_l_fold = Aₗ₊₁(X) = (1-uₗ)⋅even(Aₗ)(X) + uₗ⋅odd(Aₗ)(X) - auto A_l_fold = fold_polynomials.emplace_back(Polynomial(n_l)).data(); - - // fold the previous polynomial with odd and even parts - for (std::ptrdiff_t i = 0; i < (std::ptrdiff_t)n_l; ++i) { - // TODO(#219)(Adrian) parallelize - - // fold(Aₗ)[i] = (1-uₗ)⋅even(Aₗ)[i] + uₗ⋅odd(Aₗ)[i] - // = (1-uₗ)⋅Aₗ[2i] + uₗ⋅Aₗ[2i+1] - // = Aₗ₊₁[i] - A_l_fold[i] = A_l[i << 1] + u_l * (A_l[(i << 1) + 1] - A_l[i << 1]); - } - - // set Aₗ₊₁ = Aₗ for the next iteration - A_l = A_l_fold; - } - - return fold_polynomials; - }; - - /** - * @brief Computes/aggragates d+1 Fold polynomials and their opening pairs (challenge, evaluation) - * - * @details This function assumes that, upon input, last d-1 entries in fold_polynomials are Fold_i. - * The first two entries are assumed to be, respectively, the batched unshifted and batched to-be-shifted - * polynomials F(X) = ∑ⱼ ρʲfⱼ(X) and G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X). This function completes the computation - * of the first two Fold polynomials as F + G/r and F - G/r. It then evaluates each of the d+1 - * fold polynomials at, respectively, the points r, rₗ = r^{2ˡ} for l = 0, 1, ..., d-1. - * - * @param mle_opening_point u = (u₀,...,uₘ₋₁) is the MLE opening point - * @param fold_polynomials vector of polynomials whose first two elements are F(X) = ∑ⱼ ρʲfⱼ(X) - * and G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X), and the next d-1 elements are Fold_i, i = 1, ..., d-1. - * @param r_challenge univariate opening challenge - */ static ProverOutput compute_fold_polynomial_evaluations(std::span mle_opening_point, std::vector&& fold_polynomials, - const Fr& r_challenge) - { - const size_t num_variables = mle_opening_point.size(); // m - - Polynomial& batched_F = fold_polynomials[0]; // F(X) = ∑ⱼ ρʲ fⱼ(X) - Polynomial& batched_G = fold_polynomials[1]; // G(X) = ∑ⱼ ρᵏ⁺ʲ gⱼ(X) - - // Compute univariate opening queries rₗ = r^{2ˡ} for l = 0, 1, ..., m-1 - std::vector r_squares = squares_of_r(r_challenge, num_variables); - - // Compute G/r - Fr r_inv = r_challenge.invert(); - batched_G *= r_inv; - - // Construct A₀₊ = F + G/r and A₀₋ = F - G/r in place in fold_polynomials - Polynomial tmp = batched_F; - Polynomial& A_0_pos = fold_polynomials[0]; - - // A₀₊(X) = F(X) + G(X)/r, s.t. A₀₊(r) = A₀(r) - A_0_pos += batched_G; - - // Perform a swap so that tmp = G(X)/r and A_0_neg = F(X) - std::swap(tmp, batched_G); - Polynomial& A_0_neg = fold_polynomials[1]; - - // A₀₋(X) = F(X) - G(X)/r, s.t. A₀₋(-r) = A₀(-r) - A_0_neg -= tmp; + const Fr& r_challenge); - std::vector> fold_poly_opening_pairs; - fold_poly_opening_pairs.reserve(num_variables + 1); - - // Compute first opening pair {r, A₀(r)} - fold_poly_opening_pairs.emplace_back( - OpeningPair{ r_challenge, fold_polynomials[0].evaluate(r_challenge) }); - - // Compute the remaining m opening pairs {−r^{2ˡ}, Aₗ(−r^{2ˡ})}, l = 0, ..., m-1. - for (size_t l = 0; l < num_variables; ++l) { - fold_poly_opening_pairs.emplace_back( - OpeningPair{ -r_squares[l], fold_polynomials[l + 1].evaluate(-r_squares[l]) }); - } - - return { fold_poly_opening_pairs, std::move(fold_polynomials) }; - }; - - /** - * @brief Checks that all MLE evaluations vⱼ contained in the list of m MLE opening claims - * is correct, and returns univariate polynomial opening claims to be checked later - * - * @param mle_opening_point the MLE evaluation point u - * @param batched_evaluation batched evaluation from multivariate evals at the point u - * @param batched_f batched commitment to unshifted polynomials - * @param batched_g batched commitment to to-be-shifted polynomials - * @param proof commitments to the m-1 folded polynomials, and alleged evaluations. - * @param transcript - * @return Fold polynomial opening claims: (r, A₀(r), C₀₊), (-r, A₀(-r), C₀₋), and - * (Cⱼ, Aⱼ(-r^{2ʲ}), -r^{2}), j = [1, ..., m-1] - */ static std::vector> reduce_verify(std::span mle_opening_point, /* u */ const Fr batched_evaluation, /* all */ GroupElement& batched_f, /* unshifted */ GroupElement& batched_g, /* to-be-shifted */ - VerifierTranscript& transcript) - { - const size_t num_variables = mle_opening_point.size(); - - // Get polynomials Fold_i, i = 1,...,m-1 from transcript - std::vector commitments; - commitments.reserve(num_variables - 1); - for (size_t i = 0; i < num_variables - 1; ++i) { - auto commitment = - transcript.template receive_from_prover("Gemini:FOLD_" + std::to_string(i + 1)); - commitments.emplace_back(commitment); - } - - // compute vector of powers of random evaluation point r - const Fr r = transcript.get_challenge("Gemini:r"); - std::vector r_squares = squares_of_r(r, num_variables); - - // Get evaluations a_i, i = 0,...,m-1 from transcript - std::vector evaluations; - evaluations.reserve(num_variables); - for (size_t i = 0; i < num_variables; ++i) { - auto eval = transcript.template receive_from_prover("Gemini:a_" + std::to_string(i)); - evaluations.emplace_back(eval); - } - - // Compute evaluation A₀(r) - auto a_0_pos = compute_eval_pos(batched_evaluation, mle_opening_point, r_squares, evaluations); - - // C₀_r_pos = ∑ⱼ ρʲ⋅[fⱼ] + r⁻¹⋅∑ⱼ ρᵏ⁺ʲ [gⱼ] - // C₀_r_pos = ∑ⱼ ρʲ⋅[fⱼ] - r⁻¹⋅∑ⱼ ρᵏ⁺ʲ [gⱼ] - auto [c0_r_pos, c0_r_neg] = compute_simulated_commitments(batched_f, batched_g, r); + VerifierTranscript& transcript); - std::vector> fold_polynomial_opening_claims; - fold_polynomial_opening_claims.reserve(num_variables + 1); - - // ( [A₀₊], r, A₀(r) ) - fold_polynomial_opening_claims.emplace_back(OpeningClaim{ { r, a_0_pos }, c0_r_pos }); - // ( [A₀₋], -r, A₀(-r) ) - fold_polynomial_opening_claims.emplace_back(OpeningClaim{ { -r, evaluations[0] }, c0_r_neg }); - for (size_t l = 0; l < num_variables - 1; ++l) { - // ([A₀₋], −r^{2ˡ}, Aₗ(−r^{2ˡ}) ) - fold_polynomial_opening_claims.emplace_back( - OpeningClaim{ { -r_squares[l + 1], evaluations[l + 1] }, commitments[l] }); - } - - return fold_polynomial_opening_claims; - }; - - static std::vector powers_of_rho(const Fr rho, const size_t num_powers) - { - std::vector rhos = { Fr(1), rho }; - rhos.reserve(num_powers); - for (size_t j = 2; j < num_powers; j++) { - rhos.emplace_back(rhos[j - 1] * rho); - } - return rhos; - }; + static std::vector powers_of_rho(const Fr rho, const size_t num_powers); private: - /** - * @brief computes the output pair given the transcript. - * This method is common for both prover and verifier. - * - * @param evaluations evaluations of each multivariate - * @param mle_vars MLE opening point u - * @param rhos powers of the initial batching challenge ρ - * @param r_squares squares of r, r², ..., r^{2ᵐ⁻¹} - * @return evaluation A₀(r) - */ static Fr compute_eval_pos(const Fr batched_mle_eval, std::span mle_vars, std::span r_squares, - std::span fold_polynomial_evals) - { - const size_t num_variables = mle_vars.size(); - - const auto& evals = fold_polynomial_evals; + std::span fold_polynomial_evals); - // Initialize eval_pos with batched MLE eval v = ∑ⱼ ρʲ vⱼ + ∑ⱼ ρᵏ⁺ʲ v↺ⱼ - Fr eval_pos = batched_mle_eval; - for (size_t l = num_variables; l != 0; --l) { - const Fr r = r_squares[l - 1]; // = rₗ₋₁ = r^{2ˡ⁻¹} - const Fr eval_neg = evals[l - 1]; // = Aₗ₋₁(−r^{2ˡ⁻¹}) - const Fr u = mle_vars[l - 1]; // = uₗ₋₁ + static std::vector squares_of_r(const Fr r, const size_t num_squares); - // The folding property ensures that - // Aₗ₋₁(r^{2ˡ⁻¹}) + Aₗ₋₁(−r^{2ˡ⁻¹}) Aₗ₋₁(r^{2ˡ⁻¹}) - Aₗ₋₁(−r^{2ˡ⁻¹}) - // Aₗ(r^{2ˡ}) = (1-uₗ₋₁) ----------------------------- + uₗ₋₁ ----------------------------- - // 2 2r^{2ˡ⁻¹} - // We solve the above equation in Aₗ₋₁(r^{2ˡ⁻¹}), using the previously computed Aₗ(r^{2ˡ}) in eval_pos - // and using Aₗ₋₁(−r^{2ˡ⁻¹}) sent by the prover in the proof. - eval_pos = ((r * eval_pos * 2) - eval_neg * (r * (Fr(1) - u) - u)) / (r * (Fr(1) - u) + u); - } - - return eval_pos; // return A₀(r) - }; - - static std::vector squares_of_r(const Fr r, const size_t num_squares) - { - std::vector squares = { r }; - squares.reserve(num_squares); - for (size_t j = 1; j < num_squares; j++) { - squares.emplace_back(squares[j - 1].sqr()); - } - return squares; - }; - - /** - * @brief Computes two commitments to A₀ partially evaluated in r and -r. - * - * @param batched_f batched commitment to non-shifted polynomials - * @param batched_g batched commitment to to-be-shifted polynomials - * @param r evaluation point at which we have partially evaluated A₀ at r and -r. - * @return std::pair c0_r_pos, c0_r_neg - */ static std::pair compute_simulated_commitments(GroupElement& batched_f, GroupElement& batched_g, - Fr r) - { - // C₀ᵣ₊ = [F] + r⁻¹⋅[G] - GroupElement C0_r_pos = batched_f; - // C₀ᵣ₋ = [F] - r⁻¹⋅[G] - GroupElement C0_r_neg = batched_f; - Fr r_inv = r.invert(); - if (!batched_g.is_point_at_infinity()) { - batched_g *= r_inv; - C0_r_pos += batched_g; - C0_r_neg -= batched_g; - } - return { C0_r_pos, C0_r_neg }; - }; -}; + Fr r); +}; // namespace proof_system::honk::pcs::gemini +extern template class MultilinearReductionScheme; +extern template class MultilinearReductionScheme; } // namespace proof_system::honk::pcs::gemini From fb995450eb06b3da7de1eae78b14b7104cfc5768 Mon Sep 17 00:00:00 2001 From: Rumata888 Date: Wed, 21 Jun 2023 17:32:54 +0000 Subject: [PATCH 2/3] Typo --- cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp b/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp index 6ed2ffe2a6..1af3643a7e 100644 --- a/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp +++ b/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp @@ -110,7 +110,7 @@ std::vector> MultilinearR // in the next iteration, it is the previously folded one auto A_l = A_0.data(); - // The loop only goes from the largest vector the the vector of size num_parts, so individual threads can + // The loop only goes from the largest vector to the vector of size num_parts, so individual threads can // work completely independently for (size_t l = 0; l < num_variables - 1 - (size_t)std::countr_zero(num_parts); ++l) { const Fr u_l = mle_opening_point[l]; From 16719c5e7d4e4fb1096cfee687871e044b553e6f Mon Sep 17 00:00:00 2001 From: Rumata888 Date: Thu, 22 Jun 2023 13:03:18 +0000 Subject: [PATCH 3/3] Go back to simple parallelization --- .../barretenberg/honk/pcs/gemini/gemini.cpp | 108 ++++++------------ 1 file changed, 32 insertions(+), 76 deletions(-) diff --git a/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp b/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp index 1af3643a7e..27135537ff 100644 --- a/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp +++ b/cpp/src/barretenberg/honk/pcs/gemini/gemini.cpp @@ -64,7 +64,10 @@ std::vector> MultilinearR const size_t num_variables = mle_opening_point.size(); // m - size_t num_threads = get_num_cpus_pow2(); + const size_t num_threads = get_num_cpus_pow2(); + constexpr size_t efficient_operations_per_thread = 64; // A guess of the number of operation for which there + // would be a point in sending them to a separate thread + // Allocate space for m+1 Fold polynomials // // The first two are populated here with the batched unshifted and to-be-shifted polynomial respectively. @@ -81,17 +84,6 @@ std::vector> MultilinearR Polynomial A_0(batched_F); A_0 += batched_G.shifted(); - size_t total_size = (1 << num_variables); // 1 more than the sum of sizes of all vectors - constexpr size_t efficient_operations_per_thread = 64; // A guess of the number of operation for which there - // would be a point in sending them to a separate thread - num_threads = std::min(total_size / efficient_operations_per_thread, num_threads); - - if (num_threads == 0) { - num_threads = 1; - } - size_t num_parts = - std::bit_ceil(num_threads); // If we don't have a power of 2 number of threads, we still need to split the - // workload into a power of 2 parts to avoid race conditions // Allocate everything before parallel computation for (size_t l = 0; l < num_variables - 1; ++l) { // size of the previous polynomial/2 @@ -101,76 +93,40 @@ std::vector> MultilinearR fold_polynomials.emplace_back(Polynomial(n_l)); } - // Compute folded polynomials in parallel. - parallel_for(num_threads, [&](size_t i) { - // Create the folded polynomials A₁(X),…,Aₘ₋₁(X) - // - // A_l = Aₗ(X) is the polynomial being folded - // in the first iteration, we take the batched polynomial - // in the next iteration, it is the previously folded one - auto A_l = A_0.data(); - - // The loop only goes from the largest vector to the vector of size num_parts, so individual threads can - // work completely independently - for (size_t l = 0; l < num_variables - 1 - (size_t)std::countr_zero(num_parts); ++l) { - const Fr u_l = mle_opening_point[l]; - - // size of the previous polynomial/2 - const size_t n_l = 1 << (num_variables - l - 1); - - // A_l_fold = Aₗ₊₁(X) = (1-uₗ)⋅even(Aₗ)(X) + uₗ⋅odd(Aₗ)(X) - auto A_l_fold = fold_polynomials[l + offset_to_folded].data(); - - // The size of the chunk 1 thread would process ideally - size_t chunk_size = n_l / num_parts; - for (std::ptrdiff_t j = (std::ptrdiff_t)(i * chunk_size); j < (std::ptrdiff_t)((i + 1) * chunk_size); j++) { - // fold(Aₗ)[j] = (1-uₗ)⋅even(Aₗ)[j] + uₗ⋅odd(Aₗ)[j] - // = (1-uₗ)⋅Aₗ[2j] + uₗ⋅Aₗ[2j+1] - // = Aₗ₊₁[j] - A_l_fold[j] = A_l[j << 1] + u_l * (A_l[(j << 1) + 1] - A_l[j << 1]); - } - - // Let's say we have 3 threads and 4 parts. We need some threads to pick up the slack, because we can't - // divide the workload into 3 parts - if (i < ((num_parts - num_threads))) { - for (std::ptrdiff_t j = (std::ptrdiff_t)((i + num_threads) * chunk_size); - j < (std::ptrdiff_t)((i + num_threads + 1) * chunk_size); - j++) { - // fold(Aₗ)[j] = (1-uₗ)⋅even(Aₗ)[j] + uₗ⋅odd(Aₗ)[j] - // = (1-uₗ)⋅Aₗ[2j] + uₗ⋅Aₗ[2j+1] - // = Aₗ₊₁[j] - A_l_fold[j] = A_l[j << 1] + u_l * (A_l[(j << 1) + 1] - A_l[j << 1]); - } - } - // set Aₗ₊₁ = Aₗ for the next iteration - A_l = A_l_fold; - } - }); - // If we used multithreading, we need to process the tail (num_parts, num_parts/2,...,2,1) which can not be - // safely parallellized. - if (num_threads != 1) { - // Get the previous folded polynomial - auto A_l = fold_polynomials[num_variables - (size_t)std::countr_zero(num_parts)].data(); - for (size_t l = num_variables - 1 - (size_t)std::countr_zero(num_parts); l < num_variables - 1; ++l) { - - const Fr u_l = mle_opening_point[l]; - - // size of the previous polynomial/2 - const size_t n_l = 1 << (num_variables - l - 1); + // A_l = Aₗ(X) is the polynomial being folded + // in the first iteration, we take the batched polynomial + // in the next iteration, it is the previously folded one + auto A_l = A_0.data(); + for (size_t l = 0; l < num_variables - 1; ++l) { + // size of the previous polynomial/2 + const size_t n_l = 1 << (num_variables - l - 1); - // A_l_fold = Aₗ₊₁(X) = (1-uₗ)⋅even(Aₗ)(X) + uₗ⋅odd(Aₗ)(X) - auto A_l_fold = fold_polynomials[l + offset_to_folded].data(); + // Use as many threads as it is useful so that 1 thread doesn't process 1 element, but make sure that there is + // at least 1 + size_t num_used_threads = std::min(n_l / efficient_operations_per_thread, num_threads); + num_used_threads = num_used_threads ? num_used_threads : 1; + size_t chunk_size = n_l / num_used_threads; + size_t last_chunk_size = (n_l % chunk_size) ? (n_l % num_used_threads) : chunk_size; - // fold the previous polynomial with odd and even parts - // Go through elements and compute ζ powers + // Openning point is the same for all + const Fr u_l = mle_opening_point[l]; - for (std::ptrdiff_t j = 0; j < (std::ptrdiff_t)n_l; ++j) { + // A_l_fold = Aₗ₊₁(X) = (1-uₗ)⋅even(Aₗ)(X) + uₗ⋅odd(Aₗ)(X) + auto A_l_fold = fold_polynomials[l + offset_to_folded].data(); + parallel_for(num_used_threads, [&](size_t i) { + size_t current_chunk_size = (i == (num_used_threads - 1)) ? last_chunk_size : chunk_size; + for (std::ptrdiff_t j = (std::ptrdiff_t)(i * chunk_size); + j < (std::ptrdiff_t)((i * chunk_size) + current_chunk_size); + j++) { + // fold(Aₗ)[j] = (1-uₗ)⋅even(Aₗ)[j] + uₗ⋅odd(Aₗ)[j] + // = (1-uₗ)⋅Aₗ[2j] + uₗ⋅Aₗ[2j+1] + // = Aₗ₊₁[j] A_l_fold[j] = A_l[j << 1] + u_l * (A_l[(j << 1) + 1] - A_l[j << 1]); } - // set Aₗ₊₁ = Aₗ for the next iteration - A_l = A_l_fold; - } + }); + // set Aₗ₊₁ = Aₗ for the next iteration + A_l = A_l_fold; } return fold_polynomials;