From def75140dd800b4bd8967352ccfa76c42f7e2272 Mon Sep 17 00:00:00 2001 From: Cody Gunton Date: Thu, 11 Jan 2024 11:12:44 -0500 Subject: [PATCH] feat: Relations vs widgets benchmarking (#3931) Add code to measure Plonk widget execution vs Honk relation execution. Outputs below. ## Summary of results Here are values of widget time/relation time: ``` Arithmetic 0.8342541436464088 GenPermSort 1.0814663951120163 Elliptic 0.8155940594059405 Auxiliary 0.7053435114503817 Plookup 0.7008310249307479 Permutation 1.192233009708738 ``` ## Benchmark outputs ``` % ./bin/relations_bench 17s ~/barretenberg-cpp/build cg/relation-check-bench + mainframe 2024-01-10T19:27:00+00:00 Running ./bin/relations_bench Run on (128 X 2649.99 MHz CPU s) CPU Caches: L1 Data 32 KiB (x64) L1 Instruction 32 KiB (x64) L2 Unified 512 KiB (x64) L3 Unified 32768 KiB (x8) Load Average: 12.24, 23.55, 50.13 -------------------------------------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations -------------------------------------------------------------------------------------------------------------------------------------------- execute_relation> 543 ns 543 ns 1289230 execute_relation> 491 ns 491 ns 1423778 execute_relation> 808 ns 808 ns 866448 execute_relation> 1965 ns 1965 ns 357614 execute_relation> 722 ns 722 ns 969890 execute_relation> 515 ns 515 ns 1358778 ``` ``` % taskset -c 0 ./bin/widget_bench 1m 57s ~/barretenberg-cpp/build cg/relation-check-bench + mainframe 2024-01-10T19:15:11+00:00 Running ./bin/widget_bench Run on (128 X 2649.99 MHz CPU s) CPU Caches: L1 Data 32 KiB (x64) L1 Instruction 32 KiB (x64) L2 Unified 512 KiB (x64) L3 Unified 32768 KiB (x8) Load Average: 25.77, 74.54, 79.34 ---------------------------------------------------------------------------------------------------------------------------------------- Benchmark Time CPU Iterations 
---------------------------------------------------------------------------------------------------------------------------------------- quotient_contribution>/iterations:1/manual_time 453 ns 186172 ns 1 quotient_contribution>/iterations:1/manual_time 531 ns 165501 ns 1 quotient_contribution>/iterations:1/manual_time 659 ns 174561 ns 1 quotient_contribution>/iterations:1/manual_time 1386 ns 206071 ns 1 quotient_contribution>/iterations:1/manual_time 506 ns 188280 ns 1 quotient_contribution>/iterations:1/manual_time 614 ns 167110 ns 1 ``` --- .../relations_bench/relations.bench.cpp | 136 +++--------------- .../ultra_bench/ultra_honk_rounds.bench.cpp | 12 +- .../ultra_bench/ultra_plonk_rounds.bench.cpp | 12 +- .../benchmark/widgets_bench/widget.bench.cpp | 83 ++++++++--- 4 files changed, 104 insertions(+), 139 deletions(-) diff --git a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp index 616e85957be..e922e3b3209 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/relations_bench/relations.bench.cpp @@ -33,119 +33,27 @@ template void execute_relation(::benchmark: Relation::accumulate(accumulator, new_value, params, 1); } } - -void ultra_auxiliary_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(ultra_auxiliary_relation); - -void ultra_elliptic_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(ultra_elliptic_relation); - -void ultra_ecc_op_queue_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(ultra_ecc_op_queue_relation); - -void ultra_gen_perm_sort_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(ultra_gen_perm_sort_relation); - -void ultralookup_relation(::benchmark::State& state) noexcept -{ - 
execute_relation>(state); -} -BENCHMARK(ultralookup_relation); - -void ultra_permutation_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(ultra_permutation_relation); - -void ultra_arithmetic_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(ultra_arithmetic_relation); - -void translator_decomposition_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(translator_decomposition_relation); - -void translator_opcode_constraint_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(translator_opcode_constraint_relation); - -void translator_accumulator_transfer_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(translator_accumulator_transfer_relation); - -void translator_gen_perm_sort_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(translator_gen_perm_sort_relation); - -void translator_non_native_field_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(translator_non_native_field_relation); - -void translator_permutation_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(translator_permutation_relation); - -void eccvm_lookup_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(eccvm_lookup_relation); - -void eccvm_msm_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(eccvm_msm_relation); - -void eccvm_point_table_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(eccvm_point_table_relation); - -void eccvm_set_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(eccvm_set_relation); - -void eccvm_transcript_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} 
-BENCHMARK(eccvm_transcript_relation); - -void eccvm_wnaf_relation(::benchmark::State& state) noexcept -{ - execute_relation>(state); -} -BENCHMARK(eccvm_wnaf_relation); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); + +BENCHMARK(execute_relation>); + +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); + +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); +BENCHMARK(execute_relation>); } // namespace proof_system::benchmark::relations diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp index 2552a8bc406..643aa216521 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_honk_rounds.bench.cpp @@ -9,7 +9,15 @@ using namespace benchmark; using namespace proof_system; // The rounds to measure -enum { PREAMBLE, WIRE_COMMITMENTS, SORTED_LIST_ACCUMULATOR, GRAND_PRODUCT_COMPUTATION, RELATION_CHECK, ZEROMORPH }; +enum { + PREAMBLE, + WIRE_COMMITMENTS, + SORTED_LIST_ACCUMULATOR, + LOG_DERIVATIVE_INVERSE, + GRAND_PRODUCT_COMPUTATION, + RELATION_CHECK, + ZEROMORPH +}; /** * @details Benchmark ultrahonk by performing all the rounds, but only measuring one. 
@@ -34,6 +42,7 @@ BBERG_PROFILE static void test_round_inner(State& state, honk::UltraProver& prov time_if_index(PREAMBLE, [&] { prover.execute_preamble_round(); }); time_if_index(WIRE_COMMITMENTS, [&] { prover.execute_wire_commitments_round(); }); time_if_index(SORTED_LIST_ACCUMULATOR, [&] { prover.execute_sorted_list_accumulator_round(); }); + time_if_index(LOG_DERIVATIVE_INVERSE, [&] { prover.execute_log_derivative_inverse_round(); }); time_if_index(GRAND_PRODUCT_COMPUTATION, [&] { prover.execute_grand_product_computation_round(); }); time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); }); time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); }); @@ -65,6 +74,7 @@ BBERG_PROFILE static void test_round(State& state, size_t index) noexcept ROUND_BENCHMARK(PREAMBLE)->Iterations(1); ROUND_BENCHMARK(WIRE_COMMITMENTS)->Iterations(1); ROUND_BENCHMARK(SORTED_LIST_ACCUMULATOR)->Iterations(1); +ROUND_BENCHMARK(LOG_DERIVATIVE_INVERSE)->Iterations(1); ROUND_BENCHMARK(GRAND_PRODUCT_COMPUTATION)->Iterations(1); ROUND_BENCHMARK(RELATION_CHECK); ROUND_BENCHMARK(ZEROMORPH); diff --git a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_plonk_rounds.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_plonk_rounds.bench.cpp index 63a70c7625c..1c15df07798 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_plonk_rounds.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/ultra_bench/ultra_plonk_rounds.bench.cpp @@ -71,9 +71,9 @@ BBERG_PROFILE static void test_round(State& state, size_t index) noexcept // Fast rounds take a long time to benchmark because of how we compute statistical significance. // Limit to one iteration so we don't spend a lot of time redoing full proofs just to measure this part. 
ROUND_BENCHMARK(PREAMBLE)->Iterations(1); -ROUND_BENCHMARK(FIRST_WIRE_COMMITMENTS); -ROUND_BENCHMARK(SECOND_FIAT_SHAMIR_ETA); -ROUND_BENCHMARK(THIRD_FIAT_SHAMIR_BETA_GAMMA); -ROUND_BENCHMARK(FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT); -ROUND_BENCHMARK(FIFTH_COMPUTE_QUOTIENT_EVALUTION); -ROUND_BENCHMARK(SIXTH_BATCH_OPEN); +ROUND_BENCHMARK(FIRST_WIRE_COMMITMENTS)->Iterations(1); +ROUND_BENCHMARK(SECOND_FIAT_SHAMIR_ETA)->Iterations(1); +ROUND_BENCHMARK(THIRD_FIAT_SHAMIR_BETA_GAMMA)->Iterations(1); +ROUND_BENCHMARK(FOURTH_FIAT_SHAMIR_ALPHA_AND_COMMIT)->Iterations(1); +ROUND_BENCHMARK(FIFTH_COMPUTE_QUOTIENT_EVALUTION)->Iterations(1); +ROUND_BENCHMARK(SIXTH_BATCH_OPEN)->Iterations(1); diff --git a/barretenberg/cpp/src/barretenberg/benchmark/widgets_bench/widget.bench.cpp b/barretenberg/cpp/src/barretenberg/benchmark/widgets_bench/widget.bench.cpp index 59cd739fce4..289679d0354 100644 --- a/barretenberg/cpp/src/barretenberg/benchmark/widgets_bench/widget.bench.cpp +++ b/barretenberg/cpp/src/barretenberg/benchmark/widgets_bench/widget.bench.cpp @@ -3,8 +3,20 @@ #include "barretenberg/flavor/ultra.hpp" #include "barretenberg/plonk/composer/standard_composer.hpp" #include "barretenberg/plonk/composer/ultra_composer.hpp" +#include "barretenberg/plonk/proof_system/widgets/random_widgets/permutation_widget.hpp" +#include "barretenberg/plonk/proof_system/widgets/random_widgets/plookup_widget.hpp" +#include "barretenberg/plonk/proof_system/widgets/transition_widgets/elliptic_widget.hpp" +#include "barretenberg/plonk/proof_system/widgets/transition_widgets/genperm_sort_widget.hpp" +#include "barretenberg/plonk/proof_system/widgets/transition_widgets/plookup_arithmetic_widget.hpp" #include "barretenberg/plonk/proof_system/widgets/transition_widgets/plookup_auxiliary_widget.hpp" -#include + +// The widgets are implemented in a non-uniform way where the transition widgets provide a per-row execution function +// `accumulate_contribution` while the random widgets do not. 
Defining this preprocessor variable allows us to derive a +// per-row execution cost that is suitable for comparing against the cost of executing the Honk relations. For +// validation, we also directly benchmark the available `accumulate_contribution` functions. +// +// NOTE: this code is to be run single-threaded via taskset, e.g. taskset -c 0 +// #define GET_PER_ROW_TIME namespace { auto& engine = numeric::random::get_debug_engine(); @@ -12,6 +24,11 @@ auto& engine = numeric::random::get_debug_engine(); namespace proof_system::plonk { +#ifdef GET_PER_ROW_TIME +constexpr size_t LARGE_DOMAIN_SIZE = 4; +constexpr size_t WIDGET_BENCH_TEST_CIRCUIT_SIZE = 1 << 16; +#endif + struct BasicPlonkKeyAndTranscript { std::shared_ptr key; transcript::StandardTranscript transcript; @@ -22,8 +39,13 @@ BasicPlonkKeyAndTranscript get_plonk_key_and_transcript() barretenberg::srs::init_crs_factory("../srs_db/ignition"); auto inner_composer = plonk::UltraComposer(); auto builder = typename plonk::UltraComposer::CircuitBuilder(); - bench_utils::generate_basic_arithmetic_circuit(builder, 80); + bench_utils::generate_basic_arithmetic_circuit(builder, 16); UltraProver inner_prover = inner_composer.create_prover(builder); +#ifdef GET_PER_ROW_TIME + if (!(inner_prover.key->circuit_size == WIDGET_BENCH_TEST_CIRCUIT_SIZE)) { + throw_or_abort("Circit size changed; update value for accurate benchmarks"); + } +#endif inner_prover.construct_proof(); return { inner_composer.circuit_proving_key, inner_prover.transcript }; } @@ -36,34 +58,59 @@ template void execute_widget(::benchmark::Sta widget.compute_quotient_contribution(barretenberg::fr::random_element(), data.transcript); } } -void plookup_auxiliary_kernel(::benchmark::State& state) noexcept + +template void quotient_contribution(::benchmark::State& state) noexcept { BasicPlonkKeyAndTranscript data = get_plonk_key_and_transcript(); + Widget widget(data.key.get()); + for (auto _ : state) { +#ifdef GET_PER_ROW_TIME + auto start =
std::chrono::high_resolution_clock::now(); +#endif + widget.compute_quotient_contribution(barretenberg::fr::random_element(), data.transcript); +#ifdef GET_PER_ROW_TIME + auto end = std::chrono::high_resolution_clock::now(); + auto elapsed_seconds = std::chrono::duration_cast>(end - start); + state.SetIterationTime(elapsed_seconds.count() / (LARGE_DOMAIN_SIZE * WIDGET_BENCH_TEST_CIRCUIT_SIZE)); +#endif + } +} - using FFTGetter = ProverPlookupAuxiliaryWidget::FFTGetter; - using FFTKernel = ProverPlookupAuxiliaryWidget::FFTKernel; +#ifdef GET_PER_ROW_TIME +BENCHMARK(quotient_contribution>)->Iterations(1)->UseManualTime(); +BENCHMARK(quotient_contribution>)->Iterations(1)->UseManualTime(); +BENCHMARK(quotient_contribution>)->Iterations(1)->UseManualTime(); +BENCHMARK(quotient_contribution>)->Iterations(1)->UseManualTime(); +BENCHMARK(quotient_contribution>)->Iterations(1)->UseManualTime(); +BENCHMARK(quotient_contribution>)->Iterations(1)->UseManualTime(); +#else +BENCHMARK(quotient_contribution>)->Iterations(1); +BENCHMARK(quotient_contribution>)->Iterations(1); +BENCHMARK(quotient_contribution>)->Iterations(1); +BENCHMARK(quotient_contribution>)->Iterations(1); +BENCHMARK(quotient_contribution>)->Iterations(1); +BENCHMARK(quotient_contribution>)->Iterations(1); +#endif + +template void accumulate_contribution(::benchmark::State& state) noexcept +{ + BasicPlonkKeyAndTranscript data = get_plonk_key_and_transcript(); + + using FFTGetter = typename Widget::FFTGetter; + using FFTKernel = typename Widget::FFTKernel; auto polynomials = FFTGetter::get_polynomials(data.key.get(), FFTKernel::get_required_polynomial_ids()); auto challenges = FFTGetter::get_challenges( data.transcript, barretenberg::fr::random_element(), FFTKernel::quotient_required_challenges); for (auto _ : state) { - // NOTE: this simply calls the following 3 functions it does NOT try to replicate ProverPlookupAuxiliaryWidget - // logic exactly barretenberg::fr result{ 0 }; 
FFTKernel::accumulate_contribution(polynomials, challenges, result, 0); } } -BENCHMARK(plookup_auxiliary_kernel); - -void plookup_auxiliary_widget(::benchmark::State& state) noexcept -{ - BasicPlonkKeyAndTranscript data = get_plonk_key_and_transcript(); - ProverPlookupAuxiliaryWidget widget(data.key.get()); - for (auto _ : state) { - widget.compute_quotient_contribution(barretenberg::fr::random_element(), data.transcript); - } -} -BENCHMARK(plookup_auxiliary_widget); +BENCHMARK(accumulate_contribution>); +BENCHMARK(accumulate_contribution>); +BENCHMARK(accumulate_contribution>); +BENCHMARK(accumulate_contribution>); } // namespace proof_system::plonk