-
Notifications
You must be signed in to change notification settings - Fork 293
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Analyze % of time spent on field arithmetic (#4501)
## Benchmark GoblinFull/1

```
------------------------------------------------------------------
Benchmark                      Time             CPU   Iterations
-------------------------------------------------------------------
GoblinBench/GoblinFull/1   69409 ms     12395 ms            1
aggregating 517841027 counts of asm_add_with_coarse_reduction at time 2.2429419969079083 ns.
aggregating 2385088 counts of asm_conditional_negate at time 1.4026850702603462 ns.
aggregating 268246459 counts of asm_mul_with_coarse_reduction at time 16.81106049038619 ns.
aggregating 972751082 counts of asm_self_add_with_coarse_reduction at time 2.276063249739074 ns.
aggregating 1740115104 counts of asm_self_mul_with_coarse_reduction at time 18.54383332441474 ns.
aggregating 177404712 counts of asm_self_reduce_once at time 2.2564670875677177 ns.
aggregating 294979853 counts of asm_self_sqr_with_coarse_reduction at time 19.935956005726094 ns.
aggregating 605409741 counts of asm_self_sub_with_coarse_reduction at time 2.530065801186028 ns.
aggregating 189416246 counts of asm_sqr_with_coarse_reduction at time 15.879287202925925 ns.
Time spent on field ops: 50.977s.
```

$50.977/69.409 = 0.734$

## Benchmark GoblinFull/6 (the "medium-complexity transaction case")

```
-------------------------------------------------------------------
Benchmark                      Time             CPU   Iterations
-------------------------------------------------------------------
GoblinBench/GoblinFull/6  221121 ms     37562 ms            1
aggregating 1673577237 counts of asm_add_with_coarse_reduction at time 2.2429419969079083 ns.
aggregating 12834880 counts of asm_conditional_negate at time 1.4026850702603462 ns.
aggregating 796532808 counts of asm_mul_with_coarse_reduction at time 16.81106049038619 ns.
aggregating 3011401875 counts of asm_self_add_with_coarse_reduction at time 2.276063249739074 ns.
aggregating 5865206849 counts of asm_self_mul_with_coarse_reduction at time 18.54383332441474 ns.
aggregating 565422726 counts of asm_self_reduce_once at time 2.2564670875677177 ns.
aggregating 1428616720 counts of asm_self_sqr_with_coarse_reduction at time 19.935956005726094 ns.
aggregating 2034002527 counts of asm_self_sub_with_coarse_reduction at time 2.530065801186028 ns.
aggregating 659886656 counts of asm_sqr_with_coarse_reduction at time 15.879287202925925 ns.
Time spent on field ops: 178.161s.
```

$178.161/221.121 = 0.806$

---------

Co-authored-by: ludamad <adam.domurad@gmail.com>
Co-authored-by: ludamad <adam@aztecprotocol.com>
- Loading branch information
1 parent
5285010
commit 5ddfa16
Showing
8 changed files
with
367 additions
and
30 deletions.
There are no files selected for viewing
40 changes: 40 additions & 0 deletions
40
barretenberg/cpp/scripts/benchmark_field_ops_percentage.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
#!/usr/bin/env bash
# Estimate how much of a benchmark's run time is spent on field arithmetic.
#
# Steps:
#   1. Build and run TARGET with the op-count-track preset, writing the
#      per-operation counters to build-op-count-track/TARGET.json.
#   2. If build-op-count-track/field_op_costs.json does not exist yet, build
#      and run fr_straight_bench (clang16 preset) to produce it.
#   3. Run the same benchmark pinned to one core via benchmark_remote.sh.
#   4. Combine counters and costs with compute_field_operations_time.py.
#
# Usage: benchmark_field_ops_percentage.sh [TARGET] [FILTER]
#   TARGET  benchmark binary to build and run   (default: goblin_bench)
#   FILTER  regex passed to --benchmark_filter  (default: GoblinFull/1$)
#
# NOTE(review): compute_field_operations_time.py reads goblin_bench.json and
# looks up "GoblinBench/GoblinFull/1" by default, so a non-default TARGET or
# FILTER also requires adjusting that script.
set -eu

TARGET=${1:-goblin_bench}
FILTER=${2:-"GoblinFull/1$"}

BUILD_OP_COUNT_TRACK_DIR=build-op-count-track

# Move above script dir.
cd "$(dirname "$0")/.."

# Measure the benchmarks with ops counting
cmake --preset op-count-track
cmake --build --preset op-count-track --target "$TARGET"
# This can be run multithreaded
cd "$BUILD_OP_COUNT_TRACK_DIR"
./bin/"$TARGET" --benchmark_filter="$FILTER" \
                --benchmark_out="$TARGET.json" \
                --benchmark_out_format=json \
                --benchmark_counters_tabular=true

# If needed, benchmark the basic Fr operations
FIELD_OP_COSTS=field_op_costs.json
if [ ! -f "$FIELD_OP_COSTS" ]; then
    cd ../
    FIELD_OPS_TARGET=fr_straight_bench
    cmake --preset clang16
    cmake --build --preset clang16 --target "$FIELD_OPS_TARGET"
    cd build
    ./bin/"$FIELD_OPS_TARGET" --benchmark_out="../$BUILD_OP_COUNT_TRACK_DIR/$FIELD_OP_COSTS" \
                              --benchmark_out_format=json
fi

# Compute the single-threaded benchmarks for comparison.
# Both branches above leave us one level below the cpp directory.
cd ../
./scripts/benchmark_remote.sh "$TARGET" "taskset -c 0 ./$TARGET --benchmark_filter=$FILTER"

# Analyze the results
python3 ./scripts/compute_field_operations_time.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,63 @@ | ||
import json | ||
from pathlib import Path | ||
|
||
# Estimate the total time a benchmark spends on field operations by combining
# per-operation call counts (from the op-count-track build) with the measured
# cost of each operation (from the field operation micro-benchmarks).

PREFIX = Path("build-op-count-track")
OPS_BENCH = Path("field_op_costs.json")
GOBLIN_BENCH_JSON = Path("goblin_bench.json")
BENCHMARK = "GoblinBench/GoblinFull/1"

# We will populate time per operation for a subset of the operations.
# For accurate counting, we must select operations that do not call other
# operations on the list.
to_keep = [
    "asm_add_with_coarse_reduction",
    "asm_conditional_negate",
    "asm_mul_with_coarse_reduction",
    # "asm_reduce_once",
    "asm_self_add_with_coarse_reduction",
    "asm_self_mul_with_coarse_reduction",
    "asm_self_reduce_once",
    "asm_self_sqr_with_coarse_reduction",
    "asm_self_sub_with_coarse_reduction",
    "asm_sqr_with_coarse_reduction",
    # "mul",
    # "self_mul",
    # "add",
    # "self_add",
    # "sub",
    # "self_sub",
    # "invert",  (mostly just self_sqr and *=)
    # "self_neg",
    # "self_reduce_once",
    # "self_to_montgomery_form",
    # "self_sqr",
    # "sqr",
]


def load_benchmarks(path):
    """Return the "benchmarks" list stored in the JSON report at *path*."""
    with open(path, "r", encoding="utf-8") as read_file:
        return json.load(read_file)["benchmarks"]


def main(prefix=PREFIX, benchmark=BENCHMARK):
    """Print and return the estimated seconds spent on field operations.

    Args:
        prefix: Directory containing both JSON reports.
        benchmark: Name of the entry in the counters report to analyze.

    Returns:
        The estimated time in seconds, as a float.

    Raises:
        LookupError: If *benchmark* is not present in the counters report.
    """
    # Read the measurements of the basic field operations. "real_time" is
    # used as-is and treated as nanoseconds below.
    ns_per_op = {
        bench["name"]: bench["real_time"]
        for bench in load_benchmarks(prefix / OPS_BENCH)
        if bench["name"] in to_keep
    }

    matches = [
        bench
        for bench in load_benchmarks(prefix / GOBLIN_BENCH_JSON)
        if bench["name"] == benchmark
    ]
    if not matches:
        raise LookupError(
            f"benchmark {benchmark!r} not found in {prefix / GOBLIN_BENCH_JSON}"
        )
    # If the name occurs more than once, the last entry wins.
    counters = matches[-1]

    total_ns = 0
    for key, ns in ns_per_op.items():
        full_key = "fr::" + key
        if full_key not in counters:
            continue
        count = int(counters[full_key])
        print(f'aggregating {count} counts of {key} at time {ns} ns.')
        total_ns += count * ns

    total_time = total_ns / 1e9

    print(f'Time spent on field ops: {round(total_time, 3)}s.')
    return total_time


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
209 changes: 209 additions & 0 deletions
209
barretenberg/cpp/src/barretenberg/ecc/curves/bn254/fr_straight.bench.cpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,209 @@ | ||
#include "fr.hpp" | ||
|
||
#include <benchmark/benchmark.h> | ||
|
||
using namespace bb; | ||
using namespace benchmark; | ||
|
||
namespace { | ||
void asm_add_with_coarse_reduction(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
DoNotOptimize(fr::asm_add_with_coarse_reduction(x, y)); | ||
} | ||
} | ||
BENCHMARK(asm_add_with_coarse_reduction); | ||
|
||
void asm_conditional_negate(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
fr::asm_conditional_negate(x, true); | ||
} | ||
} | ||
BENCHMARK(asm_conditional_negate); | ||
|
||
void asm_mul_with_coarse_reduction(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
DoNotOptimize(fr::asm_mul_with_coarse_reduction(x, y)); | ||
} | ||
} | ||
BENCHMARK(asm_mul_with_coarse_reduction); | ||
|
||
void asm_reduce_once(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
DoNotOptimize(fr::asm_reduce_once(x)); | ||
} | ||
} | ||
BENCHMARK(asm_reduce_once); | ||
|
||
void asm_self_add_with_coarse_reduction(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
fr::asm_self_add_with_coarse_reduction(x, y); | ||
} | ||
} | ||
BENCHMARK(asm_self_add_with_coarse_reduction); | ||
|
||
void asm_self_mul_with_coarse_reduction(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
fr::asm_self_mul_with_coarse_reduction(x, y); | ||
} | ||
} | ||
BENCHMARK(asm_self_mul_with_coarse_reduction); | ||
|
||
void asm_self_reduce_once(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
fr::asm_self_reduce_once(x); | ||
} | ||
} | ||
BENCHMARK(asm_self_reduce_once); | ||
|
||
void asm_self_sqr_with_coarse_reduction(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
fr::asm_self_sqr_with_coarse_reduction(x); | ||
} | ||
} | ||
BENCHMARK(asm_self_sqr_with_coarse_reduction); | ||
|
||
void asm_self_sub_with_coarse_reduction(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
fr::asm_self_sub_with_coarse_reduction(x, y); | ||
} | ||
} | ||
BENCHMARK(asm_self_sub_with_coarse_reduction); | ||
|
||
void asm_sqr_with_coarse_reduction(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
DoNotOptimize(fr::asm_sqr_with_coarse_reduction(x)); | ||
} | ||
} | ||
BENCHMARK(asm_sqr_with_coarse_reduction); | ||
|
||
void mul(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
DoNotOptimize(x * y); | ||
} | ||
} | ||
BENCHMARK(mul); | ||
|
||
void self_mul(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
x *= y; | ||
} | ||
} | ||
BENCHMARK(self_mul); | ||
|
||
void add(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
DoNotOptimize(x + y); | ||
} | ||
} | ||
BENCHMARK(add); | ||
|
||
void self_add(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
x += y; | ||
} | ||
} | ||
BENCHMARK(self_add); | ||
|
||
void sub(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
DoNotOptimize(x - y); | ||
} | ||
} | ||
BENCHMARK(sub); | ||
|
||
void self_sub(State& state) noexcept | ||
{ | ||
fr x, y; | ||
for (auto _ : state) { | ||
x -= y; | ||
} | ||
} | ||
BENCHMARK(self_sub); | ||
|
||
void invert(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
DoNotOptimize(x.invert()); | ||
} | ||
} | ||
BENCHMARK(invert); | ||
|
||
void self_neg(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
x.self_neg(); | ||
} | ||
} | ||
BENCHMARK(self_neg); | ||
|
||
void self_reduce_once(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
x.self_reduce_once(); | ||
} | ||
} | ||
BENCHMARK(self_reduce_once); | ||
|
||
void self_to_montgomery_form(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
x.self_to_montgomery_form(); | ||
} | ||
} | ||
BENCHMARK(self_to_montgomery_form); | ||
|
||
void self_sqr(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
x.self_sqr(); | ||
} | ||
} | ||
BENCHMARK(self_sqr); | ||
|
||
void sqr(State& state) noexcept | ||
{ | ||
fr x; | ||
for (auto _ : state) { | ||
DoNotOptimize(x.sqr()); | ||
} | ||
} | ||
BENCHMARK(sqr); | ||
} // namespace | ||
|
||
// NOLINTNEXTLINE macro invocation triggers style guideline errors from Google Benchmark code | ||
BENCHMARK_MAIN(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.