Skip to content

Commit

Permalink
feat(bb): op counting mode (#4437)
Browse files Browse the repository at this point in the history
- Introduces preset 'op-counting' that builds with support for operation
counts
- Introduces mechanism to connect this to google benchmark
- Support for ultra_honk_rounds_bench and goblin_bench right now

![image](https://github.com/AztecProtocol/aztec-packages/assets/163993/9785e99b-ef1f-4ea6-bfab-cd63d33499e1)

For best results run with e.g. `./bin/goblin_bench
--benchmark_min_time=0s --benchmark_counters_tabular=true`

Other: make macros consistently use the BB_ prefix (renames the remaining BBERG_ macros)

---------

Co-authored-by: ludamad <adam@aztecprotocol.com>
  • Loading branch information
ludamad and ludamad0 authored Feb 6, 2024
1 parent 934fabc commit 5d00cff
Show file tree
Hide file tree
Showing 24 changed files with 397 additions and 143 deletions.
15 changes: 15 additions & 0 deletions barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,16 @@
"LDFLAGS": "-fsanitize=memory"
}
},
{
"name": "op-counting",
"displayName": "Release build with operation counts for benchmarks",
"description": "Build with op counting",
"inherits": "clang16",
"binaryDir": "build-op-counting",
"environment": {
"CXXFLAGS": "-DBB_USE_OP_COUNT"
}
},
{
"name": "coverage",
"displayName": "Build with coverage",
Expand Down Expand Up @@ -300,6 +310,11 @@
"inherits": "default",
"configurePreset": "clang16"
},
{
"name": "op-counting",
"inherits": "default",
"configurePreset": "op-counting"
},
{
"name": "clang16-dbg",
"inherits": "default",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
* sequential_copy: 3.3
*
*/
#include "barretenberg/common/op_count.hpp"
#include "barretenberg/common/thread.hpp"
#include "barretenberg/ecc/curves/bn254/bn254.hpp"
#include <benchmark/benchmark.h>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
#include <benchmark/benchmark.h>

#include "barretenberg/benchmark/ultra_bench/mock_proofs.hpp"
#include "barretenberg/common/op_count_google_bench.hpp"
#include "barretenberg/goblin/goblin.hpp"
#include "barretenberg/goblin/mock_circuits.hpp"
#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
Expand Down Expand Up @@ -67,6 +68,7 @@ BENCHMARK_DEFINE_F(GoblinBench, GoblinFull)(benchmark::State& state)
GoblinMockCircuits::perform_op_queue_interactions_for_mock_first_circuit(goblin.op_queue);

for (auto _ : state) {
BB_REPORT_OP_COUNT_IN_BENCH(state);
// Perform a specified number of iterations of function/kernel accumulation
perform_goblin_accumulation_rounds(state, goblin);

Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <benchmark/benchmark.h>

#include "barretenberg/benchmark/ultra_bench/mock_proofs.hpp"
#include "barretenberg/common/op_count_google_bench.hpp"
#include "barretenberg/proof_system/circuit_builder/ultra_circuit_builder.hpp"
#include "barretenberg/ultra_honk/ultra_composer.hpp"
#include "barretenberg/ultra_honk/ultra_prover.hpp"
Expand All @@ -27,15 +28,20 @@ enum {
* @param prover - The ultrahonk prover.
* @param index - The pass to measure.
**/
BBERG_PROFILE static void test_round_inner(State& state, UltraProver& prover, size_t index) noexcept
BB_PROFILE static void test_round_inner(State& state, UltraProver& prover, size_t index) noexcept
{
auto time_if_index = [&](size_t target_index, auto&& func) -> void {
BB_REPORT_OP_COUNT_IN_BENCH(state);
if (index == target_index) {
state.ResumeTiming();
}

func();
if (index == target_index) {
state.PauseTiming();
} else {
// We don't actually want to write to user-defined counters
BB_REPORT_OP_COUNT_BENCH_CANCEL();
}
};

Expand All @@ -47,7 +53,7 @@ BBERG_PROFILE static void test_round_inner(State& state, UltraProver& prover, si
time_if_index(RELATION_CHECK, [&] { prover.execute_relation_check_rounds(); });
time_if_index(ZEROMORPH, [&] { prover.execute_zeromorph_rounds(); });
}
BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
BB_PROFILE static void test_round(State& state, size_t index) noexcept
{
bb::srs::init_crs_factory("../srs_db/ignition");

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ enum {
SIXTH_BATCH_OPEN
};

BBERG_PROFILE static void plonk_round(
BB_PROFILE static void plonk_round(
State& state, plonk::UltraProver& prover, size_t target_index, size_t index, auto&& func) noexcept
{
if (index == target_index) {
Expand All @@ -37,7 +37,7 @@ BBERG_PROFILE static void plonk_round(
* @param prover - The ultraplonk prover.
* @param index - The pass to measure.
**/
BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept
BB_PROFILE static void test_round_inner(State& state, plonk::UltraProver& prover, size_t index) noexcept
{
plonk_round(state, prover, PREAMBLE, index, [&] { prover.execute_preamble_round(); });
plonk_round(state, prover, FIRST_WIRE_COMMITMENTS, index, [&] { prover.execute_first_round(); });
Expand All @@ -47,7 +47,7 @@ BBERG_PROFILE static void test_round_inner(State& state, plonk::UltraProver& pro
plonk_round(state, prover, FIFTH_COMPUTE_QUOTIENT_EVALUTION, index, [&] { prover.execute_fifth_round(); });
plonk_round(state, prover, SIXTH_BATCH_OPEN, index, [&] { prover.execute_sixth_round(); });
}
BBERG_PROFILE static void test_round(State& state, size_t index) noexcept
BB_PROFILE static void test_round(State& state, size_t index) noexcept
{
bb::srs::init_crs_factory("../srs_db/ignition");
for (auto _ : state) {
Expand Down
22 changes: 16 additions & 6 deletions barretenberg/cpp/src/barretenberg/common/compiler_hints.hpp
Original file line number Diff line number Diff line change
@@ -1,16 +1,26 @@
#pragma once

#ifdef _WIN32
#define BBERG_INLINE __forceinline inline
#define BB_INLINE __forceinline inline
#else
#define BBERG_INLINE __attribute__((always_inline)) inline
#define BB_INLINE __attribute__((always_inline)) inline
#endif

// TODO(AD): Other instrumentation?
#ifdef XRAY
#define BBERG_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]]
#define BBERG_NO_PROFILE [[clang::xray_never_instrument]]
#define BB_PROFILE [[clang::xray_always_instrument]] [[clang::noinline]]
#define BB_NO_PROFILE [[clang::xray_never_instrument]]
#else
#define BBERG_PROFILE
#define BBERG_NO_PROFILE
#define BB_PROFILE
#define BB_NO_PROFILE
#endif

// Branch-prediction hints: tell the optimizer which outcome of a condition is expected.
// __builtin_expect is provided by both clang and GCC (clang also defines __GNUC__), so use it on either.
// On other compilers the macros degrade to a plain truth test. The argument is always parenthesised
// and normalised with !! so that compound expressions such as `!BB_UNLIKELY(a || b)` keep their
// meaning and both branches of the #if yield a 0/1-style value.
#if defined(__clang__) || defined(__GNUC__)
#define BB_LIKELY(x) __builtin_expect(!!(x), 1)
#define BB_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define BB_LIKELY(x) (!!(x))
#define BB_UNLIKELY(x) (!!(x))
#endif
51 changes: 51 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/op_count.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@

#include <cstddef>
#ifdef BB_USE_OP_COUNT
#include "op_count.hpp"
#include <iostream>
#include <sstream>
#include <thread>

namespace bb::detail {
void GlobalOpCountContainer::add_entry(const char* key, std::size_t* count)
{
std::unique_lock<std::mutex> lock(mutex);
std::stringstream ss;
ss << std::this_thread::get_id();
counts.push_back({ key, ss.str(), count });
}

/**
 * Write every registered counter with a non-zero value to std::cout, one line per entry
 * (label, count, registering thread), framed by START/END marker lines.
 *
 * Note: this does not take the container's mutex.
 */
void GlobalOpCountContainer::print() const
{
    std::cout << "print_op_counts() START" << std::endl;
    for (const Entry& op : counts) {
        const std::size_t tally = *op.count;
        if (tally == 0) {
            // Skip counters that were never hit (or were reset by clear()).
            continue;
        }
        std::cout << op.key << "\t" << tally << "\t[thread=" << op.thread_id << "]" << std::endl;
    }
    std::cout << "print_op_counts() END" << std::endl;
}

/**
 * Sum the registered counters per label.
 *
 * @return Map from label to the total of all entries registered under that label. Entries whose
 *         counter is zero are ignored, so a label only appears if at least one of its counters is non-zero.
 *
 * Note: this does not take the container's mutex.
 */
std::map<std::string, std::size_t> GlobalOpCountContainer::get_aggregate_counts() const
{
    std::map<std::string, std::size_t> totals;
    for (const Entry& op : counts) {
        const std::size_t tally = *op.count;
        if (tally == 0) {
            continue;
        }
        totals[op.key] += tally;
    }
    return totals;
}

void GlobalOpCountContainer::clear()
{
std::unique_lock<std::mutex> lock(mutex);
for (Entry& entry : counts) {
*entry.count = 0;
}
}

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
GlobalOpCountContainer GLOBAL_OP_COUNTS;
} // namespace bb::detail
#endif
87 changes: 87 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/op_count.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@

#pragma once

#ifndef BB_USE_OP_COUNT
// require a semicolon to appease formatters
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK() (void)0
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK_NAME(name) (void)0
#else
/**
* Provides an abstraction that counts operations based on function names.
* For efficiency, we spread out counts across threads.
*/

#include "barretenberg/common/compiler_hints.hpp"
#include <algorithm>
#include <atomic>
#include <cstdlib>
#include <map>
#include <mutex>
#include <string>
#include <vector>
namespace bb::detail {
// Compile-time string
// See e.g. https://www.reddit.com/r/cpp_questions/comments/pumi9r/does_c20_not_support_string_literals_as_template/
/**
 * Fixed-size character buffer that copies a string literal (including its terminating '\0').
 * N is the size of the source array, so it counts the terminator as well.
 * The constructor is intentionally implicit so that a literal can be converted directly.
 */
template <std::size_t N> struct OperationLabel {
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    constexpr OperationLabel(const char (&str)[N]) { std::copy_n(str, N, value); }

    // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays)
    char value[N];
};

// Contains all statically known op counts.
// Acts as a registry of (label, thread, counter pointer) entries: counters are registered via
// add_entry() and later read through print() / get_aggregate_counts() or reset through clear().
struct GlobalOpCountContainer {
public:
// One registered counter.
struct Entry {
// Label of the counted operation.
std::string key;
// Textual id of the thread that called add_entry() for this counter.
std::string thread_id;
// Pointer to the counter value; the container stores the pointer only and dereferences it on read/reset.
std::size_t* count;
};
// Locked by add_entry() and clear(). print() and get_aggregate_counts() do not lock it.
std::mutex mutex;
// All registered entries, in registration order.
std::vector<Entry> counts;
// Writes every entry with a non-zero count to std::cout.
void print() const;
// Sets every registered counter back to zero.
// NOTE: Should be called when other threads aren't active
void clear();
// Appends a new entry for `key`, tagged with the calling thread's id.
void add_entry(const char* key, std::size_t* count);
// Returns the per-label sum of all non-zero counters.
std::map<std::string, std::size_t> get_aggregate_counts() const;
};

// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
extern GlobalOpCountContainer GLOBAL_OP_COUNTS;

/**
 * Per-label operation counter. Each thread gets its own counter, created on first use and
 * registered with GLOBAL_OP_COUNTS under the label Op.
 */
template <OperationLabel Op> struct GlobalOpCount {
  public:
    // Null until the current thread first increments this label.
    // NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
    static thread_local std::size_t* thread_local_count;

    /**
     * Bump this thread's counter for Op by one.
     * Does nothing when invoked during constant evaluation.
     */
    static constexpr void increment_op_count()
    {
        if (!std::is_constant_evaluated()) {
            if (BB_UNLIKELY(thread_local_count == nullptr)) {
                // First hit on this thread: allocate a zeroed counter and hand its address to the
                // global registry, which keeps the raw pointer. It is not freed in this block.
                thread_local_count = new std::size_t();
                GLOBAL_OP_COUNTS.add_entry(Op.value, thread_local_count);
            }
            ++(*thread_local_count);
        }
    }
};
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
template <OperationLabel Op> thread_local std::size_t* GlobalOpCount<Op>::thread_local_count;

} // namespace bb::detail

// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK() bb::detail::GlobalOpCount<__func__>::increment_op_count()
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_OP_COUNT_TRACK_NAME(name) bb::detail::GlobalOpCount<name>::increment_op_count()
#endif
50 changes: 50 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/op_count_google_bench.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@

#pragma once
#include <benchmark/benchmark.h>

#ifndef BB_USE_OP_COUNT
namespace bb {
struct GoogleBenchOpCountReporter {
GoogleBenchOpCountReporter(::benchmark::State& state)
{
// unused, we don't have op counts on
(void)state;
}
};
}; // namespace bb
// require a semicolon to appease formatters
#define BB_REPORT_OP_COUNT_IN_BENCH(state) (void)0
#define BB_REPORT_OP_COUNT_BENCH_CANCEL() (void)0
#else
#include "op_count.hpp"
namespace bb {
// Scope guard that bridges the global op counters to google benchmark's user-defined counters:
// construction resets all op counters, destruction copies the per-label totals into state.counters
// unless `cancelled` was set in the meantime.
// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions)
struct GoogleBenchOpCountReporter {
    // A reference member is acceptable here because instances only live inside a function frame.
    ::benchmark::State& state;
    // Set to true to suppress reporting on destruction.
    bool cancelled = false;

    GoogleBenchOpCountReporter(::benchmark::State& state)
        : state(state)
    {
        // Start from zero every time the measured scope is entered.
        bb::detail::GLOBAL_OP_COUNTS.clear();
    }

    ~GoogleBenchOpCountReporter()
    {
        if (!cancelled) {
            // Publish the totals gathered while this object was alive.
            const auto totals = bb::detail::GLOBAL_OP_COUNTS.get_aggregate_counts();
            for (const auto& [label, total] : totals) {
                state.counters[label] = static_cast<double>(total);
            }
        }
    }
};
// Allow for integration with google benchmark user-defined counters.
// BB_REPORT_OP_COUNT_IN_BENCH declares a reporter in the current scope; BB_REPORT_OP_COUNT_BENCH_CANCEL
// marks that reporter as cancelled so it reports nothing when the scope ends.
// Both expand to a single statement WITHOUT a trailing semicolon - the call site supplies it, exactly
// as it must for the (void)0 variants used when BB_USE_OP_COUNT is not defined. This keeps the macros
// usable in unbraced if/else. The reporter type is fully qualified so the macro also works in code
// that does not have namespace bb in scope.
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_REPORT_OP_COUNT_IN_BENCH(state) ::bb::GoogleBenchOpCountReporter __bb_report_op_count_in_bench{ state }
// NOLINTNEXTLINE(cppcoreguidelines-macro-usage)
#define BB_REPORT_OP_COUNT_BENCH_CANCEL() __bb_report_op_count_in_bench.cancelled = true
}; // namespace bb
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class ThreadPool {
std::condition_variable complete_condition_;
bool stop = false;

BBERG_NO_PROFILE void worker_loop(size_t thread_index);
BB_NO_PROFILE void worker_loop(size_t thread_index);

void do_iterations()
{
Expand Down
Loading

0 comments on commit 5d00cff

Please sign in to comment.