Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(bb): integrate tracy memory/cpu profiler #7718

Merged
merged 29 commits into from
Aug 1, 2024
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
c812370
feat: initial tracy cmake config
ludamad Jul 31, 2024
5447fc8
pass on tracy - give up on cmake subproject option passing
ludamad Jul 31, 2024
48a985a
start of tracy automation
ludamad Jul 31, 2024
7e2ec27
more memory instrumentation
ludamad Jul 31, 2024
f09c9e0
more zones
ludamad Jul 31, 2024
fb0a25a
fixes
ludamad Jul 31, 2024
42e53a7
delete operator new
ludamad Jul 31, 2024
01f98ab
fix linking tracy
ludamad Jul 31, 2024
68ef51a
final linker fix
ludamad Jul 31, 2024
9fda270
Merge github.com:AztecProtocol/aztec-packages into ad/tracy-enable
ludamad Aug 1, 2024
b65c8f2
tracy
ludamad Aug 1, 2024
2cd4588
operator new
ludamad Aug 1, 2024
4e37944
Merge branch 'master' into ad/tracy-enable
ludamad Aug 1, 2024
51735c6
Update benchmark_tracy.sh
ludamad Aug 1, 2024
5d95b69
Update client_ivc.bench.cpp
ludamad Aug 1, 2024
6cd0461
Update ci.yml
ludamad Aug 1, 2024
a095cea
tracy readme
ludamad Aug 1, 2024
88d8701
Merge remote-tracking branch 'origin/ad/tracy-enable' into ad/tracy-e…
ludamad Aug 1, 2024
32385eb
cmake comments
ludamad Aug 1, 2024
87209d8
zonescoped fix
ludamad Aug 1, 2024
e073499
Update mem.cpp
ludamad Aug 1, 2024
6328a89
Merge github.com:AztecProtocol/aztec-packages into ad/tracy-enable
ludamad Aug 1, 2024
f8a6655
Update mem.cpp
ludamad Aug 1, 2024
42f3ddc
fix build for wasm
ludamad Aug 1, 2024
c48ccf2
Merge remote-tracking branch 'origin/ad/tracy-enable' into ad/tracy-e…
ludamad Aug 1, 2024
a83a664
format
ludamad Aug 1, 2024
05e72ea
wasm build fix for single-threaded
ludamad Aug 1, 2024
bc8c7bd
Update CMakePresets.json
ludamad Aug 1, 2024
1feb04c
Update client_ivc.bench.cpp
ludamad Aug 1, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions barretenberg/cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ option(ENABLE_ASAN "Address sanitizer for debugging tricky memory corruption" OF
option(ENABLE_HEAVY_TESTS "Enable heavy tests when collecting coverage" OFF)
# Note: Must do 'sudo apt-get install libdw-dev' or equivalent
option(CHECK_CIRCUIT_STACKTRACES "Enable (slow) stack traces for check circuit" OFF)
option(ENABLE_TRACY "Enable low-medium overhead profiling for memory and performance with tracy" OFF)

if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64" OR CMAKE_SYSTEM_PROCESSOR MATCHES "arm64")
message(STATUS "Compiling for ARM.")
Expand All @@ -52,6 +53,10 @@ if(CHECK_CIRCUIT_STACKTRACES)
add_compile_options(-DCHECK_CIRCUIT_STACKTRACES)
endif()

# When the ENABLE_TRACY option is ON, pass -DTRACY_ENABLE to everything compiled from here on.
# NOTE(review): Tracy's headers are expected to turn their instrumentation macros into no-ops
# when TRACY_ENABLE is not defined - confirm against the Tracy manual.
if(ENABLE_TRACY)
add_compile_options(-DTRACY_ENABLE)
endif()

if(ENABLE_ASAN)
add_compile_options(-fsanitize=address)
add_link_options(-fsanitize=address)
Expand Down Expand Up @@ -139,6 +144,7 @@ include(cmake/build.cmake)
include(GNUInstallDirs)
include(cmake/arch.cmake)
include(cmake/threading.cmake)
include(cmake/tracy.cmake)
include(cmake/gtest.cmake)
include(cmake/benchmark.cmake)
include(cmake/module.cmake)
Expand Down
28 changes: 27 additions & 1 deletion barretenberg/cpp/CMakePresets.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,26 @@
"DISABLE_ASM": "ON"
}
},
{
"name": "tracy",
"displayName": "Release build with tracy",
"description": "Release build with tracy",
"inherits": "clang16",
"binaryDir": "build-tracy",
"cacheVariables": {
"ENABLE_TRACY": "ON"
}
},
{
"name": "wasm-tracy",
"displayName": "WASM build with tracy",
"description": "WASM build with tracy",
"inherits": "clang16",
"binaryDir": "build-wasm-tracy",
"cacheVariables": {
"ENABLE_TRACY": "ON"
}
},
{
"name": "clang16-dbg-fast",
"displayName": "Optimized debug build with Clang-16",
Expand Down Expand Up @@ -329,7 +349,8 @@
"CMAKE_BUILD_TYPE": "Release"
},
"cacheVariables": {
"MULTITHREADING": "ON"
"MULTITHREADING": "ON",
"Threads_FOUND": "ON"
}
},
{
Expand Down Expand Up @@ -410,6 +431,11 @@
"inherits": "default",
"configurePreset": "clang16-dbg"
},
{
"name": "tracy",
"inherits": "default",
"configurePreset": "tracy"
},
{
"name": "clang16-dbg-fast",
"inherits": "default",
Expand Down
5 changes: 5 additions & 0 deletions barretenberg/cpp/cmake/module.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ function(barretenberg_module MODULE_NAME)
${MODULE_NAME}
PUBLIC
${ARGN}
Tracy::TracyClient
${TBB_IMPORTED_TARGETS}
)

Expand Down Expand Up @@ -89,6 +90,7 @@ function(barretenberg_module MODULE_NAME)
target_link_libraries(
${MODULE_NAME}_test_objects
PRIVATE
Tracy::TracyClient
GTest::gtest
GTest::gtest_main
GTest::gmock_main
Expand Down Expand Up @@ -147,6 +149,7 @@ function(barretenberg_module MODULE_NAME)
GTest::gtest
GTest::gtest_main
GTest::gmock_main
Tracy::TracyClient
${TBB_IMPORTED_TARGETS}
)

Expand Down Expand Up @@ -238,6 +241,7 @@ function(barretenberg_module MODULE_NAME)
${BENCHMARK_NAME}_bench_objects
PRIVATE
benchmark::benchmark
Tracy::TracyClient
${TBB_IMPORTED_TARGETS}
)

Expand All @@ -253,6 +257,7 @@ function(barretenberg_module MODULE_NAME)
${MODULE_LINK_NAME}
${ARGN}
benchmark::benchmark
Tracy::TracyClient
${TBB_IMPORTED_TARGETS}
)
if(CHECK_CIRCUIT_STACKTRACES)
Expand Down
2 changes: 1 addition & 1 deletion barretenberg/cpp/cmake/msgpack.cmake
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
include(ExternalProject)

# External project: Download msgpack-c from GitHu
# External project: Download msgpack-c from GitHub
set(MSGPACK_PREFIX "${CMAKE_BINARY_DIR}/_deps/msgpack-c")
set(MSGPACK_INCLUDE "${MSGPACK_PREFIX}/src/msgpack-c/include")

Expand Down
10 changes: 10 additions & 0 deletions barretenberg/cpp/cmake/tracy.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
include(FetchContent)

# Fetch the Tracy profiler client at a pinned commit so that every build uses the same sources.
FetchContent_Declare(tracy
    GIT_REPOSITORY https://github.com/wolfpld/tracy
    GIT_TAG ffb98a972401c246b2348fb5341252e2ba855d00
    SYSTEM # optional, the tracy include directory will be treated as system directory
)
FetchContent_MakeAvailable(tracy)

# Public Tracy headers (tracy/Tracy.hpp). Derived from the directory FetchContent actually
# populated rather than a hard-coded "${CMAKE_BINARY_DIR}/_deps/tracy-src" path, so it stays
# correct when FETCHCONTENT_BASE_DIR or FETCHCONTENT_SOURCE_DIR_TRACY is overridden.
set(TRACY_INCLUDE "${tracy_SOURCE_DIR}/public")
32 changes: 32 additions & 0 deletions barretenberg/cpp/scripts/benchmark_tracy.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/usr/bin/env bash
# NOTE: intended to be run from one's external computer, connecting to the Aztec mainframe.
# The benchmark runs with headless capture, then we copy the trace file and run the tracy profiler.
# This is thus only really useful internally at Aztec, sorry external folks. It can be easily tweaked
# however for any SSH setup, especially an ubuntu one, and of course if you are just tracing on the
# same machine you can use the normal interactive tracy workflow.
#
# Usage: benchmark_tracy.sh USER [BENCHMARK] [COMMAND]
#   USER       prefix of the remote host (USER-box) and of the remote /mnt/user-data/USER directory
#   BENCHMARK  build target to profile (default: ultra_plonk_bench)
#   COMMAND    command executed from build-tracy on the remote (default: ./bin/BENCHMARK)
#
# NOTE(review): capture and profiler are built from whatever ~/tracy has checked out, while
# cmake/tracy.cmake pins the client to a fixed commit - confirm the two stay protocol-compatible.
set -eux
USER=${1:?usage: benchmark_tracy.sh USER [BENCHMARK] [COMMAND]}
BOX=$USER-box
BENCHMARK=${2:-ultra_plonk_bench}
COMMAND=${3:-./bin/$BENCHMARK}

# Remote side: build tracy-capture, start it in the background, then build and run the benchmark.
# $BENCHMARK and $COMMAND are expanded locally before the script is sent; ~ is expanded remotely.
ssh "$BOX" "
  set -eux ;
  ! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy ;
  cd ~/tracy/capture ;
  sudo apt-get install -y libdbus-1-dev libdbus-glib-1-dev ;
  mkdir -p build && cd build && cmake .. && make -j ;
  ./tracy-capture -a 127.0.0.1 -f -o trace-$BENCHMARK &
  sleep 0.1 ;
  cd ~/aztec-packages/barretenberg/cpp/ ;
  cmake --preset tracy && cmake --build --preset tracy --target $BENCHMARK --parallel ;
  cd build-tracy ;
  ninja $BENCHMARK ;
  $COMMAND ;
"

# Local side: build the profiler UI, fetch the trace and open it.
! [ -d ~/tracy ] && git clone https://github.com/wolfpld/tracy ~/tracy
cd ~/tracy
cmake -B profiler/build -S profiler -DCMAKE_BUILD_TYPE=Release
cmake --build profiler/build --parallel
scp "$BOX:/mnt/user-data/$USER/tracy/capture/build/trace-$BENCHMARK" .
~/tracy/profiler/build/tracy-profiler "trace-$BENCHMARK"
2 changes: 1 addition & 1 deletion barretenberg/cpp/src/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if(WASM)
add_link_options(-Wl,--export-memory,--import-memory,--stack-first,-z,stack-size=1048576,--max-memory=4294967296)
endif()

include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${MSGPACK_INCLUDE})
include_directories(${CMAKE_CURRENT_SOURCE_DIR} ${MSGPACK_INCLUDE} ${TRACY_INCLUDE})

# I feel this should be limited to ecc, however it's currently used in headers that go across libraries,
# and there currently isn't an easy way to inherit the DDISABLE_ASM parameter.
Expand Down
1 change: 1 addition & 0 deletions barretenberg/cpp/src/barretenberg/bb/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ if (NOT(FUZZING) AND NOT(WASM))
barretenberg
env
circuit_checker
Tracy::TracyClient
libdeflate::libdeflate_static
)
if(CHECK_CIRCUIT_STACKTRACES)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "barretenberg/goblin/mock_circuits.hpp"
#include "barretenberg/stdlib_circuit_builders/ultra_circuit_builder.hpp"
#include "barretenberg/ultra_honk/ultra_verifier.hpp"
#include "tracy/Tracy.hpp"

using namespace benchmark;
using namespace bb;
Expand All @@ -30,6 +31,8 @@ class ClientIVCBench : public benchmark::Fixture {
bb::srs::init_grumpkin_crs_factory("../srs_db/grumpkin");
}

// Explicit no-op override of the fixture's TearDown hook; `state` is deliberately unused.
void TearDown([[maybe_unused]] const ::benchmark::State& state) override {}

/**
* @brief Compute verification key for each circuit in the IVC based on the number of desired function circuits
* @details Assumes the following circuit ordering: one initial function circuit followed by pairs of {function,
Expand Down
3 changes: 2 additions & 1 deletion barretenberg/cpp/src/barretenberg/client_ivc/client_ivc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ void ClientIVC::accumulate(ClientCircuit& circuit, const std::shared_ptr<Verific
*/
ClientIVC::Proof ClientIVC::prove()
{
    // ZoneScoped is an object-like Tracy macro: writing it with "()" does not compile,
    // neither when it expands to a scoped-zone declaration nor when it expands to nothing.
    ZoneScoped;
    max_block_size_tracker.print(); // print minimum structured sizes for each block
    // Bundle the existing fold proof with freshly computed decider and goblin proofs.
    return { fold_output.proof, decider_prove(), goblin.prove() };
}
Expand Down Expand Up @@ -85,7 +86,7 @@ bool ClientIVC::verify(const Proof& proof,
* @param proof
* @return bool
*/
bool ClientIVC::verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances)
bool ClientIVC::verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances) const
{
auto eccvm_vk = std::make_shared<ECCVMVerificationKey>(goblin.get_eccvm_proving_key());
auto translator_vk = std::make_shared<TranslatorVerificationKey>(goblin.get_translator_proving_key());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ class ClientIVC {
const std::shared_ptr<ClientIVC::ECCVMVerificationKey>& eccvm_vk,
const std::shared_ptr<ClientIVC::TranslatorVerificationKey>& translator_vk);

bool verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances);
bool verify(Proof& proof, const std::vector<std::shared_ptr<VerifierInstance>>& verifier_instances) const;

bool prove_and_verify();

Expand Down
18 changes: 18 additions & 0 deletions barretenberg/cpp/src/barretenberg/common/mem.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#include "barretenberg/common/mem.hpp"

/**
 * @brief Global replacement for operator new that reports every allocation to Tracy.
 *
 * @param count Number of bytes requested by the new-expression.
 * @return A non-null pointer to at least `count` bytes obtained from malloc.
 *
 * A replaced operator new must never return nullptr: callers construct objects in the returned
 * storage without checking it. On allocation failure this aborts, matching what aligned_alloc in
 * mem.hpp does. Nothing is logged on that path because logging may itself need to allocate.
 */
void* operator new(std::size_t count)
{
    // malloc(0) is allowed to return nullptr, so always request at least one byte.
    const std::size_t request = (count == 0) ? 1 : count;
    // NOLINTBEGIN(cppcoreguidelines-no-malloc)
    void* ptr = malloc(request);
    // NOLINTEND(cppcoreguidelines-no-malloc)
    if (ptr == nullptr) {
        std::abort();
    }
    // Report the size the caller asked for, as before.
    TRACY_ALLOC(ptr, count);
    return ptr;
}

/**
 * @brief Global replacement for operator delete that reports every release to Tracy.
 *
 * @param ptr Pointer previously returned by the replaced operator new, or nullptr.
 *
 * Deleting a null pointer is a no-op, and there is no allocation for Tracy to pair it with,
 * so it is filtered out before any free event is emitted.
 */
void operator delete(void* ptr) noexcept
{
    if (ptr == nullptr) {
        return;
    }
    // Report first, then release, so the event is emitted while the block is still ours.
    TRACY_FREE(ptr);
    // NOLINTBEGIN(cppcoreguidelines-no-malloc)
    free(ptr);
    // NOLINTEND(cppcoreguidelines-no-malloc)
}
35 changes: 32 additions & 3 deletions barretenberg/cpp/src/barretenberg/common/mem.hpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,17 @@
#pragma once
#include "log.hpp"
#include "memory.h"
#include "tracy/Tracy.hpp"
#include "wasm_export.hpp"
#include <cstdlib>
#include <memory>
// #include <malloc.h>

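// TRACY_ALLOC / TRACY_FREE wrap Tracy's memory hooks so the variant can be switched in one place.
// The active definitions capture a call stack per event (TracyAllocS / TracyFreeS), which is more
// expensive; the commented-out definitions below are the plain variants without stack capture.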
#define TRACY_ALLOC(t, size) TracyAllocS(t, size, /*stack depth*/ 10)
#define TRACY_FREE(t) TracyFreeS(t, /*stack depth*/ 10)
// #define TRACY_ALLOC(t, size) TracyAlloc(t, size)
// #define TRACY_FREE(t) TracyFree(t)

// Rounds `size` up to the next multiple of `alignment` (unchanged if it already is a multiple).
// Every use of an argument is parenthesised so that expression arguments such as pad(a + b, n)
// expand correctly. Each argument is still evaluated more than once, so avoid side effects.
#define pad(size, alignment)                                                                                           \
    ((size) - ((size) % (alignment)) + ((((size) % (alignment)) == 0) ? 0 : (alignment)))

Expand All @@ -17,11 +24,13 @@ inline void* aligned_alloc(size_t alignment, size_t size)
info("bad alloc of size: ", size);
std::abort();
}
TRACY_ALLOC(t, size);
return t;
}

/**
 * Releases a block returned by the aligned_alloc defined just above.
 * The release is reported through TRACY_FREE first and only then handed to free().
 */
inline void aligned_free(void* mem)
{
TRACY_FREE(mem);
free(mem);
}
#endif
Expand All @@ -41,13 +50,15 @@ inline void* protected_aligned_alloc(size_t alignment, size_t size)
info("bad alloc of size: ", size);
std::abort();
}
TRACY_ALLOC(t, size);
return t;
}

#define aligned_alloc protected_aligned_alloc

/**
 * Releases a block returned by protected_aligned_alloc (reachable as aligned_alloc via the
 * #define above). The release is reported through TRACY_FREE first and only then handed to free().
 */
inline void aligned_free(void* mem)
{
TRACY_FREE(mem);
// NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
free(mem);
}
Expand All @@ -56,11 +67,14 @@ inline void aligned_free(void* mem)
#ifdef _WIN32
inline void* aligned_alloc(size_t alignment, size_t size)
{
return _aligned_malloc(size, alignment);
void* t = _aligned_malloc(size, alignment);
TRACY_ALLOC(t, size);
return t;
}

/**
 * Windows variant: releases a block obtained from _aligned_malloc.
 * The release is reported through TRACY_FREE first and only then handed to _aligned_free().
 */
inline void aligned_free(void* mem)
{
TRACY_FREE(mem);
_aligned_free(mem);
}
#endif
Expand All @@ -79,4 +93,19 @@ inline void aligned_free(void* mem)
// info("Total allocated space (uordblks): ", minfo.uordblks);
// info("Total free space (fordblks): ", minfo.fordblks);
// info("Top-most, releasable space (keepcost): ", minfo.keepcost);
// }
// }

/**
 * malloc wrapper that also reports the allocation through TRACY_ALLOC.
 *
 * @param size Number of bytes to request from malloc.
 * @return Whatever malloc returned; the result is not checked here.
 */
inline void* tracy_malloc(size_t size)
{
    // NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
    void* const block = malloc(size);
    TRACY_ALLOC(block, size);
    return block;
}

/**
 * free wrapper paired with tracy_malloc: reports the release through TRACY_FREE,
 * then hands the block back to free().
 *
 * @param mem Pointer to release.
 */
inline void tracy_free(void* mem)
{
TRACY_FREE(mem);
// NOLINTNEXTLINE(cppcoreguidelines-owning-memory, cppcoreguidelines-no-malloc)
free(mem);
}
7 changes: 1 addition & 6 deletions barretenberg/cpp/src/barretenberg/common/slab_allocator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ std::shared_ptr<void> SlabAllocator::get(size_t req_size)
return { aligned_alloc(32, req_size), aligned_free };
}
// NOLINTNEXTLINE(cppcoreguidelines-no-malloc)
return { malloc(req_size), free };
return { tracy_malloc(req_size), tracy_free };
}

size_t SlabAllocator::get_total_size()
Expand Down Expand Up @@ -209,11 +209,6 @@ void init_slab_allocator(size_t circuit_subgroup_size)
allocator.init(circuit_subgroup_size);
}

// auto init = ([]() {
// init_slab_allocator(524288);
// return 0;
// })();

std::shared_ptr<void> get_mem_slab(size_t size)
{
return allocator.get(size);
Expand Down
Loading