Skip to content

Commit

Permalink
Add basic output of where memory is stored after a compile.
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmeow committed Jul 15, 2024
1 parent 547c8a6 commit ac63061
Show file tree
Hide file tree
Showing 20 changed files with 322 additions and 2 deletions.
12 changes: 12 additions & 0 deletions toolchain/base/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,23 @@ cc_library(
],
)

# Header-only library providing `mem_usage.h`, used to collect and print
# memory usage information.
cc_library(
name = "mem_usage",
hdrs = ["mem_usage.h"],
deps = [
":yaml",
"//common:map",
"//common:set",
"@llvm-project//llvm:Support",
],
)

cc_library(
name = "value_store",
hdrs = ["value_store.h"],
deps = [
":index_base",
":mem_usage",
":yaml",
"//common:check",
"//common:hashing",
Expand Down
113 changes: 113 additions & 0 deletions toolchain/base/mem_usage.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef CARBON_TOOLCHAIN_BASE_MEM_USAGE_H_
#define CARBON_TOOLCHAIN_BASE_MEM_USAGE_H_

#include <cstdint>
#include <string>
#include <tuple>
#include <utility>

#include "common/map.h"
#include "common/set.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/FormatVariadic.h"
#include "toolchain/base/yaml.h"

namespace Carbon {

// Types supporting memory usage tracking should implement:
//
// // Collects memory usage of members.
// auto CollectMemUsage(llvm::StringRef label, AddMemUsageFn add_mem_usage)
// const -> void;
//
// The label should be concatenated with any child labels using MemUsageLabel in
// order to reflect allocation structure.
//
// The arguments for AddMemUsageFn are the label and byte size. It should be
// called once per tracked size.
class MemUsage {
public:
auto Add(std::string label, int64_t used, int64_t reserved) -> void {
mem_usage_.push_back({std::move(label), used, reserved});
}

auto Add(std::string label, const llvm::BumpPtrAllocator& allocator) -> void {
mem_usage_.push_back({std::move(label), allocator.getBytesAllocated(),
allocator.getTotalMemory()});
}

template <typename KeyT, typename ValueT, ssize_t SmallSize,
typename KeyContextT>
auto Add(std::string label, Map<KeyT, ValueT, SmallSize, KeyContextT> map,
KeyContextT key_context = KeyContextT()) -> void {
auto bytes = map.ComputeMetrics(key_context).storage_bytes;
mem_usage_.push_back({std::move(label), bytes, bytes});
}

template <typename KeyT, ssize_t SmallSize, typename KeyContextT>
auto Add(std::string label, Set<KeyT, SmallSize, KeyContextT> map,
KeyContextT key_context = KeyContextT()) -> void {
auto bytes = map.ComputeMetrics(key_context).storage_bytes;
mem_usage_.push_back({std::move(label), bytes, bytes});
}

// Adds memory usage of an array's data. This ignores the possible overhead of
// a SmallVector's in-place storage; if it's used, it's going to be tiny
// relative to scaling memory costs.
//
// This uses SmallVector in order to get proper inference for T, which
// ArrayRef misses.
template <typename T, unsigned N>
auto Add(std::string label, const llvm::SmallVector<T, N>& array) -> void {
Add(std::move(label), array.size_in_bytes(), array.capacity_in_bytes());
}

template <typename T>
auto Collect(llvm::StringRef label, const T& arg) -> void {
arg.CollectMemUsage(*this, label);
}

// Constructs a label for memory usage, handling the `.` concatenation.
// We don't expect much depth in labels per-call.
static auto ConcatLabel(llvm::StringRef label, llvm::StringRef child_label)
-> std::string {
return llvm::formatv("{0}.{1}", label, child_label);
}
static auto ConcatLabel(llvm::StringRef label, llvm::StringRef child_label1,
llvm::StringRef child_label2) -> std::string {
return llvm::formatv("{0}.{1}.{2}", label, child_label1, child_label2);
}

auto OutputYaml(llvm::StringRef filename) const -> Yaml::OutputMapping {
// Explicitly copy the filename.
return Yaml::OutputMapping([&, filename](Yaml::OutputMapping::Map map) {
map.Add("filename", filename);
int64_t total_used = 0;
int64_t total_reserved = 0;
for (auto [label, used, reserved] : mem_usage_) {
total_used += used;
total_reserved += reserved;
map.Add(label,
Yaml::OutputMapping([&](Yaml::OutputMapping::Map byte_map) {
byte_map.Add("used", used);
byte_map.Add("reserved", reserved);
}));
}
map.Add("Total",
Yaml::OutputMapping([&](Yaml::OutputMapping::Map byte_map) {
byte_map.Add("used", total_used);
byte_map.Add("reserved", total_reserved);
}));
});
}

private:
llvm::SmallVector<std::tuple<std::string, int64_t, int64_t>> mem_usage_;
};

} // namespace Carbon

#endif // CARBON_TOOLCHAIN_BASE_MEM_USAGE_H_
28 changes: 28 additions & 0 deletions toolchain/base/value_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/YAMLParser.h"
#include "toolchain/base/index_base.h"
#include "toolchain/base/mem_usage.h"
#include "toolchain/base/yaml.h"

namespace Carbon {
Expand Down Expand Up @@ -187,6 +188,12 @@ class ValueStore
});
}

// Records the memory used by `values_` under `label`.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  // `Add` takes ownership of its label, so hand it an owned string.
  mem_usage.Add(std::string(label), values_);
}

// Returns a read-only view of `values_`.
auto array_ref() const -> llvm::ArrayRef<ValueType> { return values_; }
// Returns the number of elements in `values_`.
auto size() const -> size_t { return values_.size(); }

Expand Down Expand Up @@ -237,6 +244,15 @@ class CanonicalValueStore {
}
// Returns the size reported by `values_`.
auto size() const -> size_t { return values_.size(); }

// Collects memory usage of the values and of the deduplication set. The set's
// storage size is reported as both its used and reserved bytes.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  mem_usage.Collect(MemUsage::ConcatLabel(label, "values_"), values_);

  const auto set_metrics =
      set_.ComputeMetrics(KeyContext(values_.array_ref()));
  mem_usage.Add(MemUsage::ConcatLabel(label, "set_"),
                set_metrics.storage_bytes, set_metrics.storage_bytes);
}

private:
class KeyContext;

Expand Down Expand Up @@ -322,6 +338,18 @@ class SharedValueStores : public Yaml::Printable<SharedValueStores> {
});
}

// Collects memory usage from each shared store, labeling each with the
// member's name appended to `label`.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  auto collect_store = [&](llvm::StringRef member_name, const auto& store) {
    mem_usage.Collect(MemUsage::ConcatLabel(label, member_name), store);
  };

  collect_store("ints_", ints_);
  collect_store("reals_", reals_);
  collect_store("floats_", floats_);
  collect_store("identifiers_", identifiers_);
  collect_store("string_literals_", string_literals_);
}

private:
CanonicalValueStore<IntId> ints_;
ValueStore<RealId> reals_;
Expand Down
38 changes: 36 additions & 2 deletions toolchain/driver/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -308,6 +308,14 @@ Dump the generated assembly to stdout after codegen.
)""",
},
[&](auto& arg_b) { arg_b.Set(&dump_asm); });
b.AddFlag(
{
.name = "dump-mem-usage",
.help = R"""(
Dumps the amount of memory used.
)""",
},
[&](auto& arg_b) { arg_b.Set(&dump_mem_usage); });
b.AddFlag(
{
.name = "prelude-import",
Expand Down Expand Up @@ -344,6 +352,7 @@ Excludes files with the given prefix from dumps.
bool dump_sem_ir = false;
bool dump_llvm_ir = false;
bool dump_asm = false;
bool dump_mem_usage = false;
bool stream_errors = false;
bool preorder_parse_tree = false;
bool builtin_sem_ir = false;
Expand Down Expand Up @@ -540,6 +549,9 @@ class Driver::CompilationUnit {
sorting_consumer_ = SortingDiagnosticConsumer(*consumer);
consumer_ = &*sorting_consumer_;
}
if (options_.dump_mem_usage && IncludeInDumps()) {
mem_usage_ = MemUsage();
}
}

// Loads source and lexes it. Returns true on success.
Expand All @@ -552,6 +564,10 @@ class Driver::CompilationUnit {
*consumer_);
}
});
// `source_` is empty when loading failed (checked just below), so only record
// its size when a source buffer is actually available. The text size is
// reported as both used and reserved.
if (mem_usage_ && source_) {
  mem_usage_->Add("source_", source_->text().size(),
                  source_->text().size());
}
if (!source_) {
success_ = false;
return;
Expand All @@ -565,6 +581,9 @@ class Driver::CompilationUnit {
consumer_->Flush();
driver_->output_stream_ << tokens_;
}
if (mem_usage_) {
mem_usage_->Collect("tokens_", *tokens_);
}
CARBON_VLOG() << "*** Lex::TokenizedBuffer ***\n" << tokens_;
if (tokens_->has_errors()) {
success_ = false;
Expand All @@ -582,6 +601,9 @@ class Driver::CompilationUnit {
consumer_->Flush();
parse_tree_->Print(driver_->output_stream_, options_.preorder_parse_tree);
}
if (mem_usage_) {
mem_usage_->Collect("parse_tree_", *parse_tree_);
}
CARBON_VLOG() << "*** Parse::Tree ***\n" << parse_tree_;
if (parse_tree_->has_errors()) {
success_ = false;
Expand All @@ -607,8 +629,12 @@ class Driver::CompilationUnit {
// to wait for code generation.
consumer_->Flush();

CARBON_VLOG() << "*** Raw SemIR::File ***\n" << *sem_ir_ << "\n";
if (mem_usage_) {
mem_usage_->Collect("sem_ir_", *sem_ir_);
}

if (options_.dump_raw_sem_ir && IncludeInDumps()) {
CARBON_VLOG() << "*** Raw SemIR::File ***\n" << *sem_ir_ << "\n";
sem_ir_->Print(driver_->output_stream_, options_.builtin_sem_ir);
if (options_.dump_sem_ir) {
driver_->output_stream_ << "\n";
Expand Down Expand Up @@ -659,11 +685,16 @@ class Driver::CompilationUnit {

// Runs post-compile logic. This is always called, and called after all other
// actions on the CompilationUnit.
auto PostCompile() const -> void {
auto PostCompile() -> void {
if (options_.dump_shared_values && IncludeInDumps()) {
Yaml::Print(driver_->output_stream_,
value_stores_.OutputYaml(input_filename_));
}
if (mem_usage_) {
mem_usage_->Collect("value_stores_", value_stores_);
Yaml::Print(driver_->output_stream_,
mem_usage_->OutputYaml(input_filename_));
}

// The diagnostics consumer must be flushed before compilation artifacts are
// destructed, because diagnostics can refer to their state.
Expand Down Expand Up @@ -773,6 +804,9 @@ class Driver::CompilationUnit {

bool success_ = true;

// Tracks memory usage of the compile.
std::optional<MemUsage> mem_usage_;

// These are initialized as steps are run.
std::optional<SourceBuffer> source_;
std::optional<Lex::TokenizedBuffer> tokens_;
Expand Down
19 changes: 19 additions & 0 deletions toolchain/driver/testdata/dump_mem_usage.carbon
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
// Part of the Carbon Language project, under the Apache License v2.0 with LLVM
// Exceptions. See /LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
// ARGS: compile --phase=check --dump-mem-usage %s
//
// NOAUTOUPDATE
//
// Avoid testing specific values:
// SET-CHECK-SUBSET

var x: i32 = 1;

// CHECK:STDOUT: ---
// CHECK:STDOUT: filename: dump_mem_usage.carbon
// CHECK:STDOUT: source_:
// CHECK:STDOUT: used: 0
// CHECK:STDOUT: reserved: 0
// CHECK:STDOUT: ...
1 change: 1 addition & 0 deletions toolchain/lex/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ cc_library(
"//common:ostream",
"//common:string_helpers",
"//toolchain/base:index_base",
"//toolchain/base:mem_usage",
"//toolchain/base:value_store",
"//toolchain/diagnostics:diagnostic_emitter",
"//toolchain/source:source_buffer",
Expand Down
7 changes: 7 additions & 0 deletions toolchain/lex/tokenized_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,6 +340,13 @@ auto TokenizedBuffer::AddToken(TokenInfo info) -> TokenIndex {
return TokenIndex(static_cast<int>(token_infos_.size()) - 1);
}

// Records memory usage for the buffer's allocator and for its token and line
// info arrays, each labeled with the member's name appended to `label`.
auto TokenizedBuffer::CollectMemUsage(MemUsage& mem_usage,
                                      llvm::StringRef label) const -> void {
  auto add_member = [&](llvm::StringRef member_name, const auto& member) {
    mem_usage.Add(MemUsage::ConcatLabel(label, member_name), member);
  };

  add_member("allocator_", allocator_);
  add_member("token_infos_", token_infos_);
  add_member("line_infos_", line_infos_);
}

// Writes `token_.index` to `output`.
auto TokenIterator::Print(llvm::raw_ostream& output) const -> void {
output << token_.index;
}
Expand Down
5 changes: 5 additions & 0 deletions toolchain/lex/tokenized_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "llvm/Support/Allocator.h"
#include "llvm/Support/raw_ostream.h"
#include "toolchain/base/index_base.h"
#include "toolchain/base/mem_usage.h"
#include "toolchain/base/value_store.h"
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/lex/token_index.h"
Expand Down Expand Up @@ -204,6 +205,10 @@ class TokenizedBuffer : public Printable<TokenizedBuffer> {
auto PrintToken(llvm::raw_ostream& output_stream, TokenIndex token) const
-> void;

// Collects memory usage of members.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
-> void;

// Returns true if the buffer has errors that were detected at lexing time.
auto has_errors() const -> bool { return has_errors_; }

Expand Down
6 changes: 6 additions & 0 deletions toolchain/parse/tree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,12 @@ auto Tree::Print(llvm::raw_ostream& output, bool preorder) const -> void {
output << " ]\n";
}

// Records memory usage for `node_impls_` and `imports_`, each labeled with the
// member's name appended to `label`.
auto Tree::CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  auto node_impls_label = MemUsage::ConcatLabel(label, "node_impls_");
  mem_usage.Add(std::move(node_impls_label), node_impls_);

  auto imports_label = MemUsage::ConcatLabel(label, "imports_");
  mem_usage.Add(std::move(imports_label), imports_);
}

auto Tree::VerifyExtract(NodeId node_id, NodeKind kind,
ErrorBuilder* trace) const -> bool {
switch (kind) {
Expand Down
4 changes: 4 additions & 0 deletions toolchain/parse/tree.h
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,10 @@ class Tree : public Printable<Tree> {
// line-oriented shell tools from `grep` to `awk`.
auto Print(llvm::raw_ostream& output, bool preorder) const -> void;

// Collects memory usage of members.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
-> void;

// The following `Extract*` function provide an alternative way of accessing
// the nodes of a tree. It is intended to be more convenient and type-safe,
// but slower and can't be used on nodes that are marked as having an error.
Expand Down
8 changes: 8 additions & 0 deletions toolchain/sem_ir/block_value_store.h
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ class BlockValueStore : public Yaml::Printable<BlockValueStore<IdT>> {
});
}

// Collects memory usage from `values_`, and records the storage of the
// `canonical_blocks_` table using a key context built from this store.
auto CollectMemUsage(MemUsage& mem_usage, llvm::StringRef label) const
    -> void {
  const auto values_label = MemUsage::ConcatLabel(label, "values_");
  mem_usage.Collect(values_label, values_);

  auto canonical_label = MemUsage::ConcatLabel(label, "canonical_blocks_");
  mem_usage.Add(std::move(canonical_label), canonical_blocks_,
                KeyContext(this));
}

// Returns `values_.size()`, converted to `int`.
auto size() const -> int { return values_.size(); }

protected:
Expand Down
Loading

0 comments on commit ac63061

Please sign in to comment.