Separate subtree size information from parse nodes. (#4174)
Move subtree sizes over to TreeAndSubtrees, using the separate
structure to represent the additional parse work that occurs, and to
make it clear which functions require the extra information. My intent
is to make it hard to use this by accident.
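
A minimal sketch of the intended split, assuming the API as it appears
in the diff below (illustrative only, not the exact signatures):

```cpp
// Sketch: Parse::Tree keeps only the flat per-node data written during
// parsing, while Parse::TreeAndSubtrees layers the subtree sizes on top and
// owns child traversal and Extract-style queries.
#include "toolchain/lex/tokenized_buffer.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/tree_and_subtrees.h"

namespace Carbon {

// Builds the subtree information explicitly; callers that never need child
// traversal can skip this work entirely.
static auto VisitChildren(const Lex::TokenizedBuffer& tokens,
                          const Parse::Tree& tree) -> void {
  Parse::TreeAndSubtrees tree_and_subtrees(tokens, tree);
  for (auto node : tree.postorder()) {
    // Iterating children requires subtree sizes, so it lives on
    // TreeAndSubtrees rather than on Tree.
    for (auto child : tree_and_subtrees.children(node)) {
      (void)child;
    }
  }
}

}  // namespace Carbon
```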

The subtree size is still tracked during Parse::Tree construction. I
think a lot of that can be cleaned up, although we use it during
placeholder assignment so it may take some work. I wanted to see what
people thought about this before taking action on such a change.

I'm using a 1M-line source file generated by #4124 for testing. The
command is `time bazel-bin/toolchain/install/prefix_root/bin/carbon compile
--phase=check --dump-mem-usage ~/tmp/data.carbon`.

At head, what I'm seeing is:

```
...
parse_tree_.node_impls_:
  used_bytes:      61516116
  reserved_bytes:  61516116
...
Total:
  used_bytes:      447814230
  reserved_bytes:  551663894
...
1.43s user 0.14s system 99% cpu 1.565 total
```

With `Tree::Verify` disabled completely, it looks like:
```
parse_tree_.node_impls_:
  used_bytes:      41010744
  reserved_bytes:  41010744
...
Total:
  used_bytes:      427308858
  reserved_bytes:  531158522
...
1.20s user 0.13s system 99% cpu 1.332 total
```

Re-enabling just the basic verification (what is now `Tree::Verify`),
I'm seeing maybe 0.05s of slowdown, but that's within noise on my
system. I do see variability in my timing results; overall, I think
this is a 0.2s +/- 0.1s improvement versus the earlier implementation,
which always exercised the `Extract` code. Those numbers are for opt
builds; debug builds will be unaffected because the same checking
occurs as before.

Note that the subtree size is a third of the node representation,
which is why the decrease in memory usage shows up here.
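
For reference, the numbers above are consistent with each parse node
shrinking from roughly 12 bytes to 8 bytes once the 4-byte subtree size
moves out of the node. A standalone mock of the assumed layout (field
types are simplified stand-ins, not the real Carbon `NodeKind` and
`TokenIndex` types):

```cpp
#include <cstdint>

// Hypothetical pre-change layout: kind + error flag + token + subtree size.
struct NodeImplBefore {
  uint8_t kind;
  bool has_error;
  int32_t token;
  int32_t subtree_size;
};
static_assert(sizeof(NodeImplBefore) == 12, "expected ~12 bytes per node");

// Hypothetical post-change layout: the subtree size lives in TreeAndSubtrees.
struct NodeImplAfter {
  uint8_t kind;
  bool has_error;
  int32_t token;
};
static_assert(sizeof(NodeImplAfter) == 8, "expected ~8 bytes per node");

// 61516116 bytes * 8 / 12 == 41010744 bytes, matching the dumps above.
```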
jonmeow authored Jul 31, 2024
1 parent e6e61e1 commit f67791c
Showing 40 changed files with 767 additions and 692 deletions.
12 changes: 8 additions & 4 deletions language_server/language_server.cpp
@@ -10,6 +10,7 @@
#include "toolchain/lex/lex.h"
#include "toolchain/parse/node_kind.h"
#include "toolchain/parse/parse.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/source/source_buffer.h"

namespace Carbon::LS {
@@ -79,11 +80,12 @@ auto LanguageServer::onReply(llvm::json::Value /*id*/,
// Returns the text of first child of kind Parse::NodeKind::IdentifierName.
static auto GetIdentifierName(const SharedValueStores& value_stores,
const Lex::TokenizedBuffer& tokens,
const Parse::Tree& p, Parse::NodeId node)
const Parse::TreeAndSubtrees& p,
Parse::NodeId node)
-> std::optional<llvm::StringRef> {
for (auto ch : p.children(node)) {
if (p.node_kind(ch) == Parse::NodeKind::IdentifierName) {
auto token = p.node_token(ch);
if (p.tree().node_kind(ch) == Parse::NodeKind::IdentifierName) {
auto token = p.tree().node_token(ch);
if (tokens.GetKind(token) == Lex::TokenKind::Identifier) {
return value_stores.identifiers().Get(tokens.GetIdentifier(token));
}
@@ -104,6 +106,7 @@ void LanguageServer::OnDocumentSymbol(
auto buf = SourceBuffer::MakeFromFile(vfs, file, NullDiagnosticConsumer());
auto lexed = Lex::Lex(value_stores, *buf, NullDiagnosticConsumer());
auto parsed = Parse::Parse(lexed, NullDiagnosticConsumer(), nullptr);
Parse::TreeAndSubtrees tree_and_subtrees(lexed, parsed);
std::vector<clang::clangd::DocumentSymbol> result;
for (const auto& node : parsed.postorder()) {
clang::clangd::SymbolKind symbol_kind;
@@ -126,7 +129,8 @@ void LanguageServer::OnDocumentSymbol(
continue;
}

if (auto name = GetIdentifierName(value_stores, lexed, parsed, node)) {
if (auto name =
GetIdentifierName(value_stores, lexed, tree_and_subtrees, node)) {
auto tok = parsed.node_token(node);
clang::clangd::Position pos{lexed.GetLineNumber(tok) - 1,
lexed.GetColumnNumber(tok) - 1};
5 changes: 3 additions & 2 deletions toolchain/check/check.cpp
@@ -65,7 +65,7 @@ struct UnitInfo
: check_ir_id(check_ir_id),
unit(&unit),
converter(unit.tokens, unit.tokens->source().filename(),
unit.parse_tree),
unit.get_parse_tree_and_subtrees),
err_tracker(*unit.consumer),
emitter(converter, err_tracker) {}

@@ -891,7 +891,8 @@ static auto CheckParseTree(
SemIRDiagnosticConverter converter(node_converters, &sem_ir);
Context::DiagnosticEmitter emitter(converter, unit_info.err_tracker);
Context context(*unit_info.unit->tokens, emitter, *unit_info.unit->parse_tree,
sem_ir, vlog_stream);
unit_info.unit->get_parse_tree_and_subtrees, sem_ir,
vlog_stream);
PrettyStackTraceFunction context_dumper(
[&](llvm::raw_ostream& output) { context.PrintForStackDump(output); });

3 changes: 3 additions & 0 deletions toolchain/check/check.h
@@ -10,6 +10,7 @@
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/lex/tokenized_buffer.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/file.h"

namespace Carbon::Check {
@@ -20,6 +21,8 @@ struct Unit {
const Lex::TokenizedBuffer* tokens;
const Parse::Tree* parse_tree;
DiagnosticConsumer* consumer;
// Returns a lazily constructed TreeAndSubtrees.
std::function<const Parse::TreeAndSubtrees&()> get_parse_tree_and_subtrees;
// The generated IR. Unset on input, set on output.
std::optional<SemIR::File>* sem_ir;
};
7 changes: 5 additions & 2 deletions toolchain/check/context.cpp
@@ -36,11 +36,14 @@
namespace Carbon::Check {

Context::Context(const Lex::TokenizedBuffer& tokens, DiagnosticEmitter& emitter,
const Parse::Tree& parse_tree, SemIR::File& sem_ir,
llvm::raw_ostream* vlog_stream)
const Parse::Tree& parse_tree,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
SemIR::File& sem_ir, llvm::raw_ostream* vlog_stream)
: tokens_(&tokens),
emitter_(&emitter),
parse_tree_(&parse_tree),
get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees),
sem_ir_(&sem_ir),
vlog_stream_(vlog_stream),
node_stack_(parse_tree, vlog_stream),
11 changes: 11 additions & 0 deletions toolchain/check/context.h
@@ -19,6 +19,7 @@
#include "toolchain/check/scope_stack.h"
#include "toolchain/parse/node_ids.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/file.h"
#include "toolchain/sem_ir/ids.h"
#include "toolchain/sem_ir/import_ir.h"
@@ -53,6 +54,8 @@ class Context {
// Stores references for work.
explicit Context(const Lex::TokenizedBuffer& tokens,
DiagnosticEmitter& emitter, const Parse::Tree& parse_tree,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
SemIR::File& sem_ir, llvm::raw_ostream* vlog_stream);

// Marks an implementation TODO. Always returns false.
@@ -360,6 +363,10 @@

auto parse_tree() -> const Parse::Tree& { return *parse_tree_; }

auto parse_tree_and_subtrees() -> const Parse::TreeAndSubtrees& {
return get_parse_tree_and_subtrees_();
}

auto sem_ir() -> SemIR::File& { return *sem_ir_; }

auto node_stack() -> NodeStack& { return node_stack_; }
@@ -486,6 +493,10 @@ class Context {
// The file's parse tree.
const Parse::Tree* parse_tree_;

// Returns a lazily constructed TreeAndSubtrees.
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees_;

// The SemIR::File being added to.
SemIR::File* sem_ir_;

3 changes: 2 additions & 1 deletion toolchain/check/handle_impl.cpp
@@ -141,7 +141,8 @@ static auto ExtendImpl(Context& context, Parse::NodeId extend_node,
// The explicit self type is the same as the default self type, so suggest
// removing it and recover as if it were not present.
if (auto self_as =
context.parse_tree().ExtractAs<Parse::TypeImplAs>(self_type_node)) {
context.parse_tree_and_subtrees().ExtractAs<Parse::TypeImplAs>(
self_type_node)) {
CARBON_DIAGNOSTIC(ExtendImplSelfAsDefault, Note,
"Remove the explicit `Self` type here.");
diag.Note(self_as->type_expr, ExtendImplSelfAsDefault);
1 change: 1 addition & 0 deletions toolchain/driver/BUILD
@@ -74,6 +74,7 @@ cc_library(
"//toolchain/lex",
"//toolchain/lower",
"//toolchain/parse",
"//toolchain/parse:tree",
"//toolchain/sem_ir:file",
"//toolchain/sem_ir:formatter",
"//toolchain/sem_ir:inst_namer",
36 changes: 30 additions & 6 deletions toolchain/driver/driver.cpp
@@ -27,6 +27,7 @@
#include "toolchain/lex/lex.h"
#include "toolchain/lower/lower.h"
#include "toolchain/parse/parse.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/formatter.h"
#include "toolchain/sem_ir/inst_namer.h"
#include "toolchain/source/source_buffer.h"
@@ -599,7 +600,12 @@ class Driver::CompilationUnit {
});
if (options_.dump_parse_tree && IncludeInDumps()) {
consumer_->Flush();
parse_tree_->Print(driver_->output_stream_, options_.preorder_parse_tree);
const auto& tree_and_subtrees = GetParseTreeAndSubtrees();
if (options_.preorder_parse_tree) {
tree_and_subtrees.PrintPreorder(driver_->output_stream_);
} else {
tree_and_subtrees.Print(driver_->output_stream_);
}
}
if (mem_usage_) {
mem_usage_->Collect("parse_tree_", *parse_tree_);
@@ -613,11 +619,15 @@
// Returns information needed to check this unit.
auto GetCheckUnit() -> Check::Unit {
CARBON_CHECK(parse_tree_);
return {.value_stores = &value_stores_,
.tokens = &*tokens_,
.parse_tree = &*parse_tree_,
.consumer = consumer_,
.sem_ir = &sem_ir_};
return {
.value_stores = &value_stores_,
.tokens = &*tokens_,
.parse_tree = &*parse_tree_,
.consumer = consumer_,
.get_parse_tree_and_subtrees = [&]() -> const Parse::TreeAndSubtrees& {
return GetParseTreeAndSubtrees();
},
.sem_ir = &sem_ir_};
}

// Runs post-check logic. Returns true if checking succeeded for the IR.
@@ -778,6 +788,19 @@ class Driver::CompilationUnit {
return true;
}

// The TreeAndSubtrees is mainly used for debugging and diagnostics, and has
// significant overhead. Avoid constructing it when unused.
auto GetParseTreeAndSubtrees() -> const Parse::TreeAndSubtrees& {
if (!parse_tree_and_subtrees_) {
parse_tree_and_subtrees_ = Parse::TreeAndSubtrees(*tokens_, *parse_tree_);
if (mem_usage_) {
mem_usage_->Collect("parse_tree_and_subtrees_",
*parse_tree_and_subtrees_);
}
}
return *parse_tree_and_subtrees_;
}

// Wraps a call with log statements to indicate start and end.
auto LogCall(llvm::StringLiteral label, llvm::function_ref<void()> fn)
-> void {
Expand Down Expand Up @@ -814,6 +837,7 @@ class Driver::CompilationUnit {
std::optional<SourceBuffer> source_;
std::optional<Lex::TokenizedBuffer> tokens_;
std::optional<Parse::Tree> parse_tree_;
std::optional<Parse::TreeAndSubtrees> parse_tree_and_subtrees_;
std::optional<SemIR::File> sem_ir_;
std::unique_ptr<llvm::LLVMContext> llvm_context_;
std::unique_ptr<llvm::Module> module_;
6 changes: 5 additions & 1 deletion toolchain/parse/BUILD
@@ -106,8 +106,12 @@ cc_library(
srcs = [
"extract.cpp",
"tree.cpp",
"tree_and_subtrees.cpp",
],
hdrs = [
"tree.h",
"tree_and_subtrees.h",
],
hdrs = ["tree.h"],
deps = [
":node_kind",
"//common:check",
38 changes: 15 additions & 23 deletions toolchain/parse/context.cpp
@@ -68,18 +68,15 @@ Context::Context(Tree& tree, Lex::TokenizedBuffer& tokens,

auto Context::AddLeafNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-> void {
tree_->node_impls_.push_back(
Tree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
if (has_error) {
tree_->has_errors_ = true;
}
}

auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, int subtree_start,
bool has_error) -> void {
int subtree_size = tree_->size() - subtree_start + 1;
tree_->node_impls_.push_back(
Tree::NodeImpl(kind, has_error, token, subtree_size));
auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-> void {
tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
if (has_error) {
tree_->has_errors_ = true;
}
@@ -91,7 +88,6 @@ auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
CARBON_CHECK(position >= 0 && position < tree_->size())
<< "position: " << position << " size: " << tree_->size();
auto* node_impl = &tree_->node_impls_[position];
CARBON_CHECK(node_impl->subtree_size == 1);
CARBON_CHECK(node_impl->kind == NodeKind::Placeholder);
node_impl->kind = kind;
node_impl->has_error = has_error;
@@ -123,9 +119,9 @@ auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
Lex::TokenKind open_token_kind = tokens().GetKind(expected_open);

if (!open_token_kind.is_opening_symbol()) {
AddNode(close_kind, state.token, state.subtree_start, /*has_error=*/true);
AddNode(close_kind, state.token, /*has_error=*/true);
} else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
AddNode(close_kind, *close_token, state.subtree_start, state.has_error);
AddNode(close_kind, *close_token, state.has_error);
} else {
// TODO: Include the location of the matching opening delimiter in the
// diagnostic.
@@ -135,7 +131,7 @@
open_token_kind.closing_symbol().fixed_spelling());

SkipTo(tokens().GetMatchedClosingToken(expected_open));
AddNode(close_kind, Consume(), state.subtree_start, /*has_error=*/true);
AddNode(close_kind, Consume(), /*has_error=*/true);
}
}

@@ -415,7 +411,7 @@ auto Context::AddNodeExpectingDeclSemi(StateStackEntry state,
}

if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
AddNode(node_kind, *semi, state.subtree_start, /*has_error=*/false);
AddNode(node_kind, *semi, /*has_error=*/false);
} else {
if (is_def_allowed) {
DiagnoseExpectedDeclSemiOrDefinition(decl_kind);
@@ -433,8 +429,7 @@ auto Context::RecoverFromDeclError(StateStackEntry state, NodeKind node_kind,
if (skip_past_likely_end) {
token = SkipPastLikelyEnd(token);
}
AddNode(node_kind, token, state.subtree_start,
/*has_error=*/true);
AddNode(node_kind, token, /*has_error=*/true);
}

auto Context::ParseLibraryName(bool accept_default)
@@ -464,13 +459,11 @@
auto Context::ParseLibrarySpecifier(bool accept_default)
-> std::optional<StringLiteralValueId> {
auto library_token = ConsumeChecked(Lex::TokenKind::Library);
auto library_subtree_start = tree().size();
auto library_id = ParseLibraryName(accept_default);
if (!library_id) {
AddLeafNode(NodeKind::LibraryName, *position_, /*has_error=*/true);
}
AddNode(NodeKind::LibrarySpecifier, library_token, library_subtree_start,
/*has_error=*/false);
AddNode(NodeKind::LibrarySpecifier, library_token, /*has_error=*/false);
return library_id;
}

@@ -503,20 +496,19 @@ static auto ParsingInDeferredDefinitionScope(Context& context) -> bool {
state == State::DeclDefinitionFinishAsNamedConstraint;
}

auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token,
int subtree_start, bool has_error)
auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
-> void {
if (ParsingInDeferredDefinitionScope(*this)) {
deferred_definition_stack_.push_back(tree_->deferred_definitions_.Add(
{.start_id =
FunctionDefinitionStartId(NodeId(tree_->node_impls_.size()))}));
}

AddNode(NodeKind::FunctionDefinitionStart, token, subtree_start, has_error);
AddNode(NodeKind::FunctionDefinitionStart, token, has_error);
}

auto Context::AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void {
auto Context::AddFunctionDefinition(Lex::TokenIndex token, bool has_error)
-> void {
if (ParsingInDeferredDefinitionScope(*this)) {
auto definition_index = deferred_definition_stack_.pop_back_val();
auto& definition = tree_->deferred_definitions_.Get(definition_index);
@@ -526,7 +518,7 @@ auto Context::AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
DeferredDefinitionIndex(tree_->deferred_definitions().size());
}

AddNode(NodeKind::FunctionDefinition, token, subtree_start, has_error);
AddNode(NodeKind::FunctionDefinition, token, has_error);
}

auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
10 changes: 4 additions & 6 deletions toolchain/parse/context.h
@@ -100,8 +100,7 @@ class Context {
-> void;

// Adds a node to the parse tree that has children.
auto AddNode(NodeKind kind, Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error) -> void;

// Replaces the placeholder node at the indicated position with a leaf node.
//
@@ -328,12 +327,11 @@

// Adds a function definition start node, and begins tracking a deferred
// definition if necessary.
auto AddFunctionDefinitionStart(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
-> void;
// Adds a function definition node, and ends tracking a deferred definition if
// necessary.
auto AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddFunctionDefinition(Lex::TokenIndex token, bool has_error) -> void;

// Prints information for a stack dump.
auto PrintForStackDump(llvm::raw_ostream& output) const -> void;