Skip to content

Commit

Permalink
Separate child count information from parse nodes.
Browse files Browse the repository at this point in the history
  • Loading branch information
jonmeow committed Jul 29, 2024
1 parent e6e61e1 commit d813e9a
Show file tree
Hide file tree
Showing 40 changed files with 767 additions and 692 deletions.
12 changes: 8 additions & 4 deletions language_server/language_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "toolchain/lex/lex.h"
#include "toolchain/parse/node_kind.h"
#include "toolchain/parse/parse.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/source/source_buffer.h"

namespace Carbon::LS {
Expand Down Expand Up @@ -79,11 +80,12 @@ auto LanguageServer::onReply(llvm::json::Value /*id*/,
// Returns the text of first child of kind Parse::NodeKind::IdentifierName.
static auto GetIdentifierName(const SharedValueStores& value_stores,
const Lex::TokenizedBuffer& tokens,
const Parse::Tree& p, Parse::NodeId node)
const Parse::TreeAndSubtrees& p,
Parse::NodeId node)
-> std::optional<llvm::StringRef> {
for (auto ch : p.children(node)) {
if (p.node_kind(ch) == Parse::NodeKind::IdentifierName) {
auto token = p.node_token(ch);
if (p.tree().node_kind(ch) == Parse::NodeKind::IdentifierName) {
auto token = p.tree().node_token(ch);
if (tokens.GetKind(token) == Lex::TokenKind::Identifier) {
return value_stores.identifiers().Get(tokens.GetIdentifier(token));
}
Expand All @@ -104,6 +106,7 @@ void LanguageServer::OnDocumentSymbol(
auto buf = SourceBuffer::MakeFromFile(vfs, file, NullDiagnosticConsumer());
auto lexed = Lex::Lex(value_stores, *buf, NullDiagnosticConsumer());
auto parsed = Parse::Parse(lexed, NullDiagnosticConsumer(), nullptr);
Parse::TreeAndSubtrees tree_and_subtrees(lexed, parsed);
std::vector<clang::clangd::DocumentSymbol> result;
for (const auto& node : parsed.postorder()) {
clang::clangd::SymbolKind symbol_kind;
Expand All @@ -126,7 +129,8 @@ void LanguageServer::OnDocumentSymbol(
continue;
}

if (auto name = GetIdentifierName(value_stores, lexed, parsed, node)) {
if (auto name =
GetIdentifierName(value_stores, lexed, tree_and_subtrees, node)) {
auto tok = parsed.node_token(node);
clang::clangd::Position pos{lexed.GetLineNumber(tok) - 1,
lexed.GetColumnNumber(tok) - 1};
Expand Down
5 changes: 3 additions & 2 deletions toolchain/check/check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ struct UnitInfo {
: check_ir_id(check_ir_id),
unit(&unit),
converter(unit.tokens, unit.tokens->source().filename(),
unit.parse_tree),
unit.get_parse_tree_and_subtrees),
err_tracker(*unit.consumer),
emitter(converter, err_tracker) {}

Expand Down Expand Up @@ -891,7 +891,8 @@ static auto CheckParseTree(
SemIRDiagnosticConverter converter(node_converters, &sem_ir);
Context::DiagnosticEmitter emitter(converter, unit_info.err_tracker);
Context context(*unit_info.unit->tokens, emitter, *unit_info.unit->parse_tree,
sem_ir, vlog_stream);
unit_info.unit->get_parse_tree_and_subtrees, sem_ir,
vlog_stream);
PrettyStackTraceFunction context_dumper(
[&](llvm::raw_ostream& output) { context.PrintForStackDump(output); });

Expand Down
3 changes: 3 additions & 0 deletions toolchain/check/check.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/lex/tokenized_buffer.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/file.h"

namespace Carbon::Check {
Expand All @@ -20,6 +21,8 @@ struct Unit {
const Lex::TokenizedBuffer* tokens;
const Parse::Tree* parse_tree;
DiagnosticConsumer* consumer;
// Returns a lazily constructed TreeAndSubtrees.
std::function<const Parse::TreeAndSubtrees&()> get_parse_tree_and_subtrees;
// The generated IR. Unset on input, set on output.
std::optional<SemIR::File>* sem_ir;
};
Expand Down
7 changes: 5 additions & 2 deletions toolchain/check/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@
namespace Carbon::Check {

Context::Context(const Lex::TokenizedBuffer& tokens, DiagnosticEmitter& emitter,
const Parse::Tree& parse_tree, SemIR::File& sem_ir,
llvm::raw_ostream* vlog_stream)
const Parse::Tree& parse_tree,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
SemIR::File& sem_ir, llvm::raw_ostream* vlog_stream)
: tokens_(&tokens),
emitter_(&emitter),
parse_tree_(&parse_tree),
get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees),
sem_ir_(&sem_ir),
vlog_stream_(vlog_stream),
node_stack_(parse_tree, vlog_stream),
Expand Down
11 changes: 11 additions & 0 deletions toolchain/check/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "toolchain/check/scope_stack.h"
#include "toolchain/parse/node_ids.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/file.h"
#include "toolchain/sem_ir/ids.h"
#include "toolchain/sem_ir/import_ir.h"
Expand Down Expand Up @@ -53,6 +54,8 @@ class Context {
// Stores references for work.
explicit Context(const Lex::TokenizedBuffer& tokens,
DiagnosticEmitter& emitter, const Parse::Tree& parse_tree,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
SemIR::File& sem_ir, llvm::raw_ostream* vlog_stream);

// Marks an implementation TODO. Always returns false.
Expand Down Expand Up @@ -360,6 +363,10 @@ class Context {

auto parse_tree() -> const Parse::Tree& { return *parse_tree_; }

auto parse_tree_and_subtrees() -> const Parse::TreeAndSubtrees& {
return get_parse_tree_and_subtrees_();
}

auto sem_ir() -> SemIR::File& { return *sem_ir_; }

auto node_stack() -> NodeStack& { return node_stack_; }
Expand Down Expand Up @@ -486,6 +493,10 @@ class Context {
// The file's parse tree.
const Parse::Tree* parse_tree_;

// Returns a lazily constructed TreeAndSubtrees.
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees_;

// The SemIR::File being added to.
SemIR::File* sem_ir_;

Expand Down
3 changes: 2 additions & 1 deletion toolchain/check/handle_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ static auto ExtendImpl(Context& context, Parse::NodeId extend_node,
// The explicit self type is the same as the default self type, so suggest
// removing it and recover as if it were not present.
if (auto self_as =
context.parse_tree().ExtractAs<Parse::TypeImplAs>(self_type_node)) {
context.parse_tree_and_subtrees().ExtractAs<Parse::TypeImplAs>(
self_type_node)) {
CARBON_DIAGNOSTIC(ExtendImplSelfAsDefault, Note,
"Remove the explicit `Self` type here.");
diag.Note(self_as->type_expr, ExtendImplSelfAsDefault);
Expand Down
1 change: 1 addition & 0 deletions toolchain/driver/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ cc_library(
"//toolchain/lex",
"//toolchain/lower",
"//toolchain/parse",
"//toolchain/parse:tree",
"//toolchain/sem_ir:file",
"//toolchain/sem_ir:formatter",
"//toolchain/sem_ir:inst_namer",
Expand Down
36 changes: 30 additions & 6 deletions toolchain/driver/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "toolchain/lex/lex.h"
#include "toolchain/lower/lower.h"
#include "toolchain/parse/parse.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/formatter.h"
#include "toolchain/sem_ir/inst_namer.h"
#include "toolchain/source/source_buffer.h"
Expand Down Expand Up @@ -599,7 +600,12 @@ class Driver::CompilationUnit {
});
if (options_.dump_parse_tree && IncludeInDumps()) {
consumer_->Flush();
parse_tree_->Print(driver_->output_stream_, options_.preorder_parse_tree);
const auto& tree_and_subtrees = GetParseTreeAndSubtrees();
if (options_.preorder_parse_tree) {
tree_and_subtrees.PrintPreorder(driver_->output_stream_);
} else {
tree_and_subtrees.Print(driver_->output_stream_);
}
}
if (mem_usage_) {
mem_usage_->Collect("parse_tree_", *parse_tree_);
Expand All @@ -613,11 +619,15 @@ class Driver::CompilationUnit {
// Returns information needed to check this unit.
auto GetCheckUnit() -> Check::Unit {
CARBON_CHECK(parse_tree_);
return {.value_stores = &value_stores_,
.tokens = &*tokens_,
.parse_tree = &*parse_tree_,
.consumer = consumer_,
.sem_ir = &sem_ir_};
return {
.value_stores = &value_stores_,
.tokens = &*tokens_,
.parse_tree = &*parse_tree_,
.consumer = consumer_,
.get_parse_tree_and_subtrees = [&]() -> const Parse::TreeAndSubtrees& {
return GetParseTreeAndSubtrees();
},
.sem_ir = &sem_ir_};
}

// Runs post-check logic. Returns true if checking succeeded for the IR.
Expand Down Expand Up @@ -778,6 +788,19 @@ class Driver::CompilationUnit {
return true;
}

// The TreeAndSubtrees is mainly used for debugging and diagnostics, and has
// significant overhead. Avoid constructing it otherwise.
auto GetParseTreeAndSubtrees() -> const Parse::TreeAndSubtrees& {
if (!parse_tree_and_subtrees_) {
parse_tree_and_subtrees_ = Parse::TreeAndSubtrees(*tokens_, *parse_tree_);
if (mem_usage_) {
mem_usage_->Collect("parse_tree_and_subtrees_",
*parse_tree_and_subtrees_);
}
}
return *parse_tree_and_subtrees_;
}

// Wraps a call with log statements to indicate start and end.
auto LogCall(llvm::StringLiteral label, llvm::function_ref<void()> fn)
-> void {
Expand Down Expand Up @@ -814,6 +837,7 @@ class Driver::CompilationUnit {
std::optional<SourceBuffer> source_;
std::optional<Lex::TokenizedBuffer> tokens_;
std::optional<Parse::Tree> parse_tree_;
std::optional<Parse::TreeAndSubtrees> parse_tree_and_subtrees_;
std::optional<SemIR::File> sem_ir_;
std::unique_ptr<llvm::LLVMContext> llvm_context_;
std::unique_ptr<llvm::Module> module_;
Expand Down
6 changes: 5 additions & 1 deletion toolchain/parse/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,12 @@ cc_library(
srcs = [
"extract.cpp",
"tree.cpp",
"tree_and_subtrees.cpp",
],
hdrs = [
"tree.h",
"tree_and_subtrees.h",
],
hdrs = ["tree.h"],
deps = [
":node_kind",
"//common:check",
Expand Down
38 changes: 15 additions & 23 deletions toolchain/parse/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,15 @@ Context::Context(Tree& tree, Lex::TokenizedBuffer& tokens,

auto Context::AddLeafNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-> void {
tree_->node_impls_.push_back(
Tree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
if (has_error) {
tree_->has_errors_ = true;
}
}

auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, int subtree_start,
bool has_error) -> void {
int subtree_size = tree_->size() - subtree_start + 1;
tree_->node_impls_.push_back(
Tree::NodeImpl(kind, has_error, token, subtree_size));
auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-> void {
tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
if (has_error) {
tree_->has_errors_ = true;
}
Expand All @@ -91,7 +88,6 @@ auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
CARBON_CHECK(position >= 0 && position < tree_->size())
<< "position: " << position << " size: " << tree_->size();
auto* node_impl = &tree_->node_impls_[position];
CARBON_CHECK(node_impl->subtree_size == 1);
CARBON_CHECK(node_impl->kind == NodeKind::Placeholder);
node_impl->kind = kind;
node_impl->has_error = has_error;
Expand Down Expand Up @@ -123,9 +119,9 @@ auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
Lex::TokenKind open_token_kind = tokens().GetKind(expected_open);

if (!open_token_kind.is_opening_symbol()) {
AddNode(close_kind, state.token, state.subtree_start, /*has_error=*/true);
AddNode(close_kind, state.token, /*has_error=*/true);
} else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
AddNode(close_kind, *close_token, state.subtree_start, state.has_error);
AddNode(close_kind, *close_token, state.has_error);
} else {
// TODO: Include the location of the matching opening delimiter in the
// diagnostic.
Expand All @@ -135,7 +131,7 @@ auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
open_token_kind.closing_symbol().fixed_spelling());

SkipTo(tokens().GetMatchedClosingToken(expected_open));
AddNode(close_kind, Consume(), state.subtree_start, /*has_error=*/true);
AddNode(close_kind, Consume(), /*has_error=*/true);
}
}

Expand Down Expand Up @@ -415,7 +411,7 @@ auto Context::AddNodeExpectingDeclSemi(StateStackEntry state,
}

if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
AddNode(node_kind, *semi, state.subtree_start, /*has_error=*/false);
AddNode(node_kind, *semi, /*has_error=*/false);
} else {
if (is_def_allowed) {
DiagnoseExpectedDeclSemiOrDefinition(decl_kind);
Expand All @@ -433,8 +429,7 @@ auto Context::RecoverFromDeclError(StateStackEntry state, NodeKind node_kind,
if (skip_past_likely_end) {
token = SkipPastLikelyEnd(token);
}
AddNode(node_kind, token, state.subtree_start,
/*has_error=*/true);
AddNode(node_kind, token, /*has_error=*/true);
}

auto Context::ParseLibraryName(bool accept_default)
Expand Down Expand Up @@ -464,13 +459,11 @@ auto Context::ParseLibraryName(bool accept_default)
auto Context::ParseLibrarySpecifier(bool accept_default)
-> std::optional<StringLiteralValueId> {
auto library_token = ConsumeChecked(Lex::TokenKind::Library);
auto library_subtree_start = tree().size();
auto library_id = ParseLibraryName(accept_default);
if (!library_id) {
AddLeafNode(NodeKind::LibraryName, *position_, /*has_error=*/true);
}
AddNode(NodeKind::LibrarySpecifier, library_token, library_subtree_start,
/*has_error=*/false);
AddNode(NodeKind::LibrarySpecifier, library_token, /*has_error=*/false);
return library_id;
}

Expand Down Expand Up @@ -503,20 +496,19 @@ static auto ParsingInDeferredDefinitionScope(Context& context) -> bool {
state == State::DeclDefinitionFinishAsNamedConstraint;
}

auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token,
int subtree_start, bool has_error)
auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
-> void {
if (ParsingInDeferredDefinitionScope(*this)) {
deferred_definition_stack_.push_back(tree_->deferred_definitions_.Add(
{.start_id =
FunctionDefinitionStartId(NodeId(tree_->node_impls_.size()))}));
}

AddNode(NodeKind::FunctionDefinitionStart, token, subtree_start, has_error);
AddNode(NodeKind::FunctionDefinitionStart, token, has_error);
}

auto Context::AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void {
auto Context::AddFunctionDefinition(Lex::TokenIndex token, bool has_error)
-> void {
if (ParsingInDeferredDefinitionScope(*this)) {
auto definition_index = deferred_definition_stack_.pop_back_val();
auto& definition = tree_->deferred_definitions_.Get(definition_index);
Expand All @@ -526,7 +518,7 @@ auto Context::AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
DeferredDefinitionIndex(tree_->deferred_definitions().size());
}

AddNode(NodeKind::FunctionDefinition, token, subtree_start, has_error);
AddNode(NodeKind::FunctionDefinition, token, has_error);
}

auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
Expand Down
10 changes: 4 additions & 6 deletions toolchain/parse/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ class Context {
-> void;

// Adds a node to the parse tree that has children.
auto AddNode(NodeKind kind, Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error) -> void;

// Replaces the placeholder node at the indicated position with a leaf node.
//
Expand Down Expand Up @@ -328,12 +327,11 @@ class Context {

// Adds a function definition start node, and begins tracking a deferred
// definition if necessary.
auto AddFunctionDefinitionStart(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
-> void;
// Adds a function definition node, and ends tracking a deferred definition if
// necessary.
auto AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddFunctionDefinition(Lex::TokenIndex token, bool has_error) -> void;

// Prints information for a stack dump.
auto PrintForStackDump(llvm::raw_ostream& output) const -> void;
Expand Down
Loading

0 comments on commit d813e9a

Please sign in to comment.