Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Separate subtree size information from parse nodes. #4174

Merged
merged 2 commits into from
Jul 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions language_server/language_server.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "toolchain/lex/lex.h"
#include "toolchain/parse/node_kind.h"
#include "toolchain/parse/parse.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/source/source_buffer.h"

namespace Carbon::LS {
Expand Down Expand Up @@ -79,11 +80,12 @@ auto LanguageServer::onReply(llvm::json::Value /*id*/,
// Returns the text of first child of kind Parse::NodeKind::IdentifierName.
static auto GetIdentifierName(const SharedValueStores& value_stores,
const Lex::TokenizedBuffer& tokens,
const Parse::Tree& p, Parse::NodeId node)
const Parse::TreeAndSubtrees& p,
Parse::NodeId node)
-> std::optional<llvm::StringRef> {
for (auto ch : p.children(node)) {
if (p.node_kind(ch) == Parse::NodeKind::IdentifierName) {
auto token = p.node_token(ch);
if (p.tree().node_kind(ch) == Parse::NodeKind::IdentifierName) {
auto token = p.tree().node_token(ch);
if (tokens.GetKind(token) == Lex::TokenKind::Identifier) {
return value_stores.identifiers().Get(tokens.GetIdentifier(token));
}
Expand All @@ -104,6 +106,7 @@ void LanguageServer::OnDocumentSymbol(
auto buf = SourceBuffer::MakeFromFile(vfs, file, NullDiagnosticConsumer());
auto lexed = Lex::Lex(value_stores, *buf, NullDiagnosticConsumer());
auto parsed = Parse::Parse(lexed, NullDiagnosticConsumer(), nullptr);
Parse::TreeAndSubtrees tree_and_subtrees(lexed, parsed);
std::vector<clang::clangd::DocumentSymbol> result;
for (const auto& node : parsed.postorder()) {
clang::clangd::SymbolKind symbol_kind;
Expand All @@ -126,7 +129,8 @@ void LanguageServer::OnDocumentSymbol(
continue;
}

if (auto name = GetIdentifierName(value_stores, lexed, parsed, node)) {
if (auto name =
GetIdentifierName(value_stores, lexed, tree_and_subtrees, node)) {
auto tok = parsed.node_token(node);
clang::clangd::Position pos{lexed.GetLineNumber(tok) - 1,
lexed.GetColumnNumber(tok) - 1};
Expand Down
5 changes: 3 additions & 2 deletions toolchain/check/check.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ struct UnitInfo {
: check_ir_id(check_ir_id),
unit(&unit),
converter(unit.tokens, unit.tokens->source().filename(),
unit.parse_tree),
unit.get_parse_tree_and_subtrees),
err_tracker(*unit.consumer),
emitter(converter, err_tracker) {}

Expand Down Expand Up @@ -891,7 +891,8 @@ static auto CheckParseTree(
SemIRDiagnosticConverter converter(node_converters, &sem_ir);
Context::DiagnosticEmitter emitter(converter, unit_info.err_tracker);
Context context(*unit_info.unit->tokens, emitter, *unit_info.unit->parse_tree,
sem_ir, vlog_stream);
unit_info.unit->get_parse_tree_and_subtrees, sem_ir,
vlog_stream);
PrettyStackTraceFunction context_dumper(
[&](llvm::raw_ostream& output) { context.PrintForStackDump(output); });

Expand Down
3 changes: 3 additions & 0 deletions toolchain/check/check.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "toolchain/diagnostics/diagnostic_emitter.h"
#include "toolchain/lex/tokenized_buffer.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/file.h"

namespace Carbon::Check {
Expand All @@ -20,6 +21,8 @@ struct Unit {
const Lex::TokenizedBuffer* tokens;
const Parse::Tree* parse_tree;
DiagnosticConsumer* consumer;
// Returns a lazily constructed TreeAndSubtrees.
std::function<const Parse::TreeAndSubtrees&()> get_parse_tree_and_subtrees;
// The generated IR. Unset on input, set on output.
std::optional<SemIR::File>* sem_ir;
};
Expand Down
7 changes: 5 additions & 2 deletions toolchain/check/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,11 +36,14 @@
namespace Carbon::Check {

Context::Context(const Lex::TokenizedBuffer& tokens, DiagnosticEmitter& emitter,
const Parse::Tree& parse_tree, SemIR::File& sem_ir,
llvm::raw_ostream* vlog_stream)
const Parse::Tree& parse_tree,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
SemIR::File& sem_ir, llvm::raw_ostream* vlog_stream)
: tokens_(&tokens),
emitter_(&emitter),
parse_tree_(&parse_tree),
get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees),
sem_ir_(&sem_ir),
vlog_stream_(vlog_stream),
node_stack_(parse_tree, vlog_stream),
Expand Down
11 changes: 11 additions & 0 deletions toolchain/check/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include "toolchain/check/scope_stack.h"
#include "toolchain/parse/node_ids.h"
#include "toolchain/parse/tree.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/file.h"
#include "toolchain/sem_ir/ids.h"
#include "toolchain/sem_ir/import_ir.h"
Expand Down Expand Up @@ -53,6 +54,8 @@ class Context {
// Stores references for work.
explicit Context(const Lex::TokenizedBuffer& tokens,
DiagnosticEmitter& emitter, const Parse::Tree& parse_tree,
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees,
SemIR::File& sem_ir, llvm::raw_ostream* vlog_stream);

// Marks an implementation TODO. Always returns false.
Expand Down Expand Up @@ -360,6 +363,10 @@ class Context {

auto parse_tree() -> const Parse::Tree& { return *parse_tree_; }

// Returns the Parse::TreeAndSubtrees produced by invoking the stored
// `get_parse_tree_and_subtrees_` callback. The callback is invoked on every
// call; any caching is left to the callback itself.
auto parse_tree_and_subtrees() -> const Parse::TreeAndSubtrees& {
return get_parse_tree_and_subtrees_();
}

auto sem_ir() -> SemIR::File& { return *sem_ir_; }

auto node_stack() -> NodeStack& { return node_stack_; }
Expand Down Expand Up @@ -486,6 +493,10 @@ class Context {
// The file's parse tree.
const Parse::Tree* parse_tree_;

// Returns a lazily constructed TreeAndSubtrees.
//
// NOTE(review): llvm::function_ref is a non-owning reference to a callable,
// so the callable handed to the constructor presumably has to outlive this
// Context -- confirm at each construction site.
llvm::function_ref<const Parse::TreeAndSubtrees&()>
get_parse_tree_and_subtrees_;

// The SemIR::File being added to.
SemIR::File* sem_ir_;

Expand Down
3 changes: 2 additions & 1 deletion toolchain/check/handle_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,7 +141,8 @@ static auto ExtendImpl(Context& context, Parse::NodeId extend_node,
// The explicit self type is the same as the default self type, so suggest
// removing it and recover as if it were not present.
if (auto self_as =
context.parse_tree().ExtractAs<Parse::TypeImplAs>(self_type_node)) {
context.parse_tree_and_subtrees().ExtractAs<Parse::TypeImplAs>(
self_type_node)) {
CARBON_DIAGNOSTIC(ExtendImplSelfAsDefault, Note,
"Remove the explicit `Self` type here.");
diag.Note(self_as->type_expr, ExtendImplSelfAsDefault);
Expand Down
1 change: 1 addition & 0 deletions toolchain/driver/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ cc_library(
"//toolchain/lex",
"//toolchain/lower",
"//toolchain/parse",
"//toolchain/parse:tree",
"//toolchain/sem_ir:file",
"//toolchain/sem_ir:formatter",
"//toolchain/sem_ir:inst_namer",
Expand Down
36 changes: 30 additions & 6 deletions toolchain/driver/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "toolchain/lex/lex.h"
#include "toolchain/lower/lower.h"
#include "toolchain/parse/parse.h"
#include "toolchain/parse/tree_and_subtrees.h"
#include "toolchain/sem_ir/formatter.h"
#include "toolchain/sem_ir/inst_namer.h"
#include "toolchain/source/source_buffer.h"
Expand Down Expand Up @@ -599,7 +600,12 @@ class Driver::CompilationUnit {
});
if (options_.dump_parse_tree && IncludeInDumps()) {
consumer_->Flush();
parse_tree_->Print(driver_->output_stream_, options_.preorder_parse_tree);
const auto& tree_and_subtrees = GetParseTreeAndSubtrees();
if (options_.preorder_parse_tree) {
tree_and_subtrees.PrintPreorder(driver_->output_stream_);
} else {
tree_and_subtrees.Print(driver_->output_stream_);
}
}
if (mem_usage_) {
mem_usage_->Collect("parse_tree_", *parse_tree_);
Expand All @@ -613,11 +619,15 @@ class Driver::CompilationUnit {
// Returns information needed to check this unit.
auto GetCheckUnit() -> Check::Unit {
CARBON_CHECK(parse_tree_);
return {.value_stores = &value_stores_,
.tokens = &*tokens_,
.parse_tree = &*parse_tree_,
.consumer = consumer_,
.sem_ir = &sem_ir_};
return {
.value_stores = &value_stores_,
.tokens = &*tokens_,
.parse_tree = &*parse_tree_,
.consumer = consumer_,
.get_parse_tree_and_subtrees = [&]() -> const Parse::TreeAndSubtrees& {
return GetParseTreeAndSubtrees();
},
.sem_ir = &sem_ir_};
}

// Runs post-check logic. Returns true if checking succeeded for the IR.
Expand Down Expand Up @@ -778,6 +788,19 @@ class Driver::CompilationUnit {
return true;
}

// Returns this unit's TreeAndSubtrees, building it on the first request and
// reusing the stored instance afterwards.
//
// The TreeAndSubtrees is mainly used for debugging and diagnostics, and has
// significant overhead, so it is not constructed until something asks for it.
// When memory usage tracking is enabled, the new instance is recorded once,
// right after it is built.
auto GetParseTreeAndSubtrees() -> const Parse::TreeAndSubtrees& {
  // Fast path: already built by an earlier call.
  if (parse_tree_and_subtrees_) {
    return *parse_tree_and_subtrees_;
  }

  parse_tree_and_subtrees_ = Parse::TreeAndSubtrees(*tokens_, *parse_tree_);
  if (mem_usage_) {
    mem_usage_->Collect("parse_tree_and_subtrees_",
                        *parse_tree_and_subtrees_);
  }
  return *parse_tree_and_subtrees_;
}

// Wraps a call with log statements to indicate start and end.
auto LogCall(llvm::StringLiteral label, llvm::function_ref<void()> fn)
-> void {
Expand Down Expand Up @@ -814,6 +837,7 @@ class Driver::CompilationUnit {
std::optional<SourceBuffer> source_;
std::optional<Lex::TokenizedBuffer> tokens_;
std::optional<Parse::Tree> parse_tree_;
std::optional<Parse::TreeAndSubtrees> parse_tree_and_subtrees_;
std::optional<SemIR::File> sem_ir_;
std::unique_ptr<llvm::LLVMContext> llvm_context_;
std::unique_ptr<llvm::Module> module_;
Expand Down
6 changes: 5 additions & 1 deletion toolchain/parse/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,12 @@ cc_library(
srcs = [
"extract.cpp",
"tree.cpp",
"tree_and_subtrees.cpp",
],
hdrs = [
"tree.h",
"tree_and_subtrees.h",
],
hdrs = ["tree.h"],
deps = [
":node_kind",
"//common:check",
Expand Down
38 changes: 15 additions & 23 deletions toolchain/parse/context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,18 +68,15 @@ Context::Context(Tree& tree, Lex::TokenizedBuffer& tokens,

auto Context::AddLeafNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-> void {
tree_->node_impls_.push_back(
Tree::NodeImpl(kind, has_error, token, /*subtree_size=*/1));
tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
if (has_error) {
tree_->has_errors_ = true;
}
}

auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, int subtree_start,
bool has_error) -> void {
int subtree_size = tree_->size() - subtree_start + 1;
tree_->node_impls_.push_back(
Tree::NodeImpl(kind, has_error, token, subtree_size));
auto Context::AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error)
-> void {
tree_->node_impls_.push_back(Tree::NodeImpl(kind, has_error, token));
if (has_error) {
tree_->has_errors_ = true;
}
Expand All @@ -91,7 +88,6 @@ auto Context::ReplacePlaceholderNode(int32_t position, NodeKind kind,
CARBON_CHECK(position >= 0 && position < tree_->size())
<< "position: " << position << " size: " << tree_->size();
auto* node_impl = &tree_->node_impls_[position];
CARBON_CHECK(node_impl->subtree_size == 1);
CARBON_CHECK(node_impl->kind == NodeKind::Placeholder);
node_impl->kind = kind;
node_impl->has_error = has_error;
Expand Down Expand Up @@ -123,9 +119,9 @@ auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
Lex::TokenKind open_token_kind = tokens().GetKind(expected_open);

if (!open_token_kind.is_opening_symbol()) {
AddNode(close_kind, state.token, state.subtree_start, /*has_error=*/true);
AddNode(close_kind, state.token, /*has_error=*/true);
} else if (auto close_token = ConsumeIf(open_token_kind.closing_symbol())) {
AddNode(close_kind, *close_token, state.subtree_start, state.has_error);
AddNode(close_kind, *close_token, state.has_error);
} else {
// TODO: Include the location of the matching opening delimiter in the
// diagnostic.
Expand All @@ -135,7 +131,7 @@ auto Context::ConsumeAndAddCloseSymbol(Lex::TokenIndex expected_open,
open_token_kind.closing_symbol().fixed_spelling());

SkipTo(tokens().GetMatchedClosingToken(expected_open));
AddNode(close_kind, Consume(), state.subtree_start, /*has_error=*/true);
AddNode(close_kind, Consume(), /*has_error=*/true);
}
}

Expand Down Expand Up @@ -415,7 +411,7 @@ auto Context::AddNodeExpectingDeclSemi(StateStackEntry state,
}

if (auto semi = ConsumeIf(Lex::TokenKind::Semi)) {
AddNode(node_kind, *semi, state.subtree_start, /*has_error=*/false);
AddNode(node_kind, *semi, /*has_error=*/false);
} else {
if (is_def_allowed) {
DiagnoseExpectedDeclSemiOrDefinition(decl_kind);
Expand All @@ -433,8 +429,7 @@ auto Context::RecoverFromDeclError(StateStackEntry state, NodeKind node_kind,
if (skip_past_likely_end) {
token = SkipPastLikelyEnd(token);
}
AddNode(node_kind, token, state.subtree_start,
/*has_error=*/true);
AddNode(node_kind, token, /*has_error=*/true);
}

auto Context::ParseLibraryName(bool accept_default)
Expand Down Expand Up @@ -464,13 +459,11 @@ auto Context::ParseLibraryName(bool accept_default)
auto Context::ParseLibrarySpecifier(bool accept_default)
-> std::optional<StringLiteralValueId> {
auto library_token = ConsumeChecked(Lex::TokenKind::Library);
auto library_subtree_start = tree().size();
auto library_id = ParseLibraryName(accept_default);
if (!library_id) {
AddLeafNode(NodeKind::LibraryName, *position_, /*has_error=*/true);
}
AddNode(NodeKind::LibrarySpecifier, library_token, library_subtree_start,
/*has_error=*/false);
AddNode(NodeKind::LibrarySpecifier, library_token, /*has_error=*/false);
return library_id;
}

Expand Down Expand Up @@ -503,20 +496,19 @@ static auto ParsingInDeferredDefinitionScope(Context& context) -> bool {
state == State::DeclDefinitionFinishAsNamedConstraint;
}

auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token,
int subtree_start, bool has_error)
auto Context::AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
-> void {
if (ParsingInDeferredDefinitionScope(*this)) {
deferred_definition_stack_.push_back(tree_->deferred_definitions_.Add(
{.start_id =
FunctionDefinitionStartId(NodeId(tree_->node_impls_.size()))}));
}

AddNode(NodeKind::FunctionDefinitionStart, token, subtree_start, has_error);
AddNode(NodeKind::FunctionDefinitionStart, token, has_error);
}

auto Context::AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void {
auto Context::AddFunctionDefinition(Lex::TokenIndex token, bool has_error)
-> void {
if (ParsingInDeferredDefinitionScope(*this)) {
auto definition_index = deferred_definition_stack_.pop_back_val();
auto& definition = tree_->deferred_definitions_.Get(definition_index);
Expand All @@ -526,7 +518,7 @@ auto Context::AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
DeferredDefinitionIndex(tree_->deferred_definitions().size());
}

AddNode(NodeKind::FunctionDefinition, token, subtree_start, has_error);
AddNode(NodeKind::FunctionDefinition, token, has_error);
}

auto Context::PrintForStackDump(llvm::raw_ostream& output) const -> void {
Expand Down
10 changes: 4 additions & 6 deletions toolchain/parse/context.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ class Context {
-> void;

// Adds a node to the parse tree that has children.
auto AddNode(NodeKind kind, Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddNode(NodeKind kind, Lex::TokenIndex token, bool has_error) -> void;

// Replaces the placeholder node at the indicated position with a leaf node.
//
Expand Down Expand Up @@ -328,12 +327,11 @@ class Context {

// Adds a function definition start node, and begins tracking a deferred
// definition if necessary.
auto AddFunctionDefinitionStart(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddFunctionDefinitionStart(Lex::TokenIndex token, bool has_error)
-> void;
// Adds a function definition node, and ends tracking a deferred definition if
// necessary.
auto AddFunctionDefinition(Lex::TokenIndex token, int subtree_start,
bool has_error) -> void;
auto AddFunctionDefinition(Lex::TokenIndex token, bool has_error) -> void;

// Prints information for a stack dump.
auto PrintForStackDump(llvm::raw_ostream& output) const -> void;
Expand Down
Loading
Loading