diff --git a/toolchain/check/BUILD b/toolchain/check/BUILD index d6a7b15da04de..e1cb268405438 100644 --- a/toolchain/check/BUILD +++ b/toolchain/check/BUILD @@ -96,6 +96,8 @@ cc_library( name = "check", srcs = [ "check.cpp", + "check_unit.cpp", + "check_unit.h", "deferred_definition_worklist.cpp", "deferred_definition_worklist.h", "handle.h", diff --git a/toolchain/check/check.cpp b/toolchain/check/check.cpp index 4331094ce873d..2113deed47916 100644 --- a/toolchain/check/check.cpp +++ b/toolchain/check/check.cpp @@ -6,536 +6,28 @@ #include "common/check.h" #include "common/map.h" -#include "toolchain/base/kind_switch.h" -#include "toolchain/base/pretty_stack_trace_function.h" +#include "toolchain/check/check_unit.h" #include "toolchain/check/context.h" #include "toolchain/check/diagnostic_helpers.h" -#include "toolchain/check/generic.h" -#include "toolchain/check/handle.h" -#include "toolchain/check/import.h" -#include "toolchain/check/import_ref.h" -#include "toolchain/check/node_id_traversal.h" #include "toolchain/check/sem_ir_diagnostic_converter.h" #include "toolchain/diagnostics/diagnostic.h" #include "toolchain/diagnostics/format_providers.h" #include "toolchain/lex/token_kind.h" #include "toolchain/parse/node_ids.h" #include "toolchain/parse/tree.h" -#include "toolchain/parse/tree_node_diagnostic_converter.h" #include "toolchain/sem_ir/file.h" -#include "toolchain/sem_ir/ids.h" #include "toolchain/sem_ir/typed_insts.h" namespace Carbon::Check { -namespace { -struct UnitInfo { - // A given import within the file, with its destination. - struct Import { - Parse::Tree::PackagingNames names; - UnitInfo* unit_info; - }; - // A file's imports corresponding to a single package, for the map. - struct PackageImports { - // Use the constructor so that the SmallVector is only constructed - // as-needed. - explicit PackageImports(IdentifierId package_id, - Parse::ImportDeclId node_id) - : package_id(package_id), node_id(node_id) {} - - // The identifier of the imported package. - IdentifierId package_id; - // The first `import` declaration in the file, which declared the package's - // identifier (even if the import failed). Used for associating diagnostics - // not specific to a single import. - Parse::ImportDeclId node_id; - // The associated `import` instruction. Only valid once a file is checked. - SemIR::InstId import_decl_id = SemIR::InstId::Invalid; - // Whether there's an import that failed to load. - bool has_load_error = false; - // The list of valid imports. - llvm::SmallVector imports; - }; - - explicit UnitInfo(SemIR::CheckIRId check_ir_id, Unit& unit) - : check_ir_id(check_ir_id), - unit(&unit), - err_tracker(*unit.consumer), - emitter(*unit.node_converter, err_tracker) {} - - auto parse_tree() -> const Parse::Tree& { return unit->sem_ir->parse_tree(); } - auto source() -> const SourceBuffer& { - return parse_tree().tokens().source(); - } - - SemIR::CheckIRId check_ir_id; - Unit* unit; - - // Emitter information. - ErrorTrackingDiagnosticConsumer err_tracker; - DiagnosticEmitter emitter; - - // List of the outgoing imports. If a package includes unavailable library - // imports, it has an entry with has_load_error set. Invalid imports (for - // example, `import Main;`) aren't added because they won't add identifiers to - // name lookup. - llvm::SmallVector package_imports; - - // A map of the package names to the outgoing imports above. - Map package_imports_map; - - // The remaining number of imports which must be checked before this unit can - // be processed. - int32_t imports_remaining = 0; - - // A list of incoming imports. This will be empty for `impl` files, because - // imports only touch `api` files. - llvm::SmallVector incoming_imports; - - // The corresponding `api` unit if this is an `impl` file. The entry should - // also be in the corresponding `PackageImports`. - UnitInfo* api_for_impl = nullptr; - - // Whether the unit has been checked. - bool is_checked = false; -}; -} // namespace - -// Collects direct imports, for CollectTransitiveImports. -static auto CollectDirectImports(llvm::SmallVector& results, - llvm::MutableArrayRef ir_to_result_index, - SemIR::InstId import_decl_id, - const UnitInfo::PackageImports& imports, - bool is_local) -> void { - for (const auto& import : imports.imports) { - const auto& direct_ir = *import.unit_info->unit->sem_ir; - auto& index = ir_to_result_index[direct_ir.check_ir_id().index]; - if (index != -1) { - // This should only happen when doing API imports for an implementation - // file. Don't change the entry; is_export doesn't matter. - continue; - } - index = results.size(); - results.push_back({.decl_id = import_decl_id, - // Only tag exports in API files, ignoring the value in - // implementation files. - .is_export = is_local && import.names.is_export, - .sem_ir = &direct_ir}); - } -} - -// Collects transitive imports, handling deduplication. These will be unified -// between local_imports and api_imports. -static auto CollectTransitiveImports( - SemIR::InstId import_decl_id, const UnitInfo::PackageImports* local_imports, - const UnitInfo::PackageImports* api_imports, int total_ir_count) - -> llvm::SmallVector { - llvm::SmallVector results; - - // Track whether an IR was imported in full, including `export import`. This - // distinguishes from IRs that are indirectly added without all names being - // exported to this IR. - llvm::SmallVector ir_to_result_index(total_ir_count, -1); - - // First add direct imports. This means that if an entity is imported both - // directly and indirectly, the import path will reflect the direct import. - if (local_imports) { - CollectDirectImports(results, ir_to_result_index, import_decl_id, - *local_imports, - /*is_local=*/true); - } - if (api_imports) { - CollectDirectImports(results, ir_to_result_index, import_decl_id, - *api_imports, - /*is_local=*/false); - } - - // Loop through direct imports for any indirect exports. The underlying vector - // is appended during iteration, so take the size first. - const int direct_imports = results.size(); - for (int direct_index : llvm::seq(direct_imports)) { - bool is_export = results[direct_index].is_export; - - for (const auto& indirect_ir : - results[direct_index].sem_ir->import_irs().array_ref()) { - if (!indirect_ir.is_export) { - continue; - } - - auto& indirect_index = - ir_to_result_index[indirect_ir.sem_ir->check_ir_id().index]; - if (indirect_index == -1) { - indirect_index = results.size(); - // TODO: In the case of a recursive `export import`, this only points at - // the outermost import. May want something that better reflects the - // recursion. - results.push_back({.decl_id = results[direct_index].decl_id, - .is_export = is_export, - .sem_ir = indirect_ir.sem_ir}); - } else if (is_export) { - results[indirect_index].is_export = true; - } - } - } - - return results; -} - -// Imports the current package. -static auto ImportCurrentPackage(Context& context, UnitInfo& unit_info, - int total_ir_count, - SemIR::InstId package_inst_id, - SemIR::TypeId namespace_type_id) -> void { - // Add imports from the current package. - auto import_map_lookup = - unit_info.package_imports_map.Lookup(IdentifierId::Invalid); - if (!import_map_lookup) { - // Push the scope; there are no names to add. - context.scope_stack().Push(package_inst_id, SemIR::NameScopeId::Package); - return; - } - UnitInfo::PackageImports& self_import = - unit_info.package_imports[import_map_lookup.value()]; - - if (self_import.has_load_error) { - context.name_scopes().Get(SemIR::NameScopeId::Package).set_has_error(); - } - - ImportLibrariesFromCurrentPackage( - context, namespace_type_id, - CollectTransitiveImports(self_import.import_decl_id, &self_import, - /*api_imports=*/nullptr, total_ir_count)); - - context.scope_stack().Push( - package_inst_id, SemIR::NameScopeId::Package, SemIR::SpecificId::Invalid, - context.name_scopes().Get(SemIR::NameScopeId::Package).has_error()); -} - -// Imports all other packages (excluding the current package). -static auto ImportOtherPackages(Context& context, UnitInfo& unit_info, - int total_ir_count, - SemIR::TypeId namespace_type_id) -> void { - // api_imports_list is initially the size of the current file's imports, - // including for API files, for simplicity in iteration. It's only really used - // when processing an implementation file, in order to combine the API file - // imports. - // - // For packages imported by the API file, the IdentifierId is the package name - // and the index is into the API's import list. Otherwise, the initial - // {Invalid, -1} state remains. - llvm::SmallVector> api_imports_list; - api_imports_list.resize(unit_info.package_imports.size(), - {IdentifierId::Invalid, -1}); - - // When there's an API file, add the mapping to api_imports_list. - if (unit_info.api_for_impl) { - const auto& api_identifiers = - unit_info.api_for_impl->unit->value_stores->identifiers(); - auto& impl_identifiers = unit_info.unit->value_stores->identifiers(); - - for (auto [api_imports_index, api_imports] : - llvm::enumerate(unit_info.api_for_impl->package_imports)) { - // Skip the current package. - if (!api_imports.package_id.is_valid()) { - continue; - } - // Translate the package ID from the API file to the implementation file. - auto impl_package_id = - impl_identifiers.Add(api_identifiers.Get(api_imports.package_id)); - if (auto lookup = unit_info.package_imports_map.Lookup(impl_package_id)) { - // On a hit, replace the entry to unify the API and implementation - // imports. - api_imports_list[lookup.value()] = {impl_package_id, api_imports_index}; - } else { - // On a miss, add the package as API-only. - api_imports_list.push_back({impl_package_id, api_imports_index}); - } - } - } - - for (auto [i, api_imports_entry] : llvm::enumerate(api_imports_list)) { - // These variables are updated after figuring out which imports are present. - auto import_decl_id = SemIR::InstId::Invalid; - IdentifierId package_id = IdentifierId::Invalid; - bool has_load_error = false; - - // Identify the local package imports if present. - UnitInfo::PackageImports* local_imports = nullptr; - if (i < unit_info.package_imports.size()) { - local_imports = &unit_info.package_imports[i]; - if (!local_imports->package_id.is_valid()) { - // Skip the current package. - continue; - } - import_decl_id = local_imports->import_decl_id; - - package_id = local_imports->package_id; - has_load_error |= local_imports->has_load_error; - } - - // Identify the API package imports if present. - UnitInfo::PackageImports* api_imports = nullptr; - if (api_imports_entry.second != -1) { - api_imports = - &unit_info.api_for_impl->package_imports[api_imports_entry.second]; - - if (local_imports) { - CARBON_CHECK(package_id == api_imports_entry.first); - } else { - auto import_ir_inst_id = context.import_ir_insts().Add( - {.ir_id = SemIR::ImportIRId::ApiForImpl, - .inst_id = api_imports->import_decl_id}); - import_decl_id = - context.AddInst(context.MakeImportedLocAndInst( - import_ir_inst_id, {.package_id = SemIR::NameId::ForIdentifier( - api_imports_entry.first)})); - package_id = api_imports_entry.first; - } - has_load_error |= api_imports->has_load_error; - } - - // Do the actual import. - ImportLibrariesFromOtherPackage( - context, namespace_type_id, import_decl_id, package_id, - CollectTransitiveImports(import_decl_id, local_imports, api_imports, - total_ir_count), - has_load_error); - } -} - -// Add imports to the root block. -static auto InitPackageScopeAndImports(Context& context, UnitInfo& unit_info, - int total_ir_count) -> void { - // First create the constant values map for all imported IRs. We'll populate - // these with mappings for namespaces as we go. - size_t num_irs = 0; - for (auto& package_imports : unit_info.package_imports) { - num_irs += package_imports.imports.size(); - } - if (!unit_info.api_for_impl) { - // Leave an empty slot for ImportIRId::ApiForImpl. - ++num_irs; - } - - context.import_irs().Reserve(num_irs); - context.import_ir_constant_values().reserve(num_irs); - - context.SetTotalIRCount(total_ir_count); - - // Importing makes many namespaces, so only canonicalize the type once. - auto namespace_type_id = - context.GetSingletonType(SemIR::NamespaceType::SingletonInstId); - - // Define the package scope, with an instruction for `package` expressions to - // reference. - auto package_scope_id = context.name_scopes().Add( - SemIR::Namespace::PackageInstId, SemIR::NameId::PackageNamespace, - SemIR::NameScopeId::Invalid); - CARBON_CHECK(package_scope_id == SemIR::NameScopeId::Package); - - auto package_inst_id = context.AddInst( - Parse::NodeId::Invalid, {.type_id = namespace_type_id, - .name_scope_id = SemIR::NameScopeId::Package, - .import_id = SemIR::InstId::Invalid}); - CARBON_CHECK(package_inst_id == SemIR::Namespace::PackageInstId); - - // If there is an implicit `api` import, set it first so that it uses the - // ImportIRId::ApiForImpl when processed for imports. - if (unit_info.api_for_impl) { - const auto& names = context.parse_tree().packaging_decl()->names; - auto import_decl_id = context.AddInst( - names.node_id, - {.package_id = SemIR::NameId::ForIdentifier(names.package_id)}); - SetApiImportIR(context, {.decl_id = import_decl_id, - .is_export = false, - .sem_ir = unit_info.api_for_impl->unit->sem_ir}); - } else { - SetApiImportIR(context, - {.decl_id = SemIR::InstId::Invalid, .sem_ir = nullptr}); - } - - // Add import instructions for everything directly imported. Implicit imports - // are handled separately. - for (auto& package_imports : unit_info.package_imports) { - CARBON_CHECK(!package_imports.import_decl_id.is_valid()); - package_imports.import_decl_id = context.AddInst( - package_imports.node_id, {.package_id = SemIR::NameId::ForIdentifier( - package_imports.package_id)}); - } - - // Process the imports. - if (unit_info.api_for_impl) { - ImportApiFile(context, namespace_type_id, - *unit_info.api_for_impl->unit->sem_ir); - } - ImportCurrentPackage(context, unit_info, total_ir_count, package_inst_id, - namespace_type_id); - CARBON_CHECK(context.scope_stack().PeekIndex() == ScopeIndex::Package); - ImportOtherPackages(context, unit_info, total_ir_count, namespace_type_id); -} - -// Checks that each required definition is available. If the definition can be -// generated by resolving a specific, does so, otherwise emits a diagnostic for -// each declaration in context.definitions_required() that doesn't have a -// definition. -static auto CheckRequiredDefinitions(Context& context, - Context::DiagnosticEmitter& emitter) - -> void { - CARBON_DIAGNOSTIC(MissingDefinitionInImpl, Error, - "no definition found for declaration in impl file"); - // Note that more required definitions can be added during this loop. - for (size_t i = 0; i != context.definitions_required().size(); ++i) { - SemIR::InstId decl_inst_id = context.definitions_required()[i]; - SemIR::Inst decl_inst = context.insts().Get(decl_inst_id); - CARBON_KIND_SWITCH(context.insts().Get(decl_inst_id)) { - case CARBON_KIND(SemIR::ClassDecl class_decl): { - if (!context.classes().Get(class_decl.class_id).is_defined()) { - emitter.Emit(decl_inst_id, MissingDefinitionInImpl); - } - break; - } - case CARBON_KIND(SemIR::FunctionDecl function_decl): { - if (context.functions().Get(function_decl.function_id).definition_id == - SemIR::InstId::Invalid) { - emitter.Emit(decl_inst_id, MissingDefinitionInImpl); - } - break; - } - case CARBON_KIND(SemIR::ImplDecl impl_decl): { - if (!context.impls().Get(impl_decl.impl_id).is_defined()) { - emitter.Emit(decl_inst_id, MissingDefinitionInImpl); - } - break; - } - case SemIR::InterfaceDecl::Kind: { - // TODO: Handle `interface` as well, once we can test it without - // triggering - // https://github.com/carbon-language/carbon-lang/issues/4071. - CARBON_FATAL("TODO: Support interfaces in DiagnoseMissingDefinitions"); - } - case CARBON_KIND(SemIR::SpecificFunction specific_function): { - if (!ResolveSpecificDefinition(context, - specific_function.specific_id)) { - CARBON_DIAGNOSTIC(MissingGenericFunctionDefinition, Error, - "use of undefined generic function"); - CARBON_DIAGNOSTIC(MissingGenericFunctionDefinitionHere, Note, - "generic function declared here"); - auto generic_decl_id = - context.generics() - .Get(context.specifics() - .Get(specific_function.specific_id) - .generic_id) - .decl_id; - emitter.Build(decl_inst_id, MissingGenericFunctionDefinition) - .Note(generic_decl_id, MissingGenericFunctionDefinitionHere) - .Emit(); - } - break; - } - default: { - CARBON_FATAL("Unexpected inst in definitions_required: {0}", decl_inst); - } - } - } -} - -// Loops over all nodes in the tree. On some errors, this may return early, -// for example if an unrecoverable state is encountered. -// NOLINTNEXTLINE(readability-function-size) -static auto ProcessNodeIds(Context& context, llvm::raw_ostream* vlog_stream, - ErrorTrackingDiagnosticConsumer& err_tracker, - Parse::NodeLocConverter& converter) -> bool { - NodeIdTraversal traversal(context, vlog_stream); - - Parse::NodeId node_id = Parse::NodeId::Invalid; - - // On crash, report which token we were handling. - PrettyStackTraceFunction node_dumper([&](llvm::raw_ostream& output) { - auto loc = converter.ConvertLoc( - node_id, [](DiagnosticLoc, const DiagnosticBase<>&) {}); - loc.FormatLocation(output); - output << ": checking " << context.parse_tree().node_kind(node_id) << "\n"; - // Crash output has a tab indent; try to indent slightly past that. - loc.FormatSnippet(output, /*indent=*/10); - }); - - while (auto maybe_node_id = traversal.Next()) { - node_id = *maybe_node_id; - auto parse_kind = context.parse_tree().node_kind(node_id); - - switch (parse_kind) { -#define CARBON_PARSE_NODE_KIND(Name) \ - case Parse::NodeKind::Name: { \ - if (!HandleParseNode(context, Parse::Name##Id(node_id))) { \ - CARBON_CHECK(err_tracker.seen_error(), \ - "Handle" #Name \ - " returned false without printing a diagnostic"); \ - return false; \ - } \ - break; \ - } -#include "toolchain/parse/node_kind.def" - } - - traversal.Handle(parse_kind); - } - return true; -} - -// Produces and checks the IR for the provided Parse::Tree. -static auto CheckParseTree(UnitInfo& unit_info, int total_ir_count, - llvm::raw_ostream* vlog_stream) -> void { - Timings::ScopedTiming timing(unit_info.unit->timings, "check"); - - // We can safely mark this as checked at the start. - unit_info.is_checked = true; - - SemIR::File* sem_ir = unit_info.unit->sem_ir; - Context::DiagnosticEmitter emitter(*unit_info.unit->sem_ir_converter, - unit_info.err_tracker); - Context context(&emitter, unit_info.unit->get_parse_tree_and_subtrees, sem_ir, - vlog_stream); - PrettyStackTraceFunction context_dumper( - [&](llvm::raw_ostream& output) { context.PrintForStackDump(output); }); - - // Add a block for the file. - context.inst_block_stack().Push(); - - InitPackageScopeAndImports(context, unit_info, total_ir_count); - - // Eagerly import the impls declared in the api file to prepare to redeclare - // them. - ImportImplsFromApiFile(context); - - if (!ProcessNodeIds(context, vlog_stream, unit_info.err_tracker, - *unit_info.unit->node_converter)) { - context.sem_ir().set_has_errors(true); - return; - } - - CheckRequiredDefinitions(context, emitter); - - context.Finalize(); - - context.VerifyOnFinish(); - - sem_ir->set_has_errors(unit_info.err_tracker.seen_error()); - -#ifndef NDEBUG - if (auto verify = sem_ir->Verify(); !verify.ok()) { - CARBON_FATAL("{0}Built invalid semantics IR: {1}\n", *sem_ir, - verify.error()); - } -#endif -} - // The package and library names, used as map keys. using ImportKey = std::pair; // Returns a key form of the package object. file_package_id is only used for // imports, not the main package declaration; as a consequence, it will be // invalid for the main package declaration. -static auto GetImportKey(UnitInfo& unit_info, IdentifierId file_package_id, +static auto GetImportKey(UnitAndImports& unit_info, + IdentifierId file_package_id, Parse::Tree::PackagingNames names) -> ImportKey { auto* stores = unit_info.unit->value_stores; llvm::StringRef package_name = @@ -566,10 +58,10 @@ static auto RenderImportKey(ImportKey import_key) -> std::string { // // The ID comparisons between the import and unit are okay because they both // come from the same file. -static auto TrackImport(Map& api_map, +static auto TrackImport(Map& api_map, Map* explicit_import_map, - UnitInfo& unit_info, Parse::Tree::PackagingNames import) - -> void { + UnitAndImports& unit_info, + Parse::Tree::PackagingNames import) -> void { const auto& packaging = unit_info.parse_tree().packaging_decl(); IdentifierId file_package_id = @@ -670,17 +162,17 @@ static auto TrackImport(Map& api_map, auto create_imports = [&]() -> int32_t { int32_t index = unit_info.package_imports.size(); unit_info.package_imports.push_back( - UnitInfo::PackageImports(import.package_id, import.node_id)); + PackageImports(import.package_id, import.node_id)); return index; }; auto insert_result = unit_info.package_imports_map.Insert(import.package_id, create_imports); - UnitInfo::PackageImports& package_imports = + PackageImports& package_imports = unit_info.package_imports[insert_result.value()]; if (auto api_lookup = api_map.Lookup(import_key)) { // Add references between the file and imported api. - UnitInfo* api = api_lookup.value(); + UnitAndImports* api = api_lookup.value(); package_imports.imports.push_back({import, api}); ++unit_info.imports_remaining; api->incoming_imports.push_back(&unit_info); @@ -713,8 +205,9 @@ static auto TrackImport(Map& api_map, // related to the packaging because the strings are loaded as part of getting // the ImportKey (which we then do for `impl` files too). static auto BuildApiMapAndDiagnosePackaging( - llvm::MutableArrayRef unit_infos) -> Map { - Map api_map; + llvm::MutableArrayRef unit_infos) + -> Map { + Map api_map; for (auto& unit_info : unit_infos) { const auto& packaging = unit_info.parse_tree().packaging_decl(); // An import key formed from the `package` or `library` declaration. Or, for @@ -803,19 +296,19 @@ static auto BuildApiMapAndDiagnosePackaging( auto CheckParseTrees(llvm::MutableArrayRef units, bool prelude_import, llvm::raw_ostream* vlog_stream) -> void { - // UnitInfo is big due to its SmallVectors, so we default to 0 on the + // UnitAndImports is big due to its SmallVectors, so we default to 0 on the // stack. - llvm::SmallVector unit_infos; + llvm::SmallVector unit_infos; unit_infos.reserve(units.size()); for (auto [i, unit] : llvm::enumerate(units)) { unit_infos.emplace_back(SemIR::CheckIRId(i), unit); } - Map api_map = + Map api_map = BuildApiMapAndDiagnosePackaging(unit_infos); // Mark down imports for all files. - llvm::SmallVector ready_to_check; + llvm::SmallVector ready_to_check; ready_to_check.reserve(units.size()); for (auto& unit_info : unit_infos) { const auto& packaging = unit_info.parse_tree().packaging_decl(); @@ -857,7 +350,7 @@ auto CheckParseTrees(llvm::MutableArrayRef units, bool prelude_import, for (int check_index = 0; check_index < static_cast(ready_to_check.size()); ++check_index) { auto* unit_info = ready_to_check[check_index]; - CheckParseTree(*unit_info, units.size(), vlog_stream); + CheckUnit(unit_info, units.size(), vlog_stream).Run(); for (auto* incoming_import : unit_info->incoming_imports) { --incoming_import->imports_remaining; if (incoming_import->imports_remaining == 0) { @@ -904,7 +397,7 @@ auto CheckParseTrees(llvm::MutableArrayRef units, bool prelude_import, // incomplete imports. for (auto& unit_info : unit_infos) { if (unit_info.imports_remaining > 0) { - CheckParseTree(unit_info, units.size(), vlog_stream); + CheckUnit(&unit_info, units.size(), vlog_stream).Run(); } } } diff --git a/toolchain/check/check.h b/toolchain/check/check.h index fdce0495f913a..5820d8030e745 100644 --- a/toolchain/check/check.h +++ b/toolchain/check/check.h @@ -10,8 +10,6 @@ #include "toolchain/base/timings.h" #include "toolchain/check/sem_ir_diagnostic_converter.h" #include "toolchain/diagnostics/diagnostic_emitter.h" -#include "toolchain/lex/tokenized_buffer.h" -#include "toolchain/parse/tree.h" #include "toolchain/parse/tree_and_subtrees.h" #include "toolchain/sem_ir/file.h" diff --git a/toolchain/check/check_unit.cpp b/toolchain/check/check_unit.cpp new file mode 100644 index 0000000000000..3d3e8d4bd8d64 --- /dev/null +++ b/toolchain/check/check_unit.cpp @@ -0,0 +1,430 @@ +// Part of the Carbon Language project, under the Apache License v2.0 with LLVM +// Exceptions. See /LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "toolchain/check/check_unit.h" + +#include "toolchain/base/kind_switch.h" +#include "toolchain/base/pretty_stack_trace_function.h" +#include "toolchain/check/generic.h" +#include "toolchain/check/handle.h" +#include "toolchain/check/import.h" +#include "toolchain/check/import_ref.h" +#include "toolchain/check/node_id_traversal.h" + +namespace Carbon::Check { + +// Returns the number of imported IRs, to assist in Context construction. +static auto GetImportedIRCount(UnitAndImports* unit_and_imports) -> int { + int count = 0; + for (auto& package_imports : unit_and_imports->package_imports) { + count += package_imports.imports.size(); + } + if (!unit_and_imports->api_for_impl) { + // Leave an empty slot for ImportIRId::ApiForImpl. + ++count; + } + return count; +} + +CheckUnit::CheckUnit(UnitAndImports* unit_and_imports, int total_ir_count, + llvm::raw_ostream* vlog_stream) + : unit_and_imports_(unit_and_imports), + total_ir_count_(total_ir_count), + vlog_stream_(vlog_stream), + emitter_(*unit_and_imports_->unit->sem_ir_converter, + unit_and_imports_->err_tracker), + context_(&emitter_, unit_and_imports_->unit->get_parse_tree_and_subtrees, + unit_and_imports_->unit->sem_ir, + GetImportedIRCount(unit_and_imports), total_ir_count, + vlog_stream) {} + +auto CheckUnit::Run() -> void { + Timings::ScopedTiming timing(unit_and_imports_->unit->timings, "check"); + + // We can safely mark this as checked at the start. + unit_and_imports_->is_checked = true; + + PrettyStackTraceFunction context_dumper( + [&](llvm::raw_ostream& output) { context_.PrintForStackDump(output); }); + + // Add a block for the file. + context_.inst_block_stack().Push(); + + InitPackageScopeAndImports(); + + // Eagerly import the impls declared in the api file to prepare to redeclare + // them. + ImportImplsFromApiFile(context_); + + if (!ProcessNodeIds()) { + context_.sem_ir().set_has_errors(true); + return; + } + + CheckRequiredDefinitions(); + + context_.Finalize(); + + context_.VerifyOnFinish(); + + context_.sem_ir().set_has_errors(unit_and_imports_->err_tracker.seen_error()); + +#ifndef NDEBUG + if (auto verify = context_.sem_ir().Verify(); !verify.ok()) { + CARBON_FATAL("{0}Built invalid semantics IR: {1}\n", context_.sem_ir(), + verify.error()); + } +#endif +} + +auto CheckUnit::InitPackageScopeAndImports() -> void { + // Importing makes many namespaces, so only canonicalize the type once. + auto namespace_type_id = + context_.GetSingletonType(SemIR::NamespaceType::SingletonInstId); + + // Define the package scope, with an instruction for `package` expressions to + // reference. + auto package_scope_id = context_.name_scopes().Add( + SemIR::Namespace::PackageInstId, SemIR::NameId::PackageNamespace, + SemIR::NameScopeId::Invalid); + CARBON_CHECK(package_scope_id == SemIR::NameScopeId::Package); + + auto package_inst_id = context_.AddInst( + Parse::NodeId::Invalid, {.type_id = namespace_type_id, + .name_scope_id = SemIR::NameScopeId::Package, + .import_id = SemIR::InstId::Invalid}); + CARBON_CHECK(package_inst_id == SemIR::Namespace::PackageInstId); + + // If there is an implicit `api` import, set it first so that it uses the + // ImportIRId::ApiForImpl when processed for imports. + if (unit_and_imports_->api_for_impl) { + const auto& names = context_.parse_tree().packaging_decl()->names; + auto import_decl_id = context_.AddInst( + names.node_id, + {.package_id = SemIR::NameId::ForIdentifier(names.package_id)}); + SetApiImportIR(context_, + {.decl_id = import_decl_id, + .is_export = false, + .sem_ir = unit_and_imports_->api_for_impl->unit->sem_ir}); + } else { + SetApiImportIR(context_, + {.decl_id = SemIR::InstId::Invalid, .sem_ir = nullptr}); + } + + // Add import instructions for everything directly imported. Implicit imports + // are handled separately. + for (auto& package_imports : unit_and_imports_->package_imports) { + CARBON_CHECK(!package_imports.import_decl_id.is_valid()); + package_imports.import_decl_id = context_.AddInst( + package_imports.node_id, {.package_id = SemIR::NameId::ForIdentifier( + package_imports.package_id)}); + } + + // Process the imports. + if (unit_and_imports_->api_for_impl) { + ImportApiFile(context_, namespace_type_id, + *unit_and_imports_->api_for_impl->unit->sem_ir); + } + ImportCurrentPackage(package_inst_id, namespace_type_id); + CARBON_CHECK(context_.scope_stack().PeekIndex() == ScopeIndex::Package); + ImportOtherPackages(namespace_type_id); +} + +auto CheckUnit::CollectDirectImports( + llvm::SmallVector& results, + llvm::MutableArrayRef ir_to_result_index, SemIR::InstId import_decl_id, + const PackageImports& imports, bool is_local) -> void { + for (const auto& import : imports.imports) { + const auto& direct_ir = *import.unit_info->unit->sem_ir; + auto& index = ir_to_result_index[direct_ir.check_ir_id().index]; + if (index != -1) { + // This should only happen when doing API imports for an implementation + // file. Don't change the entry; is_export doesn't matter. + continue; + } + index = results.size(); + results.push_back({.decl_id = import_decl_id, + // Only tag exports in API files, ignoring the value in + // implementation files. + .is_export = is_local && import.names.is_export, + .sem_ir = &direct_ir}); + } +} + +auto CheckUnit::CollectTransitiveImports(SemIR::InstId import_decl_id, + const PackageImports* local_imports, + const PackageImports* api_imports) + -> llvm::SmallVector { + llvm::SmallVector results; + + // Track whether an IR was imported in full, including `export import`. This + // distinguishes from IRs that are indirectly added without all names being + // exported to this IR. + llvm::SmallVector ir_to_result_index(total_ir_count_, -1); + + // First add direct imports. This means that if an entity is imported both + // directly and indirectly, the import path will reflect the direct import. + if (local_imports) { + CollectDirectImports(results, ir_to_result_index, import_decl_id, + *local_imports, + /*is_local=*/true); + } + if (api_imports) { + CollectDirectImports(results, ir_to_result_index, import_decl_id, + *api_imports, + /*is_local=*/false); + } + + // Loop through direct imports for any indirect exports. The underlying vector + // is appended during iteration, so take the size first. + const int direct_imports = results.size(); + for (int direct_index : llvm::seq(direct_imports)) { + bool is_export = results[direct_index].is_export; + + for (const auto& indirect_ir : + results[direct_index].sem_ir->import_irs().array_ref()) { + if (!indirect_ir.is_export) { + continue; + } + + auto& indirect_index = + ir_to_result_index[indirect_ir.sem_ir->check_ir_id().index]; + if (indirect_index == -1) { + indirect_index = results.size(); + // TODO: In the case of a recursive `export import`, this only points at + // the outermost import. May want something that better reflects the + // recursion. + results.push_back({.decl_id = results[direct_index].decl_id, + .is_export = is_export, + .sem_ir = indirect_ir.sem_ir}); + } else if (is_export) { + results[indirect_index].is_export = true; + } + } + } + + return results; +} + +auto CheckUnit::ImportCurrentPackage(SemIR::InstId package_inst_id, + SemIR::TypeId namespace_type_id) -> void { + // Add imports from the current package. + auto import_map_lookup = + unit_and_imports_->package_imports_map.Lookup(IdentifierId::Invalid); + if (!import_map_lookup) { + // Push the scope; there are no names to add. + context_.scope_stack().Push(package_inst_id, SemIR::NameScopeId::Package); + return; + } + PackageImports& self_import = + unit_and_imports_->package_imports[import_map_lookup.value()]; + + if (self_import.has_load_error) { + context_.name_scopes().Get(SemIR::NameScopeId::Package).set_has_error(); + } + + ImportLibrariesFromCurrentPackage( + context_, namespace_type_id, + CollectTransitiveImports(self_import.import_decl_id, &self_import, + /*api_imports=*/nullptr)); + + context_.scope_stack().Push( + package_inst_id, SemIR::NameScopeId::Package, SemIR::SpecificId::Invalid, + context_.name_scopes().Get(SemIR::NameScopeId::Package).has_error()); +} + +auto CheckUnit::ImportOtherPackages(SemIR::TypeId namespace_type_id) -> void { + // api_imports_list is initially the size of the current file's imports, + // including for API files, for simplicity in iteration. It's only really used + // when processing an implementation file, in order to combine the API file + // imports. + // + // For packages imported by the API file, the IdentifierId is the package name + // and the index is into the API's import list. Otherwise, the initial + // {Invalid, -1} state remains. + llvm::SmallVector> api_imports_list; + api_imports_list.resize(unit_and_imports_->package_imports.size(), + {IdentifierId::Invalid, -1}); + + // When there's an API file, add the mapping to api_imports_list. + if (unit_and_imports_->api_for_impl) { + const auto& api_identifiers = + unit_and_imports_->api_for_impl->unit->value_stores->identifiers(); + auto& impl_identifiers = + unit_and_imports_->unit->value_stores->identifiers(); + + for (auto [api_imports_index, api_imports] : + llvm::enumerate(unit_and_imports_->api_for_impl->package_imports)) { + // Skip the current package. + if (!api_imports.package_id.is_valid()) { + continue; + } + // Translate the package ID from the API file to the implementation file. + auto impl_package_id = + impl_identifiers.Add(api_identifiers.Get(api_imports.package_id)); + if (auto lookup = + unit_and_imports_->package_imports_map.Lookup(impl_package_id)) { + // On a hit, replace the entry to unify the API and implementation + // imports. + api_imports_list[lookup.value()] = {impl_package_id, api_imports_index}; + } else { + // On a miss, add the package as API-only. + api_imports_list.push_back({impl_package_id, api_imports_index}); + } + } + } + + for (auto [i, api_imports_entry] : llvm::enumerate(api_imports_list)) { + // These variables are updated after figuring out which imports are present. + auto import_decl_id = SemIR::InstId::Invalid; + IdentifierId package_id = IdentifierId::Invalid; + bool has_load_error = false; + + // Identify the local package imports if present. + PackageImports* local_imports = nullptr; + if (i < unit_and_imports_->package_imports.size()) { + local_imports = &unit_and_imports_->package_imports[i]; + if (!local_imports->package_id.is_valid()) { + // Skip the current package. + continue; + } + import_decl_id = local_imports->import_decl_id; + + package_id = local_imports->package_id; + has_load_error |= local_imports->has_load_error; + } + + // Identify the API package imports if present. + PackageImports* api_imports = nullptr; + if (api_imports_entry.second != -1) { + api_imports = &unit_and_imports_->api_for_impl + ->package_imports[api_imports_entry.second]; + + if (local_imports) { + CARBON_CHECK(package_id == api_imports_entry.first); + } else { + auto import_ir_inst_id = context_.import_ir_insts().Add( + {.ir_id = SemIR::ImportIRId::ApiForImpl, + .inst_id = api_imports->import_decl_id}); + import_decl_id = + context_.AddInst(context_.MakeImportedLocAndInst( + import_ir_inst_id, {.package_id = SemIR::NameId::ForIdentifier( + api_imports_entry.first)})); + package_id = api_imports_entry.first; + } + has_load_error |= api_imports->has_load_error; + } + + // Do the actual import. + ImportLibrariesFromOtherPackage( + context_, namespace_type_id, import_decl_id, package_id, + CollectTransitiveImports(import_decl_id, local_imports, api_imports), + has_load_error); + } +} + +// Loops over all nodes in the tree. On some errors, this may return early, +// for example if an unrecoverable state is encountered. +// NOLINTNEXTLINE(readability-function-size) +auto CheckUnit::ProcessNodeIds() -> bool { + NodeIdTraversal traversal(context_, vlog_stream_); + + Parse::NodeId node_id = Parse::NodeId::Invalid; + + // On crash, report which token we were handling. + PrettyStackTraceFunction node_dumper([&](llvm::raw_ostream& output) { + auto loc = unit_and_imports_->unit->node_converter->ConvertLoc( + node_id, [](DiagnosticLoc, const DiagnosticBase<>&) {}); + loc.FormatLocation(output); + output << ": checking " << context_.parse_tree().node_kind(node_id) << "\n"; + // Crash output has a tab indent; try to indent slightly past that. + loc.FormatSnippet(output, /*indent=*/10); + }); + + while (auto maybe_node_id = traversal.Next()) { + node_id = *maybe_node_id; + auto parse_kind = context_.parse_tree().node_kind(node_id); + + bool result; + switch (parse_kind) { +#define CARBON_PARSE_NODE_KIND(Name) \ + case Parse::NodeKind::Name: { \ + result = HandleParseNode(context_, Parse::Name##Id(node_id)); \ + break; \ + } +#include "toolchain/parse/node_kind.def" + } + + if (!result) { + CARBON_CHECK( + unit_and_imports_->err_tracker.seen_error(), + "HandleParseNode for `{0}` returned false without diagnosing.", + parse_kind); + return false; + } + traversal.Handle(parse_kind); + } + return true; +} + +auto CheckUnit::CheckRequiredDefinitions() -> void { + CARBON_DIAGNOSTIC(MissingDefinitionInImpl, Error, + "no definition found for declaration in impl file"); + // Note that more required definitions can be added during this loop. + for (size_t i = 0; i != context_.definitions_required().size(); ++i) { + SemIR::InstId decl_inst_id = context_.definitions_required()[i]; + SemIR::Inst decl_inst = context_.insts().Get(decl_inst_id); + CARBON_KIND_SWITCH(context_.insts().Get(decl_inst_id)) { + case CARBON_KIND(SemIR::ClassDecl class_decl): { + if (!context_.classes().Get(class_decl.class_id).is_defined()) { + emitter_.Emit(decl_inst_id, MissingDefinitionInImpl); + } + break; + } + case CARBON_KIND(SemIR::FunctionDecl function_decl): { + if (context_.functions().Get(function_decl.function_id).definition_id == + SemIR::InstId::Invalid) { + emitter_.Emit(decl_inst_id, MissingDefinitionInImpl); + } + break; + } + case CARBON_KIND(SemIR::ImplDecl impl_decl): { + if (!context_.impls().Get(impl_decl.impl_id).is_defined()) { + emitter_.Emit(decl_inst_id, MissingDefinitionInImpl); + } + break; + } + case SemIR::InterfaceDecl::Kind: { + // TODO: Handle `interface` as well, once we can test it without + // triggering + // https://github.com/carbon-language/carbon-lang/issues/4071. + CARBON_FATAL("TODO: Support interfaces in DiagnoseMissingDefinitions"); + } + case CARBON_KIND(SemIR::SpecificFunction specific_function): { + if (!ResolveSpecificDefinition(context_, + specific_function.specific_id)) { + CARBON_DIAGNOSTIC(MissingGenericFunctionDefinition, Error, + "use of undefined generic function"); + CARBON_DIAGNOSTIC(MissingGenericFunctionDefinitionHere, Note, + "generic function declared here"); + auto generic_decl_id = + context_.generics() + .Get(context_.specifics() + .Get(specific_function.specific_id) + .generic_id) + .decl_id; + emitter_.Build(decl_inst_id, MissingGenericFunctionDefinition) + .Note(generic_decl_id, MissingGenericFunctionDefinitionHere) + .Emit(); + } + break; + } + default: { + CARBON_FATAL("Unexpected inst in definitions_required: {0}", decl_inst); + } + } + } +} + +} // namespace Carbon::Check diff --git a/toolchain/check/check_unit.h b/toolchain/check/check_unit.h new file mode 100644 index 0000000000000..2aba6799aa048 --- /dev/null +++ b/toolchain/check/check_unit.h @@ -0,0 +1,153 @@ +// Part of the Carbon Language project, under the Apache License v2.0 with LLVM +// Exceptions. See /LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef CARBON_TOOLCHAIN_CHECK_CHECK_UNIT_H_ +#define CARBON_TOOLCHAIN_CHECK_CHECK_UNIT_H_ + +#include "common/map.h" +#include "llvm/ADT/SmallVector.h" +#include "toolchain/check/check.h" +#include "toolchain/check/context.h" +#include "toolchain/parse/tree_node_diagnostic_converter.h" +#include "toolchain/sem_ir/ids.h" + +namespace Carbon::Check { + +struct UnitAndImports; + +// A file's imports corresponding to a single package, for +// `UnitAndImports::package_imports`. +struct PackageImports { + // A given import within the file, with its destination. + struct Import { + Parse::Tree::PackagingNames names; + UnitAndImports* unit_info; + }; + + // Use the constructor so that the SmallVector is only constructed + // as-needed. + explicit PackageImports(IdentifierId package_id, Parse::ImportDeclId node_id) + : package_id(package_id), node_id(node_id) {} + + // The identifier of the imported package. + IdentifierId package_id; + // The first `import` declaration in the file, which declared the package's + // identifier (even if the import failed). Used for associating diagnostics + // not specific to a single import. + Parse::ImportDeclId node_id; + // The associated `import` instruction. Only valid once a file is checked. + SemIR::InstId import_decl_id = SemIR::InstId::Invalid; + // Whether there's an import that failed to load. + bool has_load_error = false; + // The list of valid imports. + llvm::SmallVector imports; +}; + +// Contains information accumulated while checking a `Unit` (primarily import +// information), in addition to the `Unit` itself. +struct UnitAndImports { + explicit UnitAndImports(SemIR::CheckIRId check_ir_id, Unit& unit) + : check_ir_id(check_ir_id), + unit(&unit), + err_tracker(*unit.consumer), + emitter(*unit.node_converter, err_tracker) {} + + auto parse_tree() -> const Parse::Tree& { return unit->sem_ir->parse_tree(); } + auto source() -> const SourceBuffer& { + return parse_tree().tokens().source(); + } + + SemIR::CheckIRId check_ir_id; + Unit* unit; + + // Emitter information. + ErrorTrackingDiagnosticConsumer err_tracker; + DiagnosticEmitter emitter; + + // List of the outgoing imports. If a package includes unavailable library + // imports, it has an entry with has_load_error set. Invalid imports (for + // example, `import Main;`) aren't added because they won't add identifiers to + // name lookup. + llvm::SmallVector package_imports; + + // A map of the package names to the outgoing imports above. + Map package_imports_map; + + // The remaining number of imports which must be checked before this unit can + // be processed. + int32_t imports_remaining = 0; + + // A list of incoming imports. This will be empty for `impl` files, because + // imports only touch `api` files. + llvm::SmallVector incoming_imports; + + // The corresponding `api` unit if this is an `impl` file. The entry should + // also be in the corresponding `PackageImports`. + UnitAndImports* api_for_impl = nullptr; + + // Whether the unit has been checked. + bool is_checked = false; +}; + +// Handles checking of a single unit. Requires that all dependencies have been +// checked. +// +// This mainly splits out the single-unit logic from the higher level cross-unit +// logic in check.cpp. +class CheckUnit { + public: + explicit CheckUnit(UnitAndImports* unit_and_imports, int total_ir_count, + llvm::raw_ostream* vlog_stream); + + // Produces and checks the IR for the provided unit. + auto Run() -> void; + + private: + // Add imports to the root block. + auto InitPackageScopeAndImports() -> void; + + // Collects direct imports, for CollectTransitiveImports. + auto CollectDirectImports(llvm::SmallVector& results, + llvm::MutableArrayRef ir_to_result_index, + SemIR::InstId import_decl_id, + const PackageImports& imports, bool is_local) + -> void; + + // Collects transitive imports, handling deduplication. These will be unified + // between local_imports and api_imports. + auto CollectTransitiveImports(SemIR::InstId import_decl_id, + const PackageImports* local_imports, + const PackageImports* api_imports) + -> llvm::SmallVector; + + // Imports the current package. + auto ImportCurrentPackage(SemIR::InstId package_inst_id, + SemIR::TypeId namespace_type_id) -> void; + + // Imports all other packages (excluding the current package). + auto ImportOtherPackages(SemIR::TypeId namespace_type_id) -> void; + + // Checks that each required definition is available. If the definition can be + // generated by resolving a specific, does so, otherwise emits a diagnostic + // for each declaration in context.definitions_required() that doesn't have a + // definition. + auto CheckRequiredDefinitions() -> void; + + // Loops over all nodes in the tree. On some errors, this may return early, + // for example if an unrecoverable state is encountered. + // NOLINTNEXTLINE(readability-function-size) + auto ProcessNodeIds() -> bool; + + UnitAndImports* unit_and_imports_; + // The number of IRs being checked in total. + int total_ir_count_; + llvm::raw_ostream* vlog_stream_; + + Context::DiagnosticEmitter emitter_; + Context context_; +}; + +} // namespace Carbon::Check + +#endif // CARBON_TOOLCHAIN_CHECK_CHECK_UNIT_H_ diff --git a/toolchain/check/context.cpp b/toolchain/check/context.cpp index d4f7dd01a53b0..5bb666cd0e401 100644 --- a/toolchain/check/context.cpp +++ b/toolchain/check/context.cpp @@ -41,7 +41,8 @@ namespace Carbon::Check { Context::Context(DiagnosticEmitter* emitter, llvm::function_ref get_parse_tree_and_subtrees, - SemIR::File* sem_ir, llvm::raw_ostream* vlog_stream) + SemIR::File* sem_ir, int imported_ir_count, int total_ir_count, + llvm::raw_ostream* vlog_stream) : emitter_(emitter), get_parse_tree_and_subtrees_(get_parse_tree_and_subtrees), sem_ir_(sem_ir), @@ -54,6 +55,11 @@ Context::Context(DiagnosticEmitter* emitter, decl_name_stack_(this), scope_stack_(sem_ir_->identifiers()), global_init_(this) { + // Prepare fields which relate to the number of IRs available for import. + import_irs().Reserve(imported_ir_count); + import_ir_constant_values_.reserve(imported_ir_count); + check_ir_map_.resize(total_ir_count, SemIR::ImportIRId::Invalid); + // Map the builtin `` and `type` type constants to their corresponding // special `TypeId` values. type_ids_for_type_constants_.Insert( diff --git a/toolchain/check/context.h b/toolchain/check/context.h index cd1a02e2a0274..2b3f4afdb7ff4 100644 --- a/toolchain/check/context.h +++ b/toolchain/check/context.h @@ -72,7 +72,8 @@ class Context { explicit Context(DiagnosticEmitter* emitter, llvm::function_ref get_parse_tree_and_subtrees, - SemIR::File* sem_ir, llvm::raw_ostream* vlog_stream); + SemIR::File* sem_ir, int imported_ir_count, + int total_ir_count, llvm::raw_ostream* vlog_stream); // Marks an implementation TODO. Always returns false. auto TODO(SemIRLoc loc, std::string label) -> bool; @@ -461,13 +462,6 @@ class Context { auto Finalize() -> void; - // Sets the total number of IRs which exist. This is used to prepare a map - // from IR to imported IR. - auto SetTotalIRCount(int num_irs) -> void { - CARBON_CHECK(check_ir_map_.empty(), "SetTotalIRCount is only called once"); - check_ir_map_.resize(num_irs, SemIR::ImportIRId::Invalid); - } - // Returns the imported IR ID for an IR, or invalid if not imported. auto GetImportIRId(const SemIR::File& sem_ir) -> SemIR::ImportIRId& { return check_ir_map_[sem_ir.check_ir_id().index];