From 7a759271c54b78eb940bf12274bcc535a3df22e3 Mon Sep 17 00:00:00 2001 From: Naveen Seth Hanig Date: Mon, 14 Jul 2025 19:17:10 +0200 Subject: [PATCH 1/6] [clang][deps] Recognize 'module;' in dependency directive scanner With this change, the dependency directive scanner now properly identifies "module;" as a directive, as per P1857R3. Previously, the global module fragment was not recognized by the scanner. --- clang/lib/Lex/DependencyDirectivesScanner.cpp | 7 +++++++ clang/unittests/Lex/DependencyDirectivesScannerTest.cpp | 9 +++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index d894c265a07a2..8822e760274d0 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -728,6 +728,13 @@ bool Scanner::lexModule(const char *&First, const char *const End) { return false; break; } + case ';': { + // Handle the global module fragment `module;`. + if (Id == "module" && !Export) + break; + skipLine(First, End); + return false; + } case '<': case '"': break; diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index d2ef27155df94..92f6f401ec6b7 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -1122,16 +1122,17 @@ ort \ )"; ASSERT_FALSE( minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); - EXPECT_STREQ("#include \"textual-header.h\"\nexport module m;" + EXPECT_STREQ("module;#include \"textual-header.h\"\nexport module m;" "exp\\\nort import:l[[rename]];" "import<<=3;import a b d e d e f e;" "import foo[[no_unique_address]];import foo();" "import f(:sefse);import f(->a=3);" "\n", Out.data()); - ASSERT_EQ(Directives.size(), 11u); - EXPECT_EQ(Directives[0].Kind, pp_include); - EXPECT_EQ(Directives[1].Kind, cxx_export_module_decl); + ASSERT_EQ(Directives.size(), 12u); + EXPECT_EQ(Directives[0].Kind, cxx_module_decl); + EXPECT_EQ(Directives[1].Kind, pp_include); + EXPECT_EQ(Directives[2].Kind, cxx_export_module_decl); } TEST(MinimizeSourceToDependencyDirectivesTest, ObjCMethodArgs) { From 25553f716701cb627e221deddace2d2956efc593 Mon Sep 17 00:00:00 2001 From: Naveen Seth Hanig Date: Mon, 14 Jul 2025 19:53:07 +0200 Subject: [PATCH 2/6] Add newline after 'module;' to be valid C++ --- clang/unittests/Lex/DependencyDirectivesScannerTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index 92f6f401ec6b7..a28989e6cf174 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -1122,7 +1122,7 @@ ort \ )"; ASSERT_FALSE( minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); - EXPECT_STREQ("module;#include \"textual-header.h\"\nexport module m;" + EXPECT_STREQ("module\n;#include \"textual-header.h\"\nexport module m;" "exp\\\nort import:l[[rename]];" "import<<=3;import a b d e d e f e;" "import foo[[no_unique_address]];import foo();" From 586b4389f28ab98bcb1229144004b74e73cccf4b Mon Sep 17 00:00:00 2001 From: Naveen Seth Hanig Date: Wed, 16 Jul 2025 21:56:51 +0200 Subject: [PATCH 3/6] [clang][deps] Properly capture the global module and '\n' for all module directives Previously, the newline after a module directive was not properly captured (or printed). According to P1857R3, each directive must, after skipping horizontal whitespace, be at the start of a logical line. Because the newline after module directives was missing, this invalidated the following line. This also fixes or removes tests that were in violation of P1857R3. This extends to Objective-C directives, which should also follow P1857R3. This also ensures that the global module fragment `module;` is captured by the dependency directives scanner. --- clang/lib/Lex/DependencyDirectivesScanner.cpp | 28 +++++++++---------- .../Lex/DependencyDirectivesScannerTest.cpp | 23 ++++++++------- 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/clang/lib/Lex/DependencyDirectivesScanner.cpp b/clang/lib/Lex/DependencyDirectivesScanner.cpp index 8822e760274d0..a1876f868474a 100644 --- a/clang/lib/Lex/DependencyDirectivesScanner.cpp +++ b/clang/lib/Lex/DependencyDirectivesScanner.cpp @@ -560,15 +560,13 @@ bool Scanner::lexModuleDirectiveBody(DirectiveKind Kind, const char *&First, if (Tok.is(tok::semi)) break; } + + const auto &Tok = lexToken(First, End); pushDirective(Kind); - skipWhitespace(First, End); - if (First == End) + if (Tok.is(tok::eof) || Tok.is(tok::eod)) return false; - if (!isVerticalWhitespace(*First)) - return reportError( - DirectiveLoc, diag::err_dep_source_scanner_unexpected_tokens_at_import); - skipNewline(First, End); - return false; + return reportError(DirectiveLoc, + diag::err_dep_source_scanner_unexpected_tokens_at_import); } dependency_directives_scan::Token &Scanner::lexToken(const char *&First, @@ -905,14 +903,6 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { CurDirToks.clear(); }); - // Handle "@import". - if (*First == '@') - return lexAt(First, End); - - // Handle module directives for C++20 modules. - if (*First == 'i' || *First == 'e' || *First == 'm') - return lexModule(First, End); - if (*First == '_') { if (isNextIdentifierOrSkipLine("_Pragma", First, End)) return lex_Pragma(First, End); @@ -925,6 +915,14 @@ bool Scanner::lexPPLine(const char *&First, const char *const End) { auto ScEx2 = make_scope_exit( [&]() { TheLexer.setParsingPreprocessorDirective(false); }); + // Handle "@import". + if (*First == '@') + return lexAt(First, End); + + // Handle module directives for C++20 modules. + if (*First == 'i' || *First == 'e' || *First == 'm') + return lexModule(First, End); + // Lex '#'. const dependency_directives_scan::Token &HashTok = lexToken(First, End); if (HashTok.is(tok::hashhash)) { diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index a28989e6cf174..1f1dea58a6cc5 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -639,15 +639,12 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import A;\n", Out)); EXPECT_STREQ("@import A;\n", Out.data()); - ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out)); - EXPECT_STREQ("@import A;\n", Out.data()); - ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out)); EXPECT_STREQ("@import A.B;\n", Out.data()); ASSERT_FALSE(minimizeSourceToDependencyDirectives( - "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \n /*x*/ ; /*x*/", Out)); - EXPECT_STREQ("@import A.B;\n", Out.data()); + "@import /*x*/ A /*x*/ . /*x*/ B /*x*/ \\n /*x*/ ; /*x*/", Out)); + EXPECT_STREQ("@import A.B\\n;\n", Out.data()); } TEST(MinimizeSourceToDependencyDirectivesTest, EmptyIncludesAndImports) { @@ -1122,11 +1119,17 @@ ort \ )"; ASSERT_FALSE( minimizeSourceToDependencyDirectives(Source, Out, Tokens, Directives)); - EXPECT_STREQ("module\n;#include \"textual-header.h\"\nexport module m;" - "exp\\\nort import:l[[rename]];" - "import<<=3;import a b d e d e f e;" - "import foo[[no_unique_address]];import foo();" - "import f(:sefse);import f(->a=3);" + + EXPECT_STREQ("module;\n" + "#include \"textual-header.h\"\n" + "export module m;\n" + "exp\\\nort import:l[[rename]];\n" + "import<<=3;\n" + "import a b d e d e f e;\n" + "import foo[[no_unique_address]];\n" + "import foo();\n" + "import f(:sefse);\n" + "import f(->a=3);\n" "\n", Out.data()); ASSERT_EQ(Directives.size(), 12u); From e9a6b45800d62e5b929e350a85065c4204028857 Mon Sep 17 00:00:00 2001 From: Naveen Seth Hanig Date: Fri, 18 Jul 2025 03:50:01 +0200 Subject: [PATCH 4/6] Address akyrtzi's review feedback --- clang/unittests/Lex/DependencyDirectivesScannerTest.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp index 1f1dea58a6cc5..2bfb5113daff1 100644 --- a/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp +++ b/clang/unittests/Lex/DependencyDirectivesScannerTest.cpp @@ -639,6 +639,9 @@ TEST(MinimizeSourceToDependencyDirectivesTest, AtImport) { ASSERT_FALSE(minimizeSourceToDependencyDirectives(" @ import A;\n", Out)); EXPECT_STREQ("@import A;\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A\n;", Out)); + EXPECT_STREQ("@import A\n;\n", Out.data()); + ASSERT_FALSE(minimizeSourceToDependencyDirectives("@import A.B;\n", Out)); EXPECT_STREQ("@import A.B;\n", Out.data()); From e755d18c1cad4273f1399434fa3c8556e4c2235b Mon Sep 17 00:00:00 2001 From: Naveen Seth Hanig Date: Fri, 18 Jul 2025 02:47:27 +0200 Subject: [PATCH 5/6] For review only: additional `TryConsumeToken(tok::eod)` Lines following the comment 'Note: Not required to pass CI' can be removed without the CI failing. The `TryConsumeToken(tok::eod)` is not required in those places, even though we know that a tok::eod follows. This is only for the patch review. --- clang/lib/Lex/Preprocessor.cpp | 2 ++ clang/lib/Parse/Parser.cpp | 11 +++++++++++ 2 files changed, 13 insertions(+) diff --git a/clang/lib/Lex/Preprocessor.cpp b/clang/lib/Lex/Preprocessor.cpp index bcd3ea60ce3da..e278846f6f36d 100644 --- a/clang/lib/Lex/Preprocessor.cpp +++ b/clang/lib/Lex/Preprocessor.cpp @@ -950,6 +950,8 @@ void Preprocessor::Lex(Token &Result) { case tok::period: ModuleDeclState.handlePeriod(); break; + case tok::eod: + break; case tok::identifier: // Check "import" and "module" when there is no open bracket. The two // identifiers are not meaningful with open brackets. diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 8834bf80c4016..9c6b1e223fcb1 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -2361,6 +2361,8 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { // Parse a global-module-fragment, if present. if (getLangOpts().CPlusPlusModules && Tok.is(tok::semi)) { SourceLocation SemiLoc = ConsumeToken(); + // Note: Not required to pass CI + TryConsumeToken(tok::eod); if (!Introducer.isFirstPPToken()) { Diag(StartLoc, diag::err_global_module_introducer_not_at_start) << SourceRange(StartLoc, SemiLoc); @@ -2385,6 +2387,8 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { SourceLocation PrivateLoc = ConsumeToken(); DiagnoseAndSkipCXX11Attributes(); ExpectAndConsumeSemi(diag::err_private_module_fragment_expected_semi); + // Note: Not required to pass CI + TryConsumeToken(tok::eod); ImportState = ImportState == Sema::ModuleImportState::ImportAllowed ? Sema::ModuleImportState::PrivateFragmentImportAllowed : Sema::ModuleImportState::PrivateFragmentImportFinished; @@ -2416,6 +2420,8 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { /*WarnOnUnknownAttrs=*/true); ExpectAndConsumeSemi(diag::err_module_expected_semi); + // Note: Not required to pass CI + TryConsumeToken(tok::eod); return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, Partition, ImportState, Introducer.isFirstPPToken()); @@ -2519,6 +2525,7 @@ Decl *Parser::ParseModuleImport(SourceLocation AtLoc, break; } ExpectAndConsumeSemi(diag::err_module_expected_semi); + TryConsumeToken(tok::eod); if (SeenError) return nullptr; @@ -2560,6 +2567,8 @@ bool Parser::ParseModuleName(SourceLocation UseLoc, Diag(Tok, diag::err_module_expected_ident) << IsImport; SkipUntil(tok::semi); + // Note: Not required to pass CI + TryConsumeToken(tok::eod); return true; } @@ -2571,6 +2580,8 @@ bool Parser::ParseModuleName(SourceLocation UseLoc, return false; ConsumeToken(); + // Note: Not required to pass CI + TryConsumeToken(tok::eod); } } From 0bc7245a5f5543802d9e11510b21ed254ea8a2da Mon Sep 17 00:00:00 2001 From: Naveen Seth Hanig Date: Fri, 18 Jul 2025 03:54:49 +0200 Subject: [PATCH 6/6] Update Parser and Preprocessor to handle the added tok::eod This commit removes the extra `TryConsumeToken(tok::eod)` which were added in e755d18c1cad and which are not explicitly required for this fix or to pass the CI. --- clang/lib/Parse/Parser.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/clang/lib/Parse/Parser.cpp b/clang/lib/Parse/Parser.cpp index 9c6b1e223fcb1..ff50b3f83908c 100644 --- a/clang/lib/Parse/Parser.cpp +++ b/clang/lib/Parse/Parser.cpp @@ -2361,8 +2361,6 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { // Parse a global-module-fragment, if present. if (getLangOpts().CPlusPlusModules && Tok.is(tok::semi)) { SourceLocation SemiLoc = ConsumeToken(); - // Note: Not required to pass CI - TryConsumeToken(tok::eod); if (!Introducer.isFirstPPToken()) { Diag(StartLoc, diag::err_global_module_introducer_not_at_start) << SourceRange(StartLoc, SemiLoc); @@ -2387,8 +2385,6 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { SourceLocation PrivateLoc = ConsumeToken(); DiagnoseAndSkipCXX11Attributes(); ExpectAndConsumeSemi(diag::err_private_module_fragment_expected_semi); - // Note: Not required to pass CI - TryConsumeToken(tok::eod); ImportState = ImportState == Sema::ModuleImportState::ImportAllowed ? Sema::ModuleImportState::PrivateFragmentImportAllowed : Sema::ModuleImportState::PrivateFragmentImportFinished; @@ -2420,8 +2416,6 @@ Parser::ParseModuleDecl(Sema::ModuleImportState &ImportState) { /*WarnOnUnknownAttrs=*/true); ExpectAndConsumeSemi(diag::err_module_expected_semi); - // Note: Not required to pass CI - TryConsumeToken(tok::eod); return Actions.ActOnModuleDecl(StartLoc, ModuleLoc, MDK, Path, Partition, ImportState, Introducer.isFirstPPToken()); @@ -2567,8 +2561,6 @@ bool Parser::ParseModuleName(SourceLocation UseLoc, Diag(Tok, diag::err_module_expected_ident) << IsImport; SkipUntil(tok::semi); - // Note: Not required to pass CI - TryConsumeToken(tok::eod); return true; } @@ -2580,8 +2572,6 @@ bool Parser::ParseModuleName(SourceLocation UseLoc, return false; ConsumeToken(); - // Note: Not required to pass CI - TryConsumeToken(tok::eod); } }