Skip to content

Commit d46fa02

Browse files
committed
[clang-format] SortIncludes should support "@import" lines in Objective-C
Fixes [[ #38995 | #38995 ]] This is an attempt to modify the regular expression to identify `@import` and `import` alongside the regular `#include`. The challenging part was not to support `@` in addition to `#` but how to handle everything that comes after the `include|import` keywords. Previously everything that wasn't `"` or `<` was consumed. But as you can see in this example from the issue #38995, there is no `"` or `<` following the keyword: ``` @import Foundation; ``` I experimented with a lot of fancy and useful expressions in [this online regex tool](https://regex101.com) only to find out that some things are simply not supported by the regex implementation in LLVM. * For example the beginning `[\t\ ]*` should be replacable by the horizontal whitespace character `\h*` but this will break the `SortIncludesTest.LeadingWhitespace` test. That's why I've chosen to come back to the basic building blocks. The essential change in this patch is the change from this regular expression: ``` ^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]) ~ ~~~~~~~~~~~~~~ ^ ^ | | only support # prefix not @ | only support "" and <> as delimiters no support for C++ modules and ; ending. Also this allows for "> or <" or "" or <> which all seems either off or wrong. ``` to this: ``` ^[\t\ ]*[@#][\t\ ]*(import|include)([^"]*("[^"]+")|[^<]*(<[^>]+>)|[\t\ ]*([^;]+;)) ~~~~ ~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~ ^ ^ ^ ^ ^ | | | | | Now support @ and #. Clearly support "" and <> as well as an include name without enclosing characters. Allows for no mixture of "> or <" or empty include names. ``` Here is how I've tested this patch: ``` ninja clang-Format ninja FormatTests ./tools/clang/unittests/Format/FormatTests --gtest_filter=SortIncludesTest* ``` And if that worked I doubled checked that nothing else broke by running all format checks: ``` ./tools/clang/unittests/Format/FormatTests ``` One side effect of this change is it should partially support [C++20 Module](https://en.cppreference.com/w/cpp/language/modules) `import` lines without the optional `export` in front. Adding this can be a change on its own that shouldn't be too hard. I say partially because the `@` or `#` are currently *NOT* optional in the regular expression. I see an opportunity to optimized the matching to exclude `@include` for example. But eventually these should be caught by the compiler, so... With my change, the matching group is not at a fixed position any longer. I decided to choose the last match (group) that is not empty. Reviewed By: HazardyKnusperkeks Differential Revision: https://reviews.llvm.org/D121370
1 parent 57d1779 commit d46fa02

File tree

4 files changed

+158
-29
lines changed

4 files changed

+158
-29
lines changed

clang/include/clang/Tooling/Inclusions/HeaderIncludes.h

+17
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,23 @@ class HeaderIncludes {
129129
llvm::Regex IncludeRegex;
130130
};
131131

132+
/// \returns a regex that can match various styles of C++ includes.
133+
/// For example:
134+
/// \code
135+
/// #include <foo.h>
136+
/// @import bar;
137+
/// #include "bar.h"
138+
/// \endcode
139+
llvm::Regex getCppIncludeRegex();
140+
141+
/// \returns the last match in the list of matches that is not empty.
142+
llvm::StringRef getIncludeNameFromMatches(
143+
const llvm::SmallVectorImpl<llvm::StringRef> &Matches);
144+
145+
/// \returns the given include name and removes the following symbols from the
146+
/// beginning and ending of the include name: " > < ;
147+
llvm::StringRef trimInclude(llvm::StringRef IncludeName);
148+
132149
} // namespace tooling
133150
} // namespace clang
134151

clang/lib/Format/Format.cpp

+21-19
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "llvm/Support/VirtualFileSystem.h"
4545
#include "llvm/Support/YAMLTraits.h"
4646
#include <algorithm>
47+
#include <limits>
4748
#include <memory>
4849
#include <mutex>
4950
#include <string>
@@ -2721,13 +2722,6 @@ static void sortCppIncludes(const FormatStyle &Style,
27212722
}
27222723
}
27232724

2724-
namespace {
2725-
2726-
const char CppIncludeRegexPattern[] =
2727-
R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
2728-
2729-
} // anonymous namespace
2730-
27312725
tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
27322726
ArrayRef<tooling::Range> Ranges,
27332727
StringRef FileName,
@@ -2737,7 +2731,7 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
27372731
.StartsWith("\xEF\xBB\xBF", 3) // UTF-8 BOM
27382732
.Default(0);
27392733
unsigned SearchFrom = 0;
2740-
llvm::Regex IncludeRegex(CppIncludeRegexPattern);
2734+
llvm::Regex IncludeRegex(tooling::getCppIncludeRegex());
27412735
SmallVector<StringRef, 4> Matches;
27422736
SmallVector<IncludeDirective, 16> IncludesInBlock;
27432737

@@ -2793,7 +2787,14 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
27932787
bool MergeWithNextLine = Trimmed.endswith("\\");
27942788
if (!FormattingOff && !MergeWithNextLine) {
27952789
if (IncludeRegex.match(Line, &Matches)) {
2796-
StringRef IncludeName = Matches[2];
2790+
StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
2791+
// This addresses https://github.com/llvm/llvm-project/issues/38995
2792+
bool WithSemicolon = false;
2793+
if (!IncludeName.startswith("\"") && !IncludeName.startswith("<") &&
2794+
IncludeName.endswith(";")) {
2795+
WithSemicolon = true;
2796+
}
2797+
27972798
if (Line.contains("/*") && !Line.contains("*/")) {
27982799
// #include with a start of a block comment, but without the end.
27992800
// Need to keep all the lines until the end of the comment together.
@@ -2806,8 +2807,10 @@ tooling::Replacements sortCppIncludes(const FormatStyle &Style, StringRef Code,
28062807
int Category = Categories.getIncludePriority(
28072808
IncludeName,
28082809
/*CheckMainHeader=*/!MainIncludeFound && FirstIncludeBlock);
2809-
int Priority = Categories.getSortIncludePriority(
2810-
IncludeName, !MainIncludeFound && FirstIncludeBlock);
2810+
int Priority = WithSemicolon ? std::numeric_limits<int>::max()
2811+
: Categories.getSortIncludePriority(
2812+
IncludeName, !MainIncludeFound &&
2813+
FirstIncludeBlock);
28112814
if (Category == 0)
28122815
MainIncludeFound = true;
28132816
IncludesInBlock.push_back(
@@ -3067,16 +3070,15 @@ namespace {
30673070

30683071
inline bool isHeaderInsertion(const tooling::Replacement &Replace) {
30693072
return Replace.getOffset() == UINT_MAX && Replace.getLength() == 0 &&
3070-
llvm::Regex(CppIncludeRegexPattern)
3071-
.match(Replace.getReplacementText());
3073+
tooling::getCppIncludeRegex().match(Replace.getReplacementText());
30723074
}
30733075

30743076
inline bool isHeaderDeletion(const tooling::Replacement &Replace) {
30753077
return Replace.getOffset() == UINT_MAX && Replace.getLength() == 1;
30763078
}
30773079

30783080
// FIXME: insert empty lines between newly created blocks.
3079-
tooling::Replacements
3081+
static tooling::Replacements
30803082
fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
30813083
const FormatStyle &Style) {
30823084
if (!Style.isCpp())
@@ -3108,7 +3110,7 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
31083110

31093111
for (const auto &Header : HeadersToDelete) {
31103112
tooling::Replacements Replaces =
3111-
Includes.remove(Header.trim("\"<>"), Header.startswith("<"));
3113+
Includes.remove(tooling::trimInclude(Header), Header.startswith("<"));
31123114
for (const auto &R : Replaces) {
31133115
auto Err = Result.add(R);
31143116
if (Err) {
@@ -3120,17 +3122,17 @@ fixCppIncludeInsertions(StringRef Code, const tooling::Replacements &Replaces,
31203122
}
31213123
}
31223124

3123-
llvm::Regex IncludeRegex = llvm::Regex(CppIncludeRegexPattern);
3125+
llvm::Regex IncludeRegex = tooling::getCppIncludeRegex();
31243126
llvm::SmallVector<StringRef, 4> Matches;
31253127
for (const auto &R : HeaderInsertions) {
31263128
auto IncludeDirective = R.getReplacementText();
31273129
bool Matched = IncludeRegex.match(IncludeDirective, &Matches);
31283130
assert(Matched && "Header insertion replacement must have replacement text "
31293131
"'#include ...'");
31303132
(void)Matched;
3131-
auto IncludeName = Matches[2];
3132-
auto Replace =
3133-
Includes.insert(IncludeName.trim("\"<>"), IncludeName.startswith("<"));
3133+
StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
3134+
auto Replace = Includes.insert(tooling::trimInclude(IncludeName),
3135+
IncludeName.startswith("<"));
31343136
if (Replace) {
31353137
auto Err = Result.add(*Replace);
31363138
if (Err) {

clang/lib/Tooling/Inclusions/HeaderIncludes.cpp

+23-10
Original file line numberDiff line numberDiff line change
@@ -169,13 +169,6 @@ unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
169169
});
170170
}
171171

172-
inline StringRef trimInclude(StringRef IncludeName) {
173-
return IncludeName.trim("\"<>");
174-
}
175-
176-
const char IncludeRegexPattern[] =
177-
R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
178-
179172
// The filename of Path excluding extension.
180173
// Used to match implementation with headers, this differs from sys::path::stem:
181174
// - in names with multiple dots (foo.cu.cc) it terminates at the *first*
@@ -274,8 +267,7 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
274267
MaxInsertOffset(MinInsertOffset +
275268
getMaxHeaderInsertionOffset(
276269
FileName, Code.drop_front(MinInsertOffset), Style)),
277-
Categories(Style, FileName),
278-
IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
270+
Categories(Style, FileName), IncludeRegex(getCppIncludeRegex()) {
279271
// Add 0 for main header and INT_MAX for headers that are not in any
280272
// category.
281273
Priorities = {0, INT_MAX};
@@ -290,10 +282,11 @@ HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
290282
for (auto Line : Lines) {
291283
NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
292284
if (IncludeRegex.match(Line, &Matches)) {
285+
StringRef IncludeName = tooling::getIncludeNameFromMatches(Matches);
293286
// If this is the last line without trailing newline, we need to make
294287
// sure we don't delete across the file boundary.
295288
addExistingInclude(
296-
Include(Matches[2],
289+
Include(IncludeName,
297290
tooling::Range(
298291
Offset, std::min(Line.size() + 1, Code.size() - Offset))),
299292
NextLineOffset);
@@ -403,5 +396,25 @@ tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
403396
return Result;
404397
}
405398

399+
llvm::Regex getCppIncludeRegex() {
400+
static const char CppIncludeRegexPattern[] =
401+
R"(^[\t\ ]*[@#][\t\ ]*(import|include)([^"]*("[^"]+")|[^<]*(<[^>]+>)|[\t\ ]*([^;]+;)))";
402+
return llvm::Regex(CppIncludeRegexPattern);
403+
}
404+
405+
llvm::StringRef getIncludeNameFromMatches(
406+
const llvm::SmallVectorImpl<llvm::StringRef> &Matches) {
407+
for (auto Match : llvm::reverse(Matches)) {
408+
if (!Match.empty())
409+
return Match;
410+
}
411+
llvm_unreachable("No non-empty match group found in list of matches");
412+
return llvm::StringRef();
413+
}
414+
415+
llvm::StringRef trimInclude(llvm::StringRef IncludeName) {
416+
return IncludeName.trim("\"<>;");
417+
}
418+
406419
} // namespace tooling
407420
} // namespace clang

clang/unittests/Format/SortIncludesTest.cpp

+97
Original file line numberDiff line numberDiff line change
@@ -458,6 +458,103 @@ TEST_F(SortIncludesTest, HandlesMultilineIncludes) {
458458
"#include \"b.h\"\n"));
459459
}
460460

461+
TEST_F(SortIncludesTest, SupportAtImportLines) {
462+
// Test from https://github.com/llvm/llvm-project/issues/38995
463+
EXPECT_EQ("#import \"a.h\"\n"
464+
"#import \"b.h\"\n"
465+
"#import \"c.h\"\n"
466+
"#import <d/e.h>\n"
467+
"@import Foundation;\n",
468+
sort("#import \"b.h\"\n"
469+
"#import \"c.h\"\n"
470+
"#import <d/e.h>\n"
471+
"@import Foundation;\n"
472+
"#import \"a.h\"\n"));
473+
474+
// Slightly more complicated test that shows sorting in each priorities still
475+
// works.
476+
EXPECT_EQ("#import \"a.h\"\n"
477+
"#import \"b.h\"\n"
478+
"#import \"c.h\"\n"
479+
"#import <d/e.h>\n"
480+
"@import Base;\n"
481+
"@import Foundation;\n"
482+
"@import base;\n"
483+
"@import foundation;\n",
484+
sort("#import \"b.h\"\n"
485+
"#import \"c.h\"\n"
486+
"@import Base;\n"
487+
"#import <d/e.h>\n"
488+
"@import foundation;\n"
489+
"@import Foundation;\n"
490+
"@import base;\n"
491+
"#import \"a.h\"\n"));
492+
493+
// Test that shows main headers in two groups are still found and sorting
494+
// still works. The @import's are kept in their respective group but are
495+
// put at the end of each group.
496+
FmtStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Preserve;
497+
EXPECT_EQ("#import \"foo.hpp\"\n"
498+
"#import \"b.h\"\n"
499+
"#import \"c.h\"\n"
500+
"#import <d/e.h>\n"
501+
"@import Base;\n"
502+
"@import Foundation;\n"
503+
"@import foundation;\n"
504+
"\n"
505+
"#import \"foo.h\"\n"
506+
"#include \"foobar\"\n"
507+
"#import <f/g.h>\n"
508+
"@import AAAA;\n"
509+
"@import aaaa;\n",
510+
sort("#import \"b.h\"\n"
511+
"@import Foundation;\n"
512+
"@import foundation;\n"
513+
"#import \"c.h\"\n"
514+
"#import <d/e.h>\n"
515+
"@import Base;\n"
516+
"#import \"foo.hpp\"\n"
517+
"\n"
518+
"@import aaaa;\n"
519+
"#import <f/g.h>\n"
520+
"@import AAAA;\n"
521+
"#include \"foobar\"\n"
522+
"#import \"foo.h\"\n",
523+
"foo.c", 2));
524+
525+
// Regrouping and putting @import's in the very last group
526+
FmtStyle.IncludeStyle.IncludeBlocks = tooling::IncludeStyle::IBS_Regroup;
527+
EXPECT_EQ("#import \"foo.hpp\"\n"
528+
"\n"
529+
"#import \"b.h\"\n"
530+
"#import \"c.h\"\n"
531+
"#import \"foo.h\"\n"
532+
"#include \"foobar\"\n"
533+
"\n"
534+
"#import <d/e.h>\n"
535+
"#import <f/g.h>\n"
536+
"\n"
537+
"@import AAAA;\n"
538+
"@import Base;\n"
539+
"@import Foundation;\n"
540+
"@import aaaa;\n"
541+
"@import foundation;\n",
542+
sort("#import \"b.h\"\n"
543+
"@import Foundation;\n"
544+
"@import foundation;\n"
545+
"#import \"c.h\"\n"
546+
"#import <d/e.h>\n"
547+
"@import Base;\n"
548+
"#import \"foo.hpp\"\n"
549+
"\n"
550+
"@import aaaa;\n"
551+
"#import <f/g.h>\n"
552+
"@import AAAA;\n"
553+
"#include \"foobar\"\n"
554+
"#import \"foo.h\"\n",
555+
"foo.c"));
556+
}
557+
461558
TEST_F(SortIncludesTest, LeavesMainHeaderFirst) {
462559
Style.IncludeIsMainRegex = "([-_](test|unittest))?$";
463560
EXPECT_EQ("#include \"llvm/a.h\"\n"

0 commit comments

Comments
 (0)