diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 49da3160daf50..ea51f88f2d975 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -1198,7 +1198,7 @@ void FormatTokenLexer::truncateToken(size_t NewLen) { /// Count the length of leading whitespace in a token. static size_t countLeadingWhitespace(StringRef Text) { // Basically counting the length matched by this regex. - // "^([\n\r\f\v \t]|(\\\\|\\?\\?/)[\n\r])+" + // "^([\n\r\f\v \t]|\\\\[\n\r])+" // Directly using the regex turned out to be slow. With the regex // version formatting all files in this directory took about 1.25 // seconds. This version took about 0.5 seconds. @@ -1222,13 +1222,6 @@ static size_t countLeadingWhitespace(StringRef Text) { break; // Splice found, consume it. Cur = Lookahead + 1; - } else if (Cur[0] == '?' && Cur[1] == '?' && Cur[2] == '/' && - (Cur[3] == '\n' || Cur[3] == '\r')) { - // Newlines can also be escaped by a '?' '?' '/' trigraph. By the way, the - // characters are quoted individually in this comment because if we write - // them together some compilers warn that we have a trigraph in the code. - assert(End - Cur >= 4); - Cur += 4; } else { break; } @@ -1300,22 +1293,16 @@ FormatToken *FormatTokenLexer::getNextToken() { Style.TabWidth - (Style.TabWidth ? Column % Style.TabWidth : 0); break; case '\\': - case '?': - case '/': - // The text was entirely whitespace when this loop was entered. Thus - // this has to be an escape sequence. - assert(Text.substr(i, 4) == "\?\?/\r" || - Text.substr(i, 4) == "\?\?/\n" || - (i >= 1 && (Text.substr(i - 1, 4) == "\?\?/\r" || - Text.substr(i - 1, 4) == "\?\?/\n")) || - (i >= 2 && (Text.substr(i - 2, 4) == "\?\?/\r" || - Text.substr(i - 2, 4) == "\?\?/\n")) || - (Text[i] == '\\' && [&]() -> bool { - size_t j = i + 1; - while (j < Text.size() && isHorizontalWhitespace(Text[j])) - ++j; - return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r'); - }())); + // The code preceding the loop and in the countLeadingWhitespace + // function guarantees that Text is entirely whitespace, not including + // comments but including escaped newlines. So if 1 of these characters + // show up, then it has to be in an escape sequence. + assert(Text[i] == '\\' && [&]() -> bool { + size_t j = i + 1; + while (j < Text.size() && isHorizontalWhitespace(Text[j])) + ++j; + return j < Text.size() && (Text[j] == '\n' || Text[j] == '\r'); + }()); InEscape = true; break; default: diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 0bc1c6d45656e..46e55da5b5e11 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -6681,6 +6681,17 @@ TEST_F(FormatTest, EscapedNewlines) { " int x(int a);", AlignLeft); + // Escaped with a trigraph. The program just has to avoid crashing. + verifyNoCrash("#define A \?\?/\n" + "int i;\?\?/\n" + " int j;"); + verifyNoCrash("#define A \?\?/\r\n" + "int i;\?\?/\r\n" + " int j;"); + verifyNoCrash("#define A \?\?/\n" + "int i;", + getGoogleStyle(FormatStyle::LK_CSharp)); + // CRLF line endings verifyFormat("#define A \\\r\n int i; \\\r\n int j;", "#define A \\\r\nint i;\\\r\n int j;", Narrow);