Skip to content

Commit

Permalink
skip escaped characters in special comment scanner
Browse files Browse the repository at this point in the history
  • Loading branch information
clonker committed Jul 1, 2024
1 parent fc80f66 commit 96c162f
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 41 deletions.
46 changes: 9 additions & 37 deletions liblangutil/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,13 +759,12 @@ void Scanner::scanToken()
m_tokens[NextNext].extendedTokenInfo = std::make_tuple(m, n);
}

bool Scanner::scanEscape(bool const _rejectInvalidEscapes)
bool Scanner::scanEscape()
{
char c = m_char;

// Normally we ignore the slash just before a newline since it's meaningless.
// In the case of not rejecting invalid escapes, though, we preserve it.
if (_rejectInvalidEscapes && tryScanEndOfLine())
// Skip escaped newlines.
if (tryScanEndOfLine())
return true;
advance();

Expand All @@ -787,42 +786,17 @@ bool Scanner::scanEscape(bool const _rejectInvalidEscapes)
case 'u':
{
if (auto const codepoint = scanUnicode(); codepoint.has_value())
{
addUnicodeAsUTF8(*codepoint);
return true;
}
else if (_rejectInvalidEscapes)
return false;
else
{
addLiteralChar('\\');
addLiteralChar(c);
return true;
}
return false;
return true;
}
case 'x':
if (_rejectInvalidEscapes)
{
if (!scanHexByte(c))
return false;
}
else
if (!scanHexByte(c))
{
addLiteralChar('\\');
addLiteralChar(c);
return true;
}
if (!scanHexByte(c))
return false;
break;
default:
if (_rejectInvalidEscapes)
return false;
else
{
addLiteralChar('\\');
addLiteralChar(c);
return true;
}
return false;
}

addLiteralChar(c);
Expand Down Expand Up @@ -864,9 +838,7 @@ Token Scanner::scanString(bool const _isUnicode)
{
if (isSourcePastEndOfInput())
return setError(ScannerError::IllegalEscapeSequence);
bool const validEscape = scanEscape(false /* _rejectInvalidEscapes */);
// there are no invalid escapes in special comments except unterminated backslash at eos
solAssert(validEscape);
advance();
}
else
addLiteralChar(c);
Expand Down
2 changes: 1 addition & 1 deletion liblangutil/Scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ class Scanner
/// Scans an escape-sequence which is part of a string and adds the
/// decoded character to the current literal. Returns true if a pattern
/// is scanned.
bool scanEscape(bool _rejectInvalidEscapes = true);
bool scanEscape();

/// @returns true iff we are currently positioned at a unicode line break.
bool isUnicodeLinebreak();
Expand Down
15 changes: 12 additions & 3 deletions test/liblangutil/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,7 +1029,7 @@ BOOST_AUTO_TEST_CASE(yul_function_with_whitespace)
BOOST_AUTO_TEST_CASE(special_comment_with_invalid_escapes)
{
std::string input(R"("test\x\f\g\u\g\a\u\g\a12\uö\xyoof")");
std::string expectedOutput(R"(test\x\f\g\u\g\a\u\g\a12\uö\xyoof)");
std::string expectedOutput(R"(test12öyoof)");
CharStream stream(input, "");
Scanner scanner(stream, ScannerKind::SpecialComment);
BOOST_REQUIRE(scanner.currentToken() == Token::StringLiteral);
Expand All @@ -1039,8 +1039,7 @@ BOOST_AUTO_TEST_CASE(special_comment_with_invalid_escapes)
BOOST_AUTO_TEST_CASE(special_comment_with_valid_and_invalid_escapes)
{
std::string input(R"("test\n\x61\t\u01A9test\f")");
std::string expectedOutput(R"(test
a Ʃtest\f)");
std::string expectedOutput(R"(test6101A9test)");
CharStream stream(input, "");
Scanner scanner(stream, ScannerKind::SpecialComment);
BOOST_REQUIRE(scanner.currentToken() == Token::StringLiteral);
Expand All @@ -1050,11 +1049,21 @@ a Ʃtest\f)");
/// A backslash that is the very last character of the input has nothing left
/// to escape: the scanner is expected to yield an illegal token and report
/// an illegal escape sequence.
BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_escape_sequence_at_eos)
{
	// The input ends right after the backslash; there is no closing quote.
	CharStream stream(R"("test\)", "");
	Scanner scanner(stream, ScannerKind::SpecialComment);
	// No literal is compared here: only the token kind and the error are checked.
	BOOST_REQUIRE(scanner.currentToken() == Token::Illegal);
	BOOST_REQUIRE(scanner.currentError() == ScannerError::IllegalEscapeSequence);
}

/// Checks that escaped characters inside a special-comment string do not end
/// up in the literal, and that an escaped quote does not terminate the string.
BOOST_AUTO_TEST_CASE(special_comment_with_escaped_quotes)
{
	// After "test" come an escaped backslash and an escaped quote, followed by
	// the real closing quote.
	std::string const input = R"("test\\\"")";
	std::string const expectedLiteral = R"(test)";

	CharStream charStream(input, "");
	Scanner specialCommentScanner(charStream, ScannerKind::SpecialComment);

	BOOST_REQUIRE(specialCommentScanner.currentToken() == Token::StringLiteral);
	BOOST_REQUIRE(specialCommentScanner.currentLiteral() == expectedLiteral);
}

BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_string)
{
CharStream stream(R"("test)", "");
Expand Down

0 comments on commit 96c162f

Please sign in to comment.