Skip to content

Commit

Permalink
handle dangling backslashes in special comments and some cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
clonker committed Jul 1, 2024
1 parent a0cea0c commit 1492eb5
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 15 deletions.
4 changes: 2 additions & 2 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
Language Features:
* Accept declarations of state variables with ``transient`` data location (parser support only, no code generation yet).
* Make ``require(bool, Error)`` available when using the legacy pipeline.
* Yul: Parsing rules for source location comments have been relaxed: Whitespace between the indices as well as single-quoted code snippets are now allowed.
* Yul: Parsing rules for source location comments have been relaxed: Whitespace between the location components as well as single-quoted code snippets are now allowed.


Compiler Features:
Expand All @@ -14,12 +14,12 @@ Compiler Features:


Bugfixes:
* AsmParser: Alleviates risk of encountering a segfault for very long comments.
* SMTChecker: Fix error that reports invalid number of verified checks for BMC and CHC engines.
* SMTChecker: Fix formatting of unary minus expressions in invariants.
* SMTChecker: Fix internal compiler error when reporting proved targets for BMC engine.
* TypeChecker: Fix segfault when assigning nested tuple to tuple.
* Yul Optimizer: Name simplification could lead to forbidden identifiers with a leading and/or trailing dot, e.g., ``x._`` would get simplified into ``x.``.
* Yul Parser: Fix segfault when parsing very long location comments.


### 0.8.26 (2024-05-21)
Expand Down
21 changes: 14 additions & 7 deletions liblangutil/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -759,12 +759,13 @@ void Scanner::scanToken()
m_tokens[NextNext].extendedTokenInfo = std::make_tuple(m, n);
}

bool Scanner::scanEscape()
bool Scanner::scanEscape(bool const _rejectInvalidEscapes)
{
char c = m_char;

// Skip escaped newlines.
if (m_kind != ScannerKind::SpecialComment && tryScanEndOfLine())
// Normally we ignore the backslash just before a newline since it's meaningless.
// When invalid escapes are not rejected, though, we preserve it.
if (_rejectInvalidEscapes && tryScanEndOfLine())
return true;
advance();

Expand All @@ -790,7 +791,7 @@ bool Scanner::scanEscape()
addUnicodeAsUTF8(*codepoint);
return true;
}
else if (m_kind != ScannerKind::SpecialComment)
else if (_rejectInvalidEscapes)
return false;
else
{
Expand All @@ -800,7 +801,7 @@ bool Scanner::scanEscape()
}
}
case 'x':
if (m_kind != ScannerKind::SpecialComment)
if (_rejectInvalidEscapes)
{
if (!scanHexByte(c))
return false;
Expand All @@ -814,7 +815,7 @@ bool Scanner::scanEscape()
}
break;
default:
if (m_kind != ScannerKind::SpecialComment)
if (_rejectInvalidEscapes)
return false;
else
{
Expand Down Expand Up @@ -860,7 +861,13 @@ Token Scanner::scanString(bool const _isUnicode)
if (m_kind == ScannerKind::SpecialComment)
{
if (c == '\\')
scanEscape();
{
if (isSourcePastEndOfInput())
return setError(ScannerError::IllegalEscapeSequence);
bool const validEscape = scanEscape(false /* _rejectInvalidEscapes */);
// Special comments accept every escape sequence; the only invalid case is a backslash at the very end of the input, which is rejected above.
solAssert(validEscape);
}
else
addLiteralChar(c);
}
Expand Down
2 changes: 1 addition & 1 deletion liblangutil/Scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ class Scanner
/// Scans an escape-sequence which is part of a string and adds the
/// decoded character to the current literal. Returns true if a pattern
/// is scanned.
bool scanEscape();
bool scanEscape(bool _rejectInvalidEscapes = true);

/// @returns true iff we are currently positioned at a unicode line break.
bool isUnicodeLinebreak();
Expand Down
10 changes: 9 additions & 1 deletion test/liblangutil/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1047,11 +1047,19 @@ a Ʃtest\f)");
BOOST_REQUIRE(scanner.currentLiteral() == expectedOutput);
}

BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_string)
BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_escape_sequence_at_eos)
{
// Input is `"test\`: an opening double quote, the text `test`, and a backslash as the
// very last character, so the escape sequence has nothing following it.
CharStream stream(R"("test\)", "");
// Scan in special-comment mode.
Scanner scanner(stream, ScannerKind::SpecialComment);
// The dangling backslash must yield an illegal token carrying the escape-sequence error.
BOOST_REQUIRE(scanner.currentToken() == Token::Illegal);
BOOST_REQUIRE(scanner.currentError() == ScannerError::IllegalEscapeSequence);
}

BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_string)
{
// Input is `"test`: an opening double quote and the text `test`, with no closing quote
// and no trailing backslash.
CharStream stream(R"("test)", "");
// Scan in special-comment mode.
Scanner scanner(stream, ScannerKind::SpecialComment);
// A string that is never closed must yield an illegal token carrying the missing-end-quote error.
BOOST_REQUIRE(scanner.currentToken() == Token::Illegal);
BOOST_REQUIRE(scanner.currentError() == ScannerError::IllegalStringEndQuote);
}

Expand Down
4 changes: 0 additions & 4 deletions test/libyul/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -814,8 +814,6 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_invalid_escapes)
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result && errorList.size() == 0);
// the second source location is not parsed as such, as the hex string isn't interpreted as snippet but
// as the beginning of the tail in AsmParser
CHECK_LOCATION(result->debugData->originLocation, "source0", 111, 222);
}

Expand All @@ -831,8 +829,6 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_single_quote_snippet_with_whitespaces
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result && errorList.size() == 0);
// the second source location is not parsed as such, as the hex string isn't interpreted as snippet but
// as the beginning of the tail in AsmParser
CHECK_LOCATION(result->debugData->originLocation, "source1", 222, 333);
}

Expand Down

0 comments on commit 1492eb5

Please sign in to comment.