Skip to content

Commit 1492eb5

Browse files
committed
handle dangling backslashes in special comments and some cleanup
1 parent a0cea0c commit 1492eb5

File tree

5 files changed

+26
-15
lines changed

5 files changed

+26
-15
lines changed

Changelog.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Language Features:
44
* Accept declarations of state variables with ``transient`` data location (parser support only, no code generation yet).
55
* Make ``require(bool, Error)`` available when using the legacy pipeline.
6-
* Yul: Parsing rules for source location comments have been relaxed: Whitespace between the indices as well as single-quoted code snippets are now allowed.
6+
* Yul: Parsing rules for source location comments have been relaxed: Whitespace between the location components as well as single-quoted code snippets are now allowed.
77

88

99
Compiler Features:
@@ -14,12 +14,12 @@ Compiler Features:
1414

1515

1616
Bugfixes:
17-
* AsmParser: Alleviates risk of encountering a segfault for very long comments.
1817
* SMTChecker: Fix error that reports invalid number of verified checks for BMC and CHC engines.
1918
* SMTChecker: Fix formatting of unary minus expressions in invariants.
2019
* SMTChecker: Fix internal compiler error when reporting proved targets for BMC engine.
2120
* TypeChecker: Fix segfault when assigning nested tuple to tuple.
2221
* Yul Optimizer: Name simplification could lead to forbidden identifiers with a leading and/or trailing dot, e.g., ``x._`` would get simplified into ``x.``.
22+
* Yul Parser: Fix segfault when parsing very long location comments.
2323

2424

2525
### 0.8.26 (2024-05-21)

liblangutil/Scanner.cpp

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -759,12 +759,13 @@ void Scanner::scanToken()
759759
m_tokens[NextNext].extendedTokenInfo = std::make_tuple(m, n);
760760
}
761761

762-
bool Scanner::scanEscape()
762+
bool Scanner::scanEscape(bool const _rejectInvalidEscapes)
763763
{
764764
char c = m_char;
765765

766-
// Skip escaped newlines.
767-
if (m_kind != ScannerKind::SpecialComment && tryScanEndOfLine())
766+
// Normally we ignore the backslash just before a newline since it's meaningless.
767+
// When invalid escapes are not rejected, though, we preserve it.
768+
if (_rejectInvalidEscapes && tryScanEndOfLine())
768769
return true;
769770
advance();
770771

@@ -790,7 +791,7 @@ bool Scanner::scanEscape()
790791
addUnicodeAsUTF8(*codepoint);
791792
return true;
792793
}
793-
else if (m_kind != ScannerKind::SpecialComment)
794+
else if (_rejectInvalidEscapes)
794795
return false;
795796
else
796797
{
@@ -800,7 +801,7 @@ bool Scanner::scanEscape()
800801
}
801802
}
802803
case 'x':
803-
if (m_kind != ScannerKind::SpecialComment)
804+
if (_rejectInvalidEscapes)
804805
{
805806
if (!scanHexByte(c))
806807
return false;
@@ -814,7 +815,7 @@ bool Scanner::scanEscape()
814815
}
815816
break;
816817
default:
817-
if (m_kind != ScannerKind::SpecialComment)
818+
if (_rejectInvalidEscapes)
818819
return false;
819820
else
820821
{
@@ -860,7 +861,13 @@ Token Scanner::scanString(bool const _isUnicode)
860861
if (m_kind == ScannerKind::SpecialComment)
861862
{
862863
if (c == '\\')
863-
scanEscape();
864+
{
865+
if (isSourcePastEndOfInput())
866+
return setError(ScannerError::IllegalEscapeSequence);
867+
bool const validEscape = scanEscape(false /* _rejectInvalidEscapes */);
868+
// There are no invalid escapes in special comments, except for an unterminated backslash at the end of the source, which is rejected by the check above.
869+
solAssert(validEscape);
870+
}
864871
else
865872
addLiteralChar(c);
866873
}

liblangutil/Scanner.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ class Scanner
246246
/// Scans an escape-sequence which is part of a string and adds the
247247
/// decoded character to the current literal. Returns true if a pattern
248248
/// is scanned.
249-
bool scanEscape();
249+
bool scanEscape(bool _rejectInvalidEscapes = true);
250250

251251
/// @returns true iff we are currently positioned at a unicode line break.
252252
bool isUnicodeLinebreak();

test/liblangutil/Scanner.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1047,11 +1047,19 @@ a Ʃtest\f)");
10471047
BOOST_REQUIRE(scanner.currentLiteral() == expectedOutput);
10481048
}
10491049

1050-
BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_string)
1050+
BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_escape_sequence_at_eos)
10511051
{
10521052
CharStream stream(R"("test\)", "");
10531053
Scanner scanner(stream, ScannerKind::SpecialComment);
10541054
BOOST_REQUIRE(scanner.currentToken() == Token::Illegal);
1055+
BOOST_REQUIRE(scanner.currentError() == ScannerError::IllegalEscapeSequence);
1056+
}
1057+
1058+
BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_string)
1059+
{
1060+
CharStream stream(R"("test)", "");
1061+
Scanner scanner(stream, ScannerKind::SpecialComment);
1062+
BOOST_REQUIRE(scanner.currentToken() == Token::Illegal);
10551063
BOOST_REQUIRE(scanner.currentError() == ScannerError::IllegalStringEndQuote);
10561064
}
10571065

test/libyul/Parser.cpp

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -814,8 +814,6 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_invalid_escapes)
814814
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
815815
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
816816
BOOST_REQUIRE(!!result && errorList.size() == 0);
817-
// the second source location is not parsed as such, as the hex string isn't interpreted as snippet but
818-
// as the beginning of the tail in AsmParser
819817
CHECK_LOCATION(result->debugData->originLocation, "source0", 111, 222);
820818
}
821819

@@ -831,8 +829,6 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_single_quote_snippet_with_whitespaces
831829
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
832830
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
833831
BOOST_REQUIRE(!!result && errorList.size() == 0);
834-
// the second source location is not parsed as such, as the hex string isn't interpreted as snippet but
835-
// as the beginning of the tail in AsmParser
836832
CHECK_LOCATION(result->debugData->originLocation, "source1", 222, 333);
837833
}
838834

0 commit comments

Comments
 (0)