From 1492eb517716db9fc8b3299581b6ff970574c628 Mon Sep 17 00:00:00 2001 From: clonker Date: Mon, 1 Jul 2024 11:58:37 +0200 Subject: [PATCH] handle dangling backslashes in special comments and some cleanup --- Changelog.md | 4 ++-- liblangutil/Scanner.cpp | 21 ++++++++++++++------- liblangutil/Scanner.h | 2 +- test/liblangutil/Scanner.cpp | 10 +++++++++- test/libyul/Parser.cpp | 4 ---- 5 files changed, 26 insertions(+), 15 deletions(-) diff --git a/Changelog.md b/Changelog.md index d38eb30e38c0..cba038a56b98 100644 --- a/Changelog.md +++ b/Changelog.md @@ -3,7 +3,7 @@ Language Features: * Accept declarations of state variables with ``transient`` data location (parser support only, no code generation yet). * Make ``require(bool, Error)`` available when using the legacy pipeline. - * Yul: Parsing rules for source location comments have been relaxed: Whitespace between the indices as well as single-quoted code snippets are now allowed. + * Yul: Parsing rules for source location comments have been relaxed: Whitespace between the location components as well as single-quoted code snippets are now allowed. Compiler Features: @@ -14,12 +14,12 @@ Compiler Features: Bugfixes: - * AsmParser: Alleviates risk of encountering a segfault for very long comments. * SMTChecker: Fix error that reports invalid number of verified checks for BMC and CHC engines. * SMTChecker: Fix formatting of unary minus expressions in invariants. * SMTChecker: Fix internal compiler error when reporting proved targets for BMC engine. * TypeChecker: Fix segfault when assigning nested tuple to tuple. * Yul Optimizer: Name simplification could lead to forbidden identifiers with a leading and/or trailing dot, e.g., ``x._`` would get simplified into ``x.``. + * Yul Parser: Fix segfault when parsing very long location comments. ### 0.8.26 (2024-05-21) diff --git a/liblangutil/Scanner.cpp b/liblangutil/Scanner.cpp index 5789d3cec663..4a51642c16d9 100644 --- a/liblangutil/Scanner.cpp +++ b/liblangutil/Scanner.cpp @@ -759,12 +759,13 @@ void Scanner::scanToken() m_tokens[NextNext].extendedTokenInfo = std::make_tuple(m, n); } -bool Scanner::scanEscape() +bool Scanner::scanEscape(bool const _rejectInvalidEscapes) { char c = m_char; - // Skip escaped newlines. - if (m_kind != ScannerKind::SpecialComment && tryScanEndOfLine()) + // Normally we ignore the slash just before a newline since it's meaningless. + // In the case of not rejecting invalid escapes, though, we preserve it. + if (_rejectInvalidEscapes && tryScanEndOfLine()) return true; advance(); @@ -790,7 +791,7 @@ bool Scanner::scanEscape() addUnicodeAsUTF8(*codepoint); return true; } - else if (m_kind != ScannerKind::SpecialComment) + else if (_rejectInvalidEscapes) return false; else { @@ -800,7 +801,7 @@ bool Scanner::scanEscape() } } case 'x': - if (m_kind != ScannerKind::SpecialComment) + if (_rejectInvalidEscapes) { if (!scanHexByte(c)) return false; @@ -814,7 +815,7 @@ bool Scanner::scanEscape() } break; default: - if (m_kind != ScannerKind::SpecialComment) + if (_rejectInvalidEscapes) return false; else { @@ -860,7 +861,13 @@ Token Scanner::scanString(bool const _isUnicode) if (m_kind == ScannerKind::SpecialComment) { if (c == '\\') - scanEscape(); + { + if (isSourcePastEndOfInput()) + return setError(ScannerError::IllegalEscapeSequence); + bool const validEscape = scanEscape(false /* _rejectInvalidEscapes */); + // there are no invalid escapes in special comments except unterminated backslash at eos + solAssert(validEscape); + } else addLiteralChar(c); } diff --git a/liblangutil/Scanner.h b/liblangutil/Scanner.h index c734a368ccae..d39867834a27 100644 --- a/liblangutil/Scanner.h +++ b/liblangutil/Scanner.h @@ -246,7 +246,7 @@ class Scanner /// Scans an escape-sequence which is part of a string and adds the /// decoded character to the current literal. Returns true if a pattern /// is scanned. - bool scanEscape(); + bool scanEscape(bool _rejectInvalidEscapes = true); /// @returns true iff we are currently positioned at a unicode line break. bool isUnicodeLinebreak(); diff --git a/test/liblangutil/Scanner.cpp b/test/liblangutil/Scanner.cpp index fae024809e69..fb499e05c3fb 100644 --- a/test/liblangutil/Scanner.cpp +++ b/test/liblangutil/Scanner.cpp @@ -1047,11 +1047,19 @@ a Ʃtest\f)"); BOOST_REQUIRE(scanner.currentLiteral() == expectedOutput); } -BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_string) +BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_escape_sequence_at_eos) { CharStream stream(R"("test\)", ""); Scanner scanner(stream, ScannerKind::SpecialComment); BOOST_REQUIRE(scanner.currentToken() == Token::Illegal); + BOOST_REQUIRE(scanner.currentError() == ScannerError::IllegalEscapeSequence); +} + +BOOST_AUTO_TEST_CASE(special_comment_with_unterminated_string) +{ + CharStream stream(R"("test)", ""); + Scanner scanner(stream, ScannerKind::SpecialComment); + BOOST_REQUIRE(scanner.currentToken() == Token::Illegal); BOOST_REQUIRE(scanner.currentError() == ScannerError::IllegalStringEndQuote); } diff --git a/test/libyul/Parser.cpp b/test/libyul/Parser.cpp index ecdba2a9552c..ca8784ddb6ef 100644 --- a/test/libyul/Parser.cpp +++ b/test/libyul/Parser.cpp @@ -814,8 +814,6 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_invalid_escapes) EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{}); std::shared_ptr result = parse(sourceText, dialect, reporter); BOOST_REQUIRE(!!result && errorList.size() == 0); - // the second source location is not parsed as such, as the hex string isn't interpreted as snippet but - // as the beginning of the tail in AsmParser CHECK_LOCATION(result->debugData->originLocation, "source0", 111, 222); } @@ -831,8 +829,6 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_single_quote_snippet_with_whitespaces EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{}); std::shared_ptr result = parse(sourceText, dialect, reporter); BOOST_REQUIRE(!!result && errorList.size() == 0); - // the second source location is not parsed as such, as the hex string isn't interpreted as snippet but - // as the beginning of the tail in AsmParser CHECK_LOCATION(result->debugData->originLocation, "source1", 222, 333); }