diff --git a/libyul/AsmParser.cpp b/libyul/AsmParser.cpp index a009b005d6bd..ccd73236a942 100644 --- a/libyul/AsmParser.cpp +++ b/libyul/AsmParser.cpp @@ -27,11 +27,10 @@ #include #include #include +#include #include #include -#include - #include #include @@ -58,6 +57,11 @@ std::optional toInt(std::string const& _value) } } +/** Returns true for a space, tab or carriage return, i.e. the whitespace characters checked here that are not a line feed. */ constexpr bool isNonBreakingWhitespace(char c) +{ + return c == ' ' || c == '\t' || c == '\r'; +} + } langutil::DebugData::ConstPtr Parser::createDebugData() const @@ -192,13 +196,67 @@ std::optional> Parser::parseSrcComme langutil::SourceLocation const& _commentLocation ) { - static std::regex const argsRegex = std::regex( - R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1 - R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..." - std::regex_constants::ECMAScript | std::regex_constants::optimize - ); - std::match_results match; - if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex)) + std::string const s_args (_arguments); + CharStream argumentStream (s_args, ""); + Scanner scanner (argumentStream); + + int sourceIndex { -1 }; + int start { -1 }; + int end { -1 }; + bool indexOutOfRange { false }; + std::optional snippet { std::nullopt }; + std::string_view tail { _arguments.substr(_arguments.size()) }; + /* Reads an optionally '-'-prefixed Number token as an int; the bool is true only when std::stoi threw std::out_of_range. */ auto const parseIndex = [](Scanner& _scanner) -> std::tuple, bool> + { + int sgn = 1; + if (_scanner.currentToken() == Token::Sub) + { + sgn = -1; + _scanner.next(); + } + if (_scanner.currentToken() != Token::Number) + return std::make_tuple(std::nullopt, false); + try + { + /* NOTE(review): std::stoi can also throw std::invalid_argument, which is not caught here - confirm a Number literal always starts with a digit. */ return std::make_tuple(sgn * std::stoi(_scanner.currentLiteral()), false); + } + catch(std::out_of_range const&) + { + return std::make_tuple(std::nullopt, true); + } + }; + /* Consumes "<sourceIndex>:<start>:<end>"; yields false at the first token that does not fit. */ bool success = [&]() + { + auto [parsed, exception] = parseIndex(scanner); + indexOutOfRange |= exception; + if (!parsed) + return false; + sourceIndex = *parsed; + + if (scanner.next() != Token::Colon) 
+ return false; + scanner.next(); + std::tie(parsed, exception) = parseIndex(scanner); + indexOutOfRange |= exception; + if (!parsed) + return false; + start = *parsed; + + if (scanner.next() != Token::Colon) + return false; + scanner.next(); + std::tie(parsed, exception) = parseIndex(scanner); + indexOutOfRange |= exception; + if (!parsed) + return false; + end = *parsed; + + size_t const endIndex = static_cast(scanner.currentLocation().end); + /* Succeeds only if the location is the last token or is directly followed by whitespace. */ return scanner.peekNextToken() == Token::EOS || langutil::isWhiteSpace(_arguments[endIndex]); + }(); + + // index out of range is handled by error 6367 + if (!success && !indexOutOfRange) { m_errorReporter.syntaxError( 8387_error, @@ -208,13 +266,34 @@ std::optional> Parser::parseSrcComme return std::nullopt; } - solAssert(match.size() == 5, ""); - std::string_view tail = _arguments.substr(static_cast(match.position() + match.length())); + if (scanner.peekNextToken() == Token::StringLiteral && + isNonBreakingWhitespace(_arguments[static_cast(scanner.currentLocation().end)])) + { + scanner.next(); + tail = _arguments.substr(static_cast(scanner.currentLocation().end)); + } + else if (scanner.peekNextToken() != Token::EOS && isNonBreakingWhitespace(_arguments[static_cast(scanner.currentLocation().end)])) + { + scanner.next(); + if (_arguments[static_cast(scanner.currentLocation().start)] == '"') + { + auto const endOfLine = _arguments.find_first_of('\n', static_cast(scanner.currentLocation().end)); + /* Cut at the line break first: the second argument of substr is a length, not an end position. */ snippet = _arguments.substr(0, endOfLine).substr(static_cast(scanner.currentLocation().start)); + while (!snippet->empty() && isNonBreakingWhitespace(snippet->back())) + snippet->remove_suffix(1); + if (endOfLine != std::string::npos) + tail = _arguments.substr(endOfLine); + } + else + tail = _arguments.substr(static_cast(scanner.currentLocation().start)); + } + else + tail = _arguments.substr(static_cast(scanner.currentLocation().end)); - if (match[4].matched && ( - !boost::algorithm::ends_with(match[4].str(), "\"") || - 
boost::algorithm::ends_with(match[4].str(), "\\\"") - )) + if (snippet && ( + !boost::algorithm::ends_with(*snippet, "\"") || + boost::algorithm::ends_with(*snippet, "\\\"") + )) { m_errorReporter.syntaxError( 1544_error, @@ -224,11 +303,7 @@ std::optional> Parser::parseSrcComme return {{tail, SourceLocation{}}}; } - std::optional const sourceIndex = toInt(match[1].str()); - std::optional const start = toInt(match[2].str()); - std::optional const end = toInt(match[3].str()); - - if (!sourceIndex.has_value() || !start.has_value() || !end.has_value()) + if (indexOutOfRange || (sourceIndex < 0 && sourceIndex != -1) || (start < 0 && start != -1) || (end < 0 && end != -1)) m_errorReporter.syntaxError( 6367_error, _commentLocation, @@ -236,8 +311,8 @@ std::optional> Parser::parseSrcComme "Expected non-negative integer values or -1 for source index and location." ); else if (sourceIndex == -1) - return {{tail, SourceLocation{start.value(), end.value(), nullptr}}}; - else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast(sourceIndex.value())))) + return {{tail, SourceLocation{start, end, nullptr}}}; + else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast(sourceIndex)))) m_errorReporter.syntaxError( 2674_error, _commentLocation, @@ -245,9 +320,8 @@ std::optional> Parser::parseSrcComme ); else { - std::shared_ptr sourceName = m_sourceNames->at(static_cast(sourceIndex.value())); - solAssert(sourceName, ""); - return {{tail, SourceLocation{start.value(), end.value(), std::move(sourceName)}}}; + std::shared_ptr sourceName = m_sourceNames->at(static_cast(sourceIndex)); + return {{tail, SourceLocation{start, end, std::move(sourceName)}}}; } return {{tail, SourceLocation{}}}; }