Skip to content

Commit

Permalink
AsmParser: parse source comment using scanner instead of regex
Browse files Browse the repository at this point in the history
  • Loading branch information
clonker committed Jun 20, 2024
1 parent d0190e1 commit f3b731e
Showing 1 changed file with 99 additions and 25 deletions.
124 changes: 99 additions & 25 deletions libyul/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@
#include <liblangutil/ErrorReporter.h>
#include <liblangutil/Exceptions.h>
#include <liblangutil/Scanner.h>
#include <liblangutil/Common.h>
#include <libsolutil/Common.h>
#include <libsolutil/Visitor.h>

#include <range/v3/view/subrange.hpp>

#include <boost/algorithm/string.hpp>

#include <algorithm>
Expand All @@ -58,6 +57,11 @@ std::optional<int> toInt(std::string const& _value)
}
}

/// @returns true iff @a c is a space, a horizontal tab or a carriage return.
/// A line feed ('\n') is not matched, so it still counts as a line break.
constexpr bool isNonBreakingWhitespace(char c)
{
	switch (c)
	{
	case ' ':
	case '\t':
	case '\r':
		return true;
	default:
		return false;
	}
}

}

langutil::DebugData::ConstPtr Parser::createDebugData() const
Expand Down Expand Up @@ -192,13 +196,67 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
langutil::SourceLocation const& _commentLocation
)
{
static std::regex const argsRegex = std::regex(
R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
std::regex_constants::ECMAScript | std::regex_constants::optimize
);
std::match_results<std::string_view::const_iterator> match;
if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex))
std::string const s_args (_arguments);
CharStream argumentStream (s_args, "");
Scanner scanner (argumentStream);

int sourceIndex { -1 };
int start { -1 };
int end { -1 };
bool indexOutOfRange { false };
std::optional<std::string_view> snippet { std::nullopt };
std::string_view tail { _arguments.substr(_arguments.size()) };
auto const parseIndex = [](Scanner& _scanner) -> std::tuple<std::optional<int>, bool>
{
int sgn = 1;
if (_scanner.currentToken() == Token::Sub)
{
sgn = -1;
_scanner.next();
}
if (_scanner.currentToken() != Token::Number)
return std::make_tuple(std::nullopt, false);
try
{
return std::make_tuple(sgn * std::stoi(_scanner.currentLiteral()), false);
}
catch(std::out_of_range const&)
{
return std::make_tuple(std::nullopt, true);
}
};
bool success = [&]()
{
auto [parsed, exception] = parseIndex(scanner);
indexOutOfRange |= exception;
if (!parsed)
return false;
sourceIndex = *parsed;

if (scanner.next() != Token::Colon)
return false;
scanner.next();
std::tie(parsed, exception) = parseIndex(scanner);
indexOutOfRange |= exception;
if (!parsed)
return false;
start = *parsed;

if (scanner.next() != Token::Colon)
return false;
scanner.next();
std::tie(parsed, exception) = parseIndex(scanner);
indexOutOfRange |= exception;
if (!parsed)
return false;
end = *parsed;

size_t const endIndex = static_cast<size_t>(scanner.currentLocation().end);
return scanner.peekNextToken() == Token::EOS || langutil::isWhiteSpace(_arguments[endIndex]);
}();

// index out of range is handled by error 6367
if (!success && !indexOutOfRange)
{
m_errorReporter.syntaxError(
8387_error,
Expand All @@ -208,13 +266,34 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
return std::nullopt;
}

solAssert(match.size() == 5, "");
std::string_view tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));
if (scanner.peekNextToken() == Token::StringLiteral &&
isNonBreakingWhitespace(_arguments[static_cast<size_t>(scanner.currentLocation().end)]))
{
scanner.next();
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().end));
}
else if (scanner.peekNextToken() != Token::EOS && isNonBreakingWhitespace(_arguments[static_cast<size_t>(scanner.currentLocation().end)]))
{
scanner.next();
if (_arguments[static_cast<size_t>(scanner.currentLocation().start)] == '"')
{
auto const endOfLine = _arguments.find_first_of('\n', static_cast<size_t>(scanner.currentLocation().end));
snippet = _arguments.substr(static_cast<size_t>(scanner.currentLocation().start), endOfLine);
while (!snippet->empty() && isNonBreakingWhitespace(snippet->back()))
snippet->remove_suffix(1);
if (endOfLine != std::string::npos)
tail = _arguments.substr(endOfLine);
}
else
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().start));
}
else
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().end));

if (match[4].matched && (
!boost::algorithm::ends_with(match[4].str(), "\"") ||
boost::algorithm::ends_with(match[4].str(), "\\\"")
))
if (snippet && (
!boost::algorithm::ends_with(*snippet, "\"") ||
boost::algorithm::ends_with(*snippet, "\\\"")
))
{
m_errorReporter.syntaxError(
1544_error,
Expand All @@ -224,30 +303,25 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
return {{tail, SourceLocation{}}};
}

std::optional<int> const sourceIndex = toInt(match[1].str());
std::optional<int> const start = toInt(match[2].str());
std::optional<int> const end = toInt(match[3].str());

if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
if (indexOutOfRange || (sourceIndex < 0 && sourceIndex != -1) || (start < 0 && start != -1) || (end < 0 && end != -1))
m_errorReporter.syntaxError(
6367_error,
_commentLocation,
"Invalid value in source location mapping. "
"Expected non-negative integer values or -1 for source index and location."
);
else if (sourceIndex == -1)
return {{tail, SourceLocation{start.value(), end.value(), nullptr}}};
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
return {{tail, SourceLocation{start, end, nullptr}}};
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex))))
m_errorReporter.syntaxError(
2674_error,
_commentLocation,
"Invalid source mapping. Source index not defined via @use-src."
);
else
{
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
solAssert(sourceName, "");
return {{tail, SourceLocation{start.value(), end.value(), std::move(sourceName)}}};
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex));
return {{tail, SourceLocation{start, end, std::move(sourceName)}}};
}
return {{tail, SourceLocation{}}};
}
Expand Down

0 comments on commit f3b731e

Please sign in to comment.