Skip to content

Commit

Permalink
AsmParser: Parsing rules for source location comments have been relax…
Browse files Browse the repository at this point in the history
…ed: Whitespace between the indices as well as single-quoted code snippets are now allowed.
  • Loading branch information
clonker committed Jun 24, 2024
1 parent ce4be6e commit 5bca035
Show file tree
Hide file tree
Showing 5 changed files with 141 additions and 44 deletions.
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Compiler Features:


Bugfixes:
* AsmParser: Parsing rules for source location comments have been relaxed: Whitespace between the indices as well as single-quoted code snippets are now allowed.
* SMTChecker: Fix error that reports invalid number of verified checks for BMC and CHC engines.
* SMTChecker: Fix internal compiler error when reporting proved targets for BMC engine.
* TypeChecker: Fix segfault when assigning nested tuple to tuple.
Expand Down
46 changes: 31 additions & 15 deletions liblangutil/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,31 +826,44 @@ Token Scanner::scanString(bool const _isUnicode)
char const quote = m_char;
advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING);
while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
// for source location comments we allow multiline string literals
while (m_char != quote && !isSourcePastEndOfInput() && (!isUnicodeLinebreak() || m_kind == ScannerKind::SpecialComment))
{
char c = m_char;
advance();
if (c == '\\')

if (m_kind == ScannerKind::SpecialComment)
{
if (isSourcePastEndOfInput() || !scanEscape())
return setError(ScannerError::IllegalEscapeSequence);
if (c == '\\')
scanEscape();
else
addLiteralChar(c);
}
else
{
// Report error on non-printable characters in string literals, however
// allow anything for unicode string literals, because their validity will
// be verified later (in the syntax checker).
//
// We are using a manual range and not isprint() to avoid
// any potential complications with locale.
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
if (c == '\\')
{
if (m_kind == ScannerKind::Yul)
return setError(ScannerError::IllegalCharacterInString);
return setError(ScannerError::UnicodeCharacterInNonUnicodeString);
if (isSourcePastEndOfInput() || !scanEscape())
return setError(ScannerError::IllegalEscapeSequence);
}
else
{
// Report error on non-printable characters in string literals, however
// allow anything for unicode string literals, because their validity will
// be verified later (in the syntax checker).
//
// We are using a manual range and not isprint() to avoid
// any potential complications with locale.
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
{
if (m_kind == ScannerKind::Yul)
return setError(ScannerError::IllegalCharacterInString);
return setError(ScannerError::UnicodeCharacterInNonUnicodeString);
}
addLiteralChar(c);
}
addLiteralChar(c);
}

}
if (m_char != quote)
return setError(ScannerError::IllegalStringEndQuote);
Expand Down Expand Up @@ -1023,6 +1036,9 @@ std::tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
auto const token = TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal);
switch (m_kind)
{
case ScannerKind::SpecialComment:
// there are no keywords in special comments
return std::make_tuple(Token::Identifier, 0, 0);
case ScannerKind::Solidity:
// Turn experimental Solidity keywords that are not keywords in legacy Solidity into identifiers.
if (TokenTraits::isExperimentalSolidityOnlyKeyword(std::get<0>(token)))
Expand Down
7 changes: 2 additions & 5 deletions liblangutil/Scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,12 @@
namespace solidity::langutil
{

class AstRawString;
class AstValueFactory;
class ParserRecorder;

enum class ScannerKind
{
Solidity,
Yul,
ExperimentalSolidity
ExperimentalSolidity,
SpecialComment
};

enum class ScannerError
Expand Down
79 changes: 55 additions & 24 deletions libyul/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@
#include <liblangutil/ErrorReporter.h>
#include <liblangutil/Exceptions.h>
#include <liblangutil/Scanner.h>
#include <liblangutil/Common.h>
#include <libsolutil/Common.h>
#include <libsolutil/Visitor.h>

#include <range/v3/view/subrange.hpp>

#include <boost/algorithm/string.hpp>

#include <algorithm>
Expand All @@ -52,7 +51,7 @@ std::optional<int> toInt(std::string const& _value)
{
return stoi(_value);
}
catch (...)
catch (std::out_of_range const&)
{
return std::nullopt;
}
Expand Down Expand Up @@ -192,13 +191,42 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
langutil::SourceLocation const& _commentLocation
)
{
static std::regex const argsRegex = std::regex(
R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
std::regex_constants::ECMAScript | std::regex_constants::optimize
);
std::match_results<std::string_view::const_iterator> match;
if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex))
CharStream argumentStream (std::string(_arguments), "");
Scanner scanner (argumentStream);
scanner.setScannerMode(ScannerKind::SpecialComment);

std::string_view tail { _arguments.substr(_arguments.size()) };
auto const parseLocationComponent = [](Scanner& _scanner, bool expectTrailingColon) -> std::optional<std::string>
{
bool negative = false;
if (_scanner.currentToken() == Token::Sub)
{
negative = true;
_scanner.next();
}
if (_scanner.currentToken() != Token::Number)
return std::nullopt;
if (expectTrailingColon && _scanner.peekNextToken() != Token::Colon)
return std::nullopt;
if (!isValidDecimal(_scanner.currentLiteral()))
return std::nullopt;
std::string decimal = (negative ? "-" : "") + _scanner.currentLiteral();
_scanner.next();
if (expectTrailingColon)
_scanner.next();
return decimal;
};
std::optional<std::string> rawSourceIndex = parseLocationComponent(scanner, true);
std::optional<std::string> rawStart = parseLocationComponent(scanner, true);
std::optional<std::string> rawEnd = parseLocationComponent(scanner, false);

size_t const snippetStart = static_cast<size_t>(scanner.currentLocation().start);
bool const locationScannedSuccessfully = rawSourceIndex && rawStart && rawEnd;
bool const locationIsWhitespaceSeparated =
scanner.peekNextToken() == Token::EOS ||
(snippetStart > 0 && langutil::isWhiteSpace(_arguments[snippetStart - 1]));

if (!locationScannedSuccessfully || !locationIsWhitespaceSeparated)
{
m_errorReporter.syntaxError(
8387_error,
Expand All @@ -208,13 +236,12 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
return std::nullopt;
}

solAssert(match.size() == 5, "");
std::string_view tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));
if (scanner.currentToken() == Token::StringLiteral || (scanner.currentToken() == Token::Illegal && scanner.currentError() == ScannerError::IllegalStringEndQuote))
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().end));
else
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().start));

if (match[4].matched && (
!boost::algorithm::ends_with(match[4].str(), "\"") ||
boost::algorithm::ends_with(match[4].str(), "\\\"")
))
if (scanner.currentToken() == Token::Illegal && scanner.currentError() == ScannerError::IllegalStringEndQuote)
{
m_errorReporter.syntaxError(
1544_error,
Expand All @@ -224,30 +251,34 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
return {{tail, SourceLocation{}}};
}

std::optional<int> const sourceIndex = toInt(match[1].str());
std::optional<int> const start = toInt(match[2].str());
std::optional<int> const end = toInt(match[3].str());
std::optional<int> const sourceIndex = toInt(*rawSourceIndex);
std::optional<int> const start = toInt(*rawStart);
std::optional<int> const end = toInt(*rawEnd);

if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
if (
!sourceIndex.has_value() || *sourceIndex < -1 ||
!start.has_value() || *start < -1 ||
!end.has_value() || *end < -1
)
m_errorReporter.syntaxError(
6367_error,
_commentLocation,
"Invalid value in source location mapping. "
"Expected non-negative integer values or -1 for source index and location."
);
else if (sourceIndex == -1)
return {{tail, SourceLocation{start.value(), end.value(), nullptr}}};
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
return {{tail, SourceLocation{*start, *end, nullptr}}};
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(*sourceIndex))))
m_errorReporter.syntaxError(
2674_error,
_commentLocation,
"Invalid source mapping. Source index not defined via @use-src."
);
else
{
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(*sourceIndex));
solAssert(sourceName, "");
return {{tail, SourceLocation{start.value(), end.value(), std::move(sourceName)}}};
return {{tail, SourceLocation{*start, *end, std::move(sourceName)}}};
}
return {{tail, SourceLocation{}}};
}
Expand Down
52 changes: 52 additions & 0 deletions test/libyul/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,58 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_two_locations_with_snippets_untermina
CHECK_LOCATION(result->debugData->originLocation, "", -1, -1);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_single_quote)
{
ErrorList errorList;
ErrorReporter reporter(errorList);
auto const sourceText = R"(
/// @src 0:111:222 "
///
{}
)";
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result);
BOOST_REQUIRE(errorList.size() == 1);
BOOST_TEST(errorList[0]->type() == Error::Type::SyntaxError);
BOOST_TEST(errorList[0]->errorId() == 1544_error);
CHECK_LOCATION(result->debugData->originLocation, "", -1, -1);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_two_snippets_with_hex_comment)
{
ErrorList errorList;
ErrorReporter reporter(errorList);
auto const sourceText = R"(
/// @src 0:111:222 hex"abc"@src 1:333:444 "abc"
{}
)";
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result && errorList.size() == 0);
// the second source location is not parsed as such, as the hex string isn't interpreted as snippet but
// as the beginning of the tail in AsmParser
CHECK_LOCATION(result->debugData->originLocation, "source0", 111, 222);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_multi_line_source_loc)
{
ErrorList errorList;
ErrorReporter reporter(errorList);
auto const sourceText = R"(
/// @src 1 : 111:
/// 222 "
/// abc\"def
///
/// " @src 0:333:444
{}
)";
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result && errorList.empty());
CHECK_LOCATION(result->debugData->originLocation, "source0", 333, 444);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_with_code_snippets_with_nested_locations)
{
ErrorList errorList;
Expand Down

0 comments on commit 5bca035

Please sign in to comment.