Skip to content

Commit

Permalink
AsmParser: Source location comments no longer rely no regex to be par…
Browse files Browse the repository at this point in the history
…sed. Whitespaces between the indices as well as single-quoted code snippets are now allowed.
  • Loading branch information
clonker committed Jun 21, 2024
1 parent 3c65632 commit 7cf8e50
Show file tree
Hide file tree
Showing 6 changed files with 167 additions and 45 deletions.
1 change: 1 addition & 0 deletions Changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ Compiler Features:


Bugfixes:
* AsmParser: Source location comments no longer rely no regex to be parsed. Whitespaces between the indices as well as single-quoted code snippets are now allowed.
* SMTChecker: Fix error that reports invalid number of verified checks for BMC and CHC engines.
* SMTChecker: Fix internal compiler error when reporting proved targets for BMC engine.
* TypeChecker: Fix segfault when assigning nested tuple to tuple.
Expand Down
45 changes: 30 additions & 15 deletions liblangutil/Scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -826,31 +826,44 @@ Token Scanner::scanString(bool const _isUnicode)
char const quote = m_char;
advance(); // consume quote
LiteralScope literal(this, LITERAL_TYPE_STRING);
while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
// for source location comments we allow multiline string literals
while (m_char != quote && !isSourcePastEndOfInput() && (!isUnicodeLinebreak() || m_kind == ScannerKind::SourceLocationComment))
{
char c = m_char;
advance();
if (c == '\\')

if (m_kind == ScannerKind::SourceLocationComment)
{
if (isSourcePastEndOfInput() || !scanEscape())
return setError(ScannerError::IllegalEscapeSequence);
if (c == '\\')
scanEscape();
else
addLiteralChar(c);
}
else
{
// Report error on non-printable characters in string literals, however
// allow anything for unicode string literals, because their validity will
// be verified later (in the syntax checker).
//
// We are using a manual range and not isprint() to avoid
// any potential complications with locale.
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
if (c == '\\')
{
if (m_kind == ScannerKind::Yul)
return setError(ScannerError::IllegalCharacterInString);
return setError(ScannerError::UnicodeCharacterInNonUnicodeString);
if (isSourcePastEndOfInput() || !scanEscape())
return setError(ScannerError::IllegalEscapeSequence);
}
else
{
// Report error on non-printable characters in string literals, however
// allow anything for unicode string literals, because their validity will
// be verified later (in the syntax checker).
//
// We are using a manual range and not isprint() to avoid
// any potential complications with locale.
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
{
if (m_kind == ScannerKind::Yul)
return setError(ScannerError::IllegalCharacterInString);
return setError(ScannerError::UnicodeCharacterInNonUnicodeString);
}
addLiteralChar(c);
}
addLiteralChar(c);
}

}
if (m_char != quote)
return setError(ScannerError::IllegalStringEndQuote);
Expand Down Expand Up @@ -1023,6 +1036,8 @@ std::tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
auto const token = TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal);
switch (m_kind)
{
case ScannerKind::SourceLocationComment:
[[fallthrough]];
case ScannerKind::Solidity:
// Turn experimental Solidity keywords that are not keywords in legacy Solidity into identifiers.
if (TokenTraits::isExperimentalSolidityOnlyKeyword(std::get<0>(token)))
Expand Down
7 changes: 2 additions & 5 deletions liblangutil/Scanner.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,12 @@
namespace solidity::langutil
{

class AstRawString;
class AstValueFactory;
class ParserRecorder;

enum class ScannerKind
{
Solidity,
Yul,
ExperimentalSolidity
ExperimentalSolidity,
SourceLocationComment
};

enum class ScannerError
Expand Down
100 changes: 75 additions & 25 deletions libyul/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,10 @@
#include <liblangutil/ErrorReporter.h>
#include <liblangutil/Exceptions.h>
#include <liblangutil/Scanner.h>
#include <liblangutil/Common.h>
#include <libsolutil/Common.h>
#include <libsolutil/Visitor.h>

#include <range/v3/view/subrange.hpp>

#include <boost/algorithm/string.hpp>

#include <algorithm>
Expand All @@ -52,7 +51,7 @@ std::optional<int> toInt(std::string const& _value)
{
return stoi(_value);
}
catch (...)
catch (std::out_of_range const&)
{
return std::nullopt;
}
Expand Down Expand Up @@ -192,13 +191,40 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
langutil::SourceLocation const& _commentLocation
)
{
static std::regex const argsRegex = std::regex(
R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
std::regex_constants::ECMAScript | std::regex_constants::optimize
);
std::match_results<std::string_view::const_iterator> match;
if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex))
CharStream argumentStream (std::string(_arguments), "");
Scanner scanner (argumentStream);
scanner.setScannerMode(ScannerKind::SourceLocationComment);

std::string_view tail { _arguments.substr(_arguments.size()) };
auto const parseIndex = [](Scanner& _scanner, bool expectTrailingColon) -> std::optional<std::string>
{
bool negative = false;
if (_scanner.currentToken() == Token::Sub)
{
negative = true;
_scanner.next();
}
if (_scanner.currentToken() != Token::Number)
return std::nullopt;
if (expectTrailingColon && _scanner.peekNextToken() != Token::Colon)
return std::nullopt;
if (!isValidDecimal(_scanner.currentLiteral()))
return std::nullopt;
std::string decimal = (negative ? "-" : "") + _scanner.currentLiteral();
_scanner.next();
if (expectTrailingColon)
_scanner.next();
return decimal;
};
std::optional<std::string> rawSourceIndex = parseIndex(scanner, true);
std::optional<std::string> rawStart = parseIndex(scanner, true);
std::optional<std::string> rawEnd = parseIndex(scanner, false);

bool const successfullyScannedIndices = rawSourceIndex && rawStart && rawEnd;
size_t const startIndex = static_cast<size_t>(scanner.currentLocation().start);
bool success = successfullyScannedIndices && (scanner.peekNextToken() == Token::EOS || (startIndex > 0 && langutil::isWhiteSpace(_arguments[startIndex - 1])));

if (!success)
{
m_errorReporter.syntaxError(
8387_error,
Expand All @@ -208,13 +234,22 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
return std::nullopt;
}

solAssert(match.size() == 5, "");
std::string_view tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));
if (scanner.currentToken() == Token::StringLiteral || (scanner.currentToken() == Token::Illegal && scanner.currentError() == ScannerError::IllegalStringEndQuote))
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().end));
else
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().start));

if (scanner.currentToken() == Token::HexStringLiteral || scanner.currentToken() == Token::UnicodeStringLiteral)
{
m_errorReporter.syntaxError(
1543_error,
_commentLocation,
"Invalid code snippet in source location mapping. Hex and unicode strings not allowed."
);
return {{tail, SourceLocation{}}};
}

if (match[4].matched && (
!boost::algorithm::ends_with(match[4].str(), "\"") ||
boost::algorithm::ends_with(match[4].str(), "\\\"")
))
if (scanner.currentToken() == Token::Illegal && scanner.currentError() == ScannerError::IllegalStringEndQuote)
{
m_errorReporter.syntaxError(
1544_error,
Expand All @@ -224,30 +259,45 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
return {{tail, SourceLocation{}}};
}

std::optional<int> const sourceIndex = toInt(match[1].str());
std::optional<int> const start = toInt(match[2].str());
std::optional<int> const end = toInt(match[3].str());
int sourceIndex = -1;
int start = -1;
int end = -1;
bool const successfullyInterpretedIndices = [&]()
{
if (successfullyScannedIndices)
{
auto const indexPairs = { std::tie(rawSourceIndex, sourceIndex), std::tie(rawStart, start), std::tie(rawEnd, end) };
for (auto& [rawIndex, interpretedIndex] : indexPairs)
{
auto const interpreted = toInt(*rawIndex);
if (interpreted)
interpretedIndex = *interpreted;
else
return false;
}
}
return successfullyScannedIndices;
}();

if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
if (!successfullyInterpretedIndices || sourceIndex < -1 || start < -1 || end < -1)
m_errorReporter.syntaxError(
6367_error,
_commentLocation,
"Invalid value in source location mapping. "
"Expected non-negative integer values or -1 for source index and location."
);
else if (sourceIndex == -1)
return {{tail, SourceLocation{start.value(), end.value(), nullptr}}};
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
return {{tail, SourceLocation{start, end, nullptr}}};
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex))))
m_errorReporter.syntaxError(
2674_error,
_commentLocation,
"Invalid source mapping. Source index not defined via @use-src."
);
else
{
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
solAssert(sourceName, "");
return {{tail, SourceLocation{start.value(), end.value(), std::move(sourceName)}}};
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex));
return {{tail, SourceLocation{start, end, std::move(sourceName)}}};
}
return {{tail, SourceLocation{}}};
}
Expand Down
1 change: 1 addition & 0 deletions scripts/error_codes.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ def examine_id_coverage(top_dir, source_id_to_file_names, new_ids_only=False):
# white list of ids which are not covered by tests
white_ids = {
"9804", # Tested in test/libyul/ObjectParser.cpp.
"1543",
"1544",
"1749",
"2674",
Expand Down
58 changes: 58 additions & 0 deletions test/libyul/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -767,6 +767,64 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_two_locations_with_snippets_untermina
CHECK_LOCATION(result->debugData->originLocation, "", -1, -1);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_single_quote)
{
ErrorList errorList;
ErrorReporter reporter(errorList);
auto const sourceText = R"(
/// @src 0:111:222 "
///
{}
)";
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result);
BOOST_REQUIRE(errorList.size() == 1);
BOOST_TEST(errorList[0]->type() == Error::Type::SyntaxError);
BOOST_TEST(errorList[0]->errorId() == 1544_error);
CHECK_LOCATION(result->debugData->originLocation, "", -1, -1);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_hex_snippet_disallowed)
{
ErrorList errorList;
ErrorReporter reporter(errorList);
auto const sourceText = R"(
/// @src 3:111:222 hex"ab12"
/// @src 2:111:222 unicode"ab12"
/// @src 1:123:321 'allowed'
/// @src 0:321:123 "allowed"
{}
)";
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result);
BOOST_REQUIRE(errorList.size() == 2);
BOOST_TEST(errorList[0]->type() == Error::Type::SyntaxError);
BOOST_TEST(errorList[0]->errorId() == 1543_error);
BOOST_TEST(errorList[1]->type() == Error::Type::SyntaxError);
BOOST_TEST(errorList[1]->errorId() == 1543_error);
CHECK_LOCATION(result->debugData->originLocation, "source0", 321, 123);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_multi_line_source_loc)
{
ErrorList errorList;
ErrorReporter reporter(errorList);
auto const sourceText = R"(
/// @src 1 : 111:
/// 222 "
/// abc\"def
///
/// " @src 0:333:444
{}
)";
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
BOOST_REQUIRE(!!result && errorList.empty());
CHECK_LOCATION(result->debugData->originLocation, "source0", 333, 444);
}

BOOST_AUTO_TEST_CASE(customSourceLocations_with_code_snippets_with_nested_locations)
{
ErrorList errorList;
Expand Down

0 comments on commit 7cf8e50

Please sign in to comment.