Skip to content

Commit 5bca035

Browse files
committed
AsmParser: Parsing rules for source location comments have been relaxed: Whitespace between the indices as well as single-quoted code snippets are now allowed.
1 parent ce4be6e commit 5bca035

File tree

5 files changed

+141
-44
lines changed

5 files changed

+141
-44
lines changed

Changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ Compiler Features:
1313

1414

1515
Bugfixes:
16+
* AsmParser: Parsing rules for source location comments have been relaxed: Whitespace between the indices as well as single-quoted code snippets are now allowed.
1617
* SMTChecker: Fix error that reports invalid number of verified checks for BMC and CHC engines.
1718
* SMTChecker: Fix internal compiler error when reporting proved targets for BMC engine.
1819
* TypeChecker: Fix segfault when assigning nested tuple to tuple.

liblangutil/Scanner.cpp

Lines changed: 31 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -826,31 +826,44 @@ Token Scanner::scanString(bool const _isUnicode)
826826
char const quote = m_char;
827827
advance(); // consume quote
828828
LiteralScope literal(this, LITERAL_TYPE_STRING);
829-
while (m_char != quote && !isSourcePastEndOfInput() && !isUnicodeLinebreak())
829+
// for source location comments we allow multiline string literals
830+
while (m_char != quote && !isSourcePastEndOfInput() && (!isUnicodeLinebreak() || m_kind == ScannerKind::SpecialComment))
830831
{
831832
char c = m_char;
832833
advance();
833-
if (c == '\\')
834+
835+
if (m_kind == ScannerKind::SpecialComment)
834836
{
835-
if (isSourcePastEndOfInput() || !scanEscape())
836-
return setError(ScannerError::IllegalEscapeSequence);
837+
if (c == '\\')
838+
scanEscape();
839+
else
840+
addLiteralChar(c);
837841
}
838842
else
839843
{
840-
// Report error on non-printable characters in string literals, however
841-
// allow anything for unicode string literals, because their validity will
842-
// be verified later (in the syntax checker).
843-
//
844-
// We are using a manual range and not isprint() to avoid
845-
// any potential complications with locale.
846-
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
844+
if (c == '\\')
847845
{
848-
if (m_kind == ScannerKind::Yul)
849-
return setError(ScannerError::IllegalCharacterInString);
850-
return setError(ScannerError::UnicodeCharacterInNonUnicodeString);
846+
if (isSourcePastEndOfInput() || !scanEscape())
847+
return setError(ScannerError::IllegalEscapeSequence);
848+
}
849+
else
850+
{
851+
// Report error on non-printable characters in string literals, however
852+
// allow anything for unicode string literals, because their validity will
853+
// be verified later (in the syntax checker).
854+
//
855+
// We are using a manual range and not isprint() to avoid
856+
// any potential complications with locale.
857+
if (!_isUnicode && (static_cast<unsigned>(c) <= 0x1f || static_cast<unsigned>(c) >= 0x7f))
858+
{
859+
if (m_kind == ScannerKind::Yul)
860+
return setError(ScannerError::IllegalCharacterInString);
861+
return setError(ScannerError::UnicodeCharacterInNonUnicodeString);
862+
}
863+
addLiteralChar(c);
851864
}
852-
addLiteralChar(c);
853865
}
866+
854867
}
855868
if (m_char != quote)
856869
return setError(ScannerError::IllegalStringEndQuote);
@@ -1023,6 +1036,9 @@ std::tuple<Token, unsigned, unsigned> Scanner::scanIdentifierOrKeyword()
10231036
auto const token = TokenTraits::fromIdentifierOrKeyword(m_tokens[NextNext].literal);
10241037
switch (m_kind)
10251038
{
1039+
case ScannerKind::SpecialComment:
1040+
// there are no keywords in special comments
1041+
return std::make_tuple(Token::Identifier, 0, 0);
10261042
case ScannerKind::Solidity:
10271043
// Turn experimental Solidity keywords that are not keywords in legacy Solidity into identifiers.
10281044
if (TokenTraits::isExperimentalSolidityOnlyKeyword(std::get<0>(token)))

liblangutil/Scanner.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,12 @@
6262
namespace solidity::langutil
6363
{
6464

65-
class AstRawString;
66-
class AstValueFactory;
67-
class ParserRecorder;
68-
6965
enum class ScannerKind
7066
{
7167
Solidity,
7268
Yul,
73-
ExperimentalSolidity
69+
ExperimentalSolidity,
70+
SpecialComment
7471
};
7572

7673
enum class ScannerError

libyul/AsmParser.cpp

Lines changed: 55 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -27,11 +27,10 @@
2727
#include <liblangutil/ErrorReporter.h>
2828
#include <liblangutil/Exceptions.h>
2929
#include <liblangutil/Scanner.h>
30+
#include <liblangutil/Common.h>
3031
#include <libsolutil/Common.h>
3132
#include <libsolutil/Visitor.h>
3233

33-
#include <range/v3/view/subrange.hpp>
34-
3534
#include <boost/algorithm/string.hpp>
3635

3736
#include <algorithm>
@@ -52,7 +51,7 @@ std::optional<int> toInt(std::string const& _value)
5251
{
5352
return stoi(_value);
5453
}
55-
catch (...)
54+
catch (std::out_of_range const&)
5655
{
5756
return std::nullopt;
5857
}
@@ -192,13 +191,42 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
192191
langutil::SourceLocation const& _commentLocation
193192
)
194193
{
195-
static std::regex const argsRegex = std::regex(
196-
R"~~(^(-1|\d+):(-1|\d+):(-1|\d+)(?:\s+|$))~~" // index and location, e.g.: 1:234:-1
197-
R"~~(("(?:[^"\\]|\\.)*"?)?)~~", // optional code snippet, e.g.: "string memory s = \"abc\";..."
198-
std::regex_constants::ECMAScript | std::regex_constants::optimize
199-
);
200-
std::match_results<std::string_view::const_iterator> match;
201-
if (!regex_search(_arguments.cbegin(), _arguments.cend(), match, argsRegex))
194+
CharStream argumentStream (std::string(_arguments), "");
195+
Scanner scanner (argumentStream);
196+
scanner.setScannerMode(ScannerKind::SpecialComment);
197+
198+
std::string_view tail { _arguments.substr(_arguments.size()) };
199+
auto const parseLocationComponent = [](Scanner& _scanner, bool expectTrailingColon) -> std::optional<std::string>
200+
{
201+
bool negative = false;
202+
if (_scanner.currentToken() == Token::Sub)
203+
{
204+
negative = true;
205+
_scanner.next();
206+
}
207+
if (_scanner.currentToken() != Token::Number)
208+
return std::nullopt;
209+
if (expectTrailingColon && _scanner.peekNextToken() != Token::Colon)
210+
return std::nullopt;
211+
if (!isValidDecimal(_scanner.currentLiteral()))
212+
return std::nullopt;
213+
std::string decimal = (negative ? "-" : "") + _scanner.currentLiteral();
214+
_scanner.next();
215+
if (expectTrailingColon)
216+
_scanner.next();
217+
return decimal;
218+
};
219+
std::optional<std::string> rawSourceIndex = parseLocationComponent(scanner, true);
220+
std::optional<std::string> rawStart = parseLocationComponent(scanner, true);
221+
std::optional<std::string> rawEnd = parseLocationComponent(scanner, false);
222+
223+
size_t const snippetStart = static_cast<size_t>(scanner.currentLocation().start);
224+
bool const locationScannedSuccessfully = rawSourceIndex && rawStart && rawEnd;
225+
bool const locationIsWhitespaceSeparated =
226+
scanner.peekNextToken() == Token::EOS ||
227+
(snippetStart > 0 && langutil::isWhiteSpace(_arguments[snippetStart - 1]));
228+
229+
if (!locationScannedSuccessfully || !locationIsWhitespaceSeparated)
202230
{
203231
m_errorReporter.syntaxError(
204232
8387_error,
@@ -208,13 +236,12 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
208236
return std::nullopt;
209237
}
210238

211-
solAssert(match.size() == 5, "");
212-
std::string_view tail = _arguments.substr(static_cast<size_t>(match.position() + match.length()));
239+
if (scanner.currentToken() == Token::StringLiteral || (scanner.currentToken() == Token::Illegal && scanner.currentError() == ScannerError::IllegalStringEndQuote))
240+
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().end));
241+
else
242+
tail = _arguments.substr(static_cast<size_t>(scanner.currentLocation().start));
213243

214-
if (match[4].matched && (
215-
!boost::algorithm::ends_with(match[4].str(), "\"") ||
216-
boost::algorithm::ends_with(match[4].str(), "\\\"")
217-
))
244+
if (scanner.currentToken() == Token::Illegal && scanner.currentError() == ScannerError::IllegalStringEndQuote)
218245
{
219246
m_errorReporter.syntaxError(
220247
1544_error,
@@ -224,30 +251,34 @@ std::optional<std::pair<std::string_view, SourceLocation>> Parser::parseSrcComme
224251
return {{tail, SourceLocation{}}};
225252
}
226253

227-
std::optional<int> const sourceIndex = toInt(match[1].str());
228-
std::optional<int> const start = toInt(match[2].str());
229-
std::optional<int> const end = toInt(match[3].str());
254+
std::optional<int> const sourceIndex = toInt(*rawSourceIndex);
255+
std::optional<int> const start = toInt(*rawStart);
256+
std::optional<int> const end = toInt(*rawEnd);
230257

231-
if (!sourceIndex.has_value() || !start.has_value() || !end.has_value())
258+
if (
259+
!sourceIndex.has_value() || *sourceIndex < -1 ||
260+
!start.has_value() || *start < -1 ||
261+
!end.has_value() || *end < -1
262+
)
232263
m_errorReporter.syntaxError(
233264
6367_error,
234265
_commentLocation,
235266
"Invalid value in source location mapping. "
236267
"Expected non-negative integer values or -1 for source index and location."
237268
);
238269
else if (sourceIndex == -1)
239-
return {{tail, SourceLocation{start.value(), end.value(), nullptr}}};
240-
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(sourceIndex.value()))))
270+
return {{tail, SourceLocation{*start, *end, nullptr}}};
271+
else if (!(sourceIndex >= 0 && m_sourceNames->count(static_cast<unsigned>(*sourceIndex))))
241272
m_errorReporter.syntaxError(
242273
2674_error,
243274
_commentLocation,
244275
"Invalid source mapping. Source index not defined via @use-src."
245276
);
246277
else
247278
{
248-
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(sourceIndex.value()));
279+
std::shared_ptr<std::string const> sourceName = m_sourceNames->at(static_cast<unsigned>(*sourceIndex));
249280
solAssert(sourceName, "");
250-
return {{tail, SourceLocation{start.value(), end.value(), std::move(sourceName)}}};
281+
return {{tail, SourceLocation{*start, *end, std::move(sourceName)}}};
251282
}
252283
return {{tail, SourceLocation{}}};
253284
}

test/libyul/Parser.cpp

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,58 @@ BOOST_AUTO_TEST_CASE(customSourceLocations_two_locations_with_snippets_untermina
767767
CHECK_LOCATION(result->debugData->originLocation, "", -1, -1);
768768
}
769769

770+
BOOST_AUTO_TEST_CASE(customSourceLocations_single_quote)
771+
{
772+
ErrorList errorList;
773+
ErrorReporter reporter(errorList);
774+
auto const sourceText = R"(
775+
/// @src 0:111:222 "
776+
///
777+
{}
778+
)";
779+
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
780+
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
781+
BOOST_REQUIRE(!!result);
782+
BOOST_REQUIRE(errorList.size() == 1);
783+
BOOST_TEST(errorList[0]->type() == Error::Type::SyntaxError);
784+
BOOST_TEST(errorList[0]->errorId() == 1544_error);
785+
CHECK_LOCATION(result->debugData->originLocation, "", -1, -1);
786+
}
787+
788+
BOOST_AUTO_TEST_CASE(customSourceLocations_two_snippets_with_hex_comment)
789+
{
790+
ErrorList errorList;
791+
ErrorReporter reporter(errorList);
792+
auto const sourceText = R"(
793+
/// @src 0:111:222 hex"abc"@src 1:333:444 "abc"
794+
{}
795+
)";
796+
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
797+
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
798+
BOOST_REQUIRE(!!result && errorList.size() == 0);
799+
// the second source location is not parsed as such, as the hex string isn't interpreted as snippet but
800+
// as the beginning of the tail in AsmParser
801+
CHECK_LOCATION(result->debugData->originLocation, "source0", 111, 222);
802+
}
803+
804+
BOOST_AUTO_TEST_CASE(customSourceLocations_multi_line_source_loc)
805+
{
806+
ErrorList errorList;
807+
ErrorReporter reporter(errorList);
808+
auto const sourceText = R"(
809+
/// @src 1 : 111:
810+
/// 222 "
811+
/// abc\"def
812+
///
813+
/// " @src 0:333:444
814+
{}
815+
)";
816+
EVMDialectTyped const& dialect = EVMDialectTyped::instance(EVMVersion{});
817+
std::shared_ptr<Block> result = parse(sourceText, dialect, reporter);
818+
BOOST_REQUIRE(!!result && errorList.empty());
819+
CHECK_LOCATION(result->debugData->originLocation, "source0", 333, 444);
820+
}
821+
770822
BOOST_AUTO_TEST_CASE(customSourceLocations_with_code_snippets_with_nested_locations)
771823
{
772824
ErrorList errorList;

0 commit comments

Comments
 (0)