Skip to content

Commit

Permalink
fix string scanning
Browse files Browse the repository at this point in the history
Fix handling of '$' char in strings.

Fixes Wilfred#15
  • Loading branch information
cstrahan committed Jul 22, 2021
1 parent 50f38ce commit 37325a9
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 0 deletions.
50 changes: 50 additions & 0 deletions corpus/basic.txt
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,56 @@ This works, too: ''$
(interpolation (identifier))
(escape_sequence)))

====================
string ($)
====================

[
"$"
"$\n"
"${x}"
"$${x}"
"$$${x}"
]

---

(source_expression
(list
(string)
(string
(escape_sequence))
(string
(interpolation (identifier)))
(string)
(string
(interpolation (identifier)))))

====================
indented string ($)
====================

[
''$''
''$''\n''
''${x}''
''$${x}''
''$$${x}''
]

---

(source_expression
(list
(indented_string)
(indented_string
(escape_sequence))
(indented_string
(interpolation (identifier)))
(indented_string)
(indented_string
(interpolation (identifier)))))

====================
uri
====================
Expand Down
21 changes: 21 additions & 0 deletions src/scanner.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,14 @@ static void skip(TSLexer *lexer) {
}

static bool scan_str(TSLexer *lexer) {
// The start-of-string/end-of-string '"' character is deliberately left to the
// grammar defined in grammar.js rather than scanned here.
// To make that work, we track whether any string content has been consumed:
// if we reach an unescaped '"' char _and_ no string content has been consumed yet,
// we return false to tell tree-sitter that this custom scanner has not found
// a token.
bool has_content = false;

lexer->result_symbol = STR_CONTENT;

while (true) {
Expand Down Expand Up @@ -49,6 +56,11 @@ static bool scan_str(TSLexer *lexer) {
} else {
return false;
}
} else if (lexer->lookahead != '"' && lexer->lookahead != '\\' ) {
// any char following '$' other than '"', '\\' and '{' (which was handled above)
// should be consumed as additional string content.
advance(lexer);
lexer->mark_end(lexer);
}
has_content = true;
break;
Expand All @@ -68,8 +80,11 @@ static bool scan_str(TSLexer *lexer) {
}

static bool scan_ind_str(TSLexer *lexer) {
// See the comment about has_content in scan_str().
bool has_content = false;

lexer->result_symbol = IND_STR_CONTENT;

while (true) {
switch (lexer->lookahead) {
case '$':
Expand All @@ -81,7 +96,13 @@ static bool scan_ind_str(TSLexer *lexer) {
} else {
return false;
}
} else if (lexer->lookahead != '\'') {
// any char following '$' other than '\'' and '{' (which was handled above)
// should be consumed as additional string content.
advance(lexer);
lexer->mark_end(lexer);
}
has_content = true;
break;
case '\'':
lexer->mark_end(lexer);
Expand Down

0 comments on commit 37325a9

Please sign in to comment.