Fix path interpolation
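Path tokens can now resume after an interpolation: path lexing goes through a single `path_since` helper, which re-enters the `Todo::Path` state whenever the remaining input starts with `${`, and which rejects paths ending in a trailing slash. Keyword handling no longer needs to special-case paths, so a segment like `let` directly after an interpolation lexes as part of the path.

Illustrative token streams, taken from the new tests below:

    ./${foo}.jpg  =>  TOKEN_PATH("./"), TOKEN_INTERPOL_START("${"), TOKEN_IDENT("foo"),
                      TOKEN_INTERPOL_END("}"), TOKEN_PATH(".jpg")
    ./${foo}/     =>  TOKEN_PATH("./"), TOKEN_INTERPOL_START("${"), TOKEN_IDENT("foo"),
                      TOKEN_INTERPOL_END("}"), TOKEN_ERROR("/")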
ncfavier committed May 29, 2022
1 parent 9e1aba2 commit b4470a1
Showing 3 changed files with 118 additions and 65 deletions.
91 changes: 61 additions & 30 deletions src/tokenizer.rs
@@ -98,7 +98,7 @@ impl<'a> Tokenizer<'a> {
         F: FnMut(char) -> bool,
     {
         let mut len = 0;
-        while self.peek().map(|c| f(c)).unwrap_or(false) {
+        while self.peek().map_or(false, |c| f(c)) {
             self.next().unwrap();
             len += 1;
         }
@@ -154,6 +154,17 @@ impl<'a> Tokenizer<'a> {
             }
         }
     }
+
+    fn path_since(&mut self, past: State) -> Option<(SyntaxKind, SmolStr)> {
+        self.consume(is_valid_path_char);
+        let path = self.string_since(past);
+        if self.remaining().starts_with("${") {
+            self.ctx.last_mut().unwrap().todo = Some(Todo::Path);
+        } else if path.ends_with('/') {
+            return Some((TOKEN_ERROR, path));
+        }
+        return Some((TOKEN_PATH, path));
+    }
 }
 impl<'a> Iterator for Tokenizer<'a> {
     type Item = (SyntaxKind, SmolStr);
@@ -171,7 +182,6 @@ impl<'a> Iterator for Tokenizer<'a> {
                 }
             }
             Some(Todo::Path) => {
-                *todo = Some(Todo::Path);
                 if self.starts_with_bump("${") {
                     self.ctx.push(Context {
                         interpol: Some(Interpol {
@@ -183,6 +193,8 @@ impl<'a> Iterator for Tokenizer<'a> {
                         todo: None,
                     });
                     return Some((TOKEN_INTERPOL_START, self.string_since(start)));
+                } else if self.peek().map_or(false, is_valid_path_char) {
+                    return self.path_since(start);
                 }
             }
             Some(Todo::StringBody { multiline }) => {
@@ -223,10 +235,6 @@ impl<'a> Iterator for Tokenizer<'a> {
         }
 
         if self.consume(char::is_whitespace) > 0 {
-            let ctx = self.ctx.last_mut().unwrap();
-            if matches!(ctx.todo, Some(Todo::Path)) {
-                ctx.todo = None;
-            }
             return Some((TOKEN_WHITESPACE, self.string_since(start)));
         }
 
@@ -256,10 +264,9 @@ impl<'a> Iterator for Tokenizer<'a> {
         // Check if it's a path
         let store_path = self.peek() == Some('<');
         let kind = {
-            let mut lookahead = self.remaining().chars().skip_while(|c| match c {
-                'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '.' | '+' | '-' => true,
+            let mut lookahead = self.remaining().chars().skip_while(|&c| match c {
                 '<' | '/' => store_path,
-                _ => false,
+                _ => is_valid_path_char(c),
             });
             match (lookahead.next(), lookahead.next()) {
                 // a//b parses as Update(a, b)
@@ -278,14 +285,7 @@ impl<'a> Iterator for Tokenizer<'a> {
             if c == '~' && self.next() != Some('/') {
                 return Some((TOKEN_ERROR, self.string_since(start)));
             }
-            self.consume(is_valid_path_char);
-            let ident = self.string_since(start);
-            if self.remaining().starts_with("${") {
-                self.ctx.last_mut().unwrap().todo = Some(Todo::Path);
-            } else if ident.ends_with('/') {
-                return Some((TOKEN_ERROR, ident));
-            }
-            return Some((TOKEN_PATH, ident));
+            return self.path_since(start);
         }
 
         match c {
@@ -336,7 +336,7 @@ impl<'a> Iterator for Tokenizer<'a> {
             ':' => Some((TOKEN_COLON, self.string_since(start))),
             ',' => Some((TOKEN_COMMA, self.string_since(start))),
             '.' => {
-                if self.peek().map(|x| ('0'..='9').contains(&x)).unwrap_or(false) {
+                if self.peek().map_or(false, |x| ('0'..='9').contains(&x)) {
                     self.consume(|c| ('0'..='9').contains(&c));
                     Some((TOKEN_FLOAT, self.string_since(start)))
                 } else {
@@ -404,7 +404,6 @@ impl<'a> Iterator for Tokenizer<'a> {
             Some(IdentType::Store) | None => IdentType::Ident,
             Some(kind) => kind,
         };
-        assert_ne!(kind, IdentType::Path, "paths are checked earlier");
         self.consume(|c| match c {
             'a'..='z' | 'A'..='Z' | '0'..='9' | '_' | '-' | '\'' => true,
             c => kind == IdentType::Uri && is_valid_uri_char(c),
@@ -421,16 +420,11 @@ impl<'a> Iterator for Tokenizer<'a> {
                 "rec" => TOKEN_REC,
                 "then" => TOKEN_THEN,
                 "with" => TOKEN_WITH,
-                _ => {
-                    if matches!(self.ctx.last_mut().unwrap().todo, Some(Todo::Path)) {
-                        TOKEN_PATH
-                    } else {
-                        TOKEN_IDENT
-                    }
-                },
+                _ => TOKEN_IDENT,
             },
-            IdentType::Path | IdentType::Store => TOKEN_PATH,
             IdentType::Uri => TOKEN_URI,
+            IdentType::Path => panic!("paths are checked earlier"),
+            IdentType::Store => panic!("store paths are checked earlier"),
         };
         Some((syntax_kind, ident))
     }
@@ -490,6 +484,13 @@ mod tests {
         };
     }
 
+    fn path(path: &str) -> Vec<(SyntaxKind, SmolStr)> {
+        tokens![(TOKEN_PATH, path)]
+    }
+    fn error(token: &str) -> Vec<(SyntaxKind, SmolStr)> {
+        tokens![(TOKEN_ERROR, token)]
+    }
+
     #[test]
     fn basic_int_set() {
         assert_eq!(
@@ -878,16 +879,16 @@ mod tests {
     }
     #[test]
     fn paths() {
-        fn path(path: &str) -> Vec<(SyntaxKind, SmolStr)> {
-            tokens![(TOKEN_PATH, path)]
-        }
         assert_eq!(tokenize("/hello/world"), path("/hello/world"));
         assert_eq!(tokenize("hello/world"), path("hello/world"));
         assert_eq!(tokenize("a+3/5+b"), path("a+3/5+b"));
         assert_eq!(tokenize("1-2/3"), path("1-2/3"));
         assert_eq!(tokenize("./hello/world"), path("./hello/world"));
         assert_eq!(tokenize("~/hello/world"), path("~/hello/world"));
         assert_eq!(tokenize("<hello/world>"), path("<hello/world>"));
+        assert_eq!(tokenize("~"), error("~"));
+        assert_eq!(tokenize("~/"), error("~/"));
+        assert_eq!(tokenize("/a/"), error("/a/"));
     }
     #[test]
     fn test_path_no_newline() {
@@ -935,6 +936,36 @@ mod tests {
                 (TOKEN_INTERPOL_END, "}"),
             ]
         );
+        assert_eq!(
+            tokenize("./${foo}let"),
+            tokens![
+                (TOKEN_PATH, "./"),
+                (TOKEN_INTERPOL_START, "${"),
+                (TOKEN_IDENT, "foo"),
+                (TOKEN_INTERPOL_END, "}"),
+                (TOKEN_PATH, "let"),
+            ]
+        );
+        assert_eq!(
+            tokenize("./${foo}.jpg"),
+            tokens![
+                (TOKEN_PATH, "./"),
+                (TOKEN_INTERPOL_START, "${"),
+                (TOKEN_IDENT, "foo"),
+                (TOKEN_INTERPOL_END, "}"),
+                (TOKEN_PATH, ".jpg"),
+            ]
+        );
+        assert_eq!(
+            tokenize("./${foo}/"),
+            tokens![
+                (TOKEN_PATH, "./"),
+                (TOKEN_INTERPOL_START, "${"),
+                (TOKEN_IDENT, "foo"),
+                (TOKEN_INTERPOL_END, "}"),
+                (TOKEN_ERROR, "/"),
+            ]
+        );
         assert_eq!(
             tokenize("./${foo}a${bar}"),
             tokens![
90 changes: 56 additions & 34 deletions test_data/parser/paths/2.expect
@@ -1,5 +1,5 @@
-NODE_ROOT 0..72 {
-  NODE_LET_IN 0..72 {
+NODE_ROOT 0..91 {
+  NODE_LET_IN 0..91 {
     TOKEN_LET("let") 0..3
     TOKEN_WHITESPACE("\n  ") 3..6
     NODE_KEY_VALUE 6..29 {
@@ -32,7 +32,7 @@ NODE_ROOT 0..72 {
       TOKEN_SEMICOLON(";") 28..29
     }
     TOKEN_WHITESPACE("\n  ") 29..32
-    NODE_KEY_VALUE 32..67 {
+    NODE_KEY_VALUE 32..86 {
       NODE_KEY 32..33 {
         NODE_IDENT 32..33 {
           TOKEN_IDENT("b") 32..33
@@ -41,49 +41,71 @@ NODE_ROOT 0..72 {
       TOKEN_WHITESPACE(" ") 33..34
      TOKEN_ASSIGN("=") 34..35
       TOKEN_WHITESPACE(" ") 35..36
-      NODE_APPLY 36..66 {
-        NODE_APPLY 36..53 {
-          NODE_APPLY 36..43 {
-            NODE_IDENT 36..37 {
-              TOKEN_IDENT("a") 36..37
+      NODE_APPLY 36..85 {
+        NODE_APPLY 36..66 {
+          NODE_APPLY 36..53 {
+            NODE_APPLY 36..43 {
+              NODE_IDENT 36..37 {
+                TOKEN_IDENT("a") 36..37
+              }
+              TOKEN_WHITESPACE(" ") 37..38
+              NODE_LITERAL 38..43 {
+                TOKEN_PATH("./bar") 38..43
+              }
             }
-            TOKEN_WHITESPACE(" ") 37..38
-            NODE_LITERAL 38..43 {
-              TOKEN_PATH("./bar") 38..43
+            TOKEN_WHITESPACE(" ") 43..44
+            NODE_PATH_WITH_INTERPOL 44..53 {
+              TOKEN_PATH("./baz") 44..49
+              NODE_STRING_INTERPOL 49..53 {
+                TOKEN_INTERPOL_START("${") 49..51
+                NODE_IDENT 51..52 {
+                  TOKEN_IDENT("x") 51..52
+                }
+                TOKEN_INTERPOL_END("}") 52..53
+              }
             }
           }
-          TOKEN_WHITESPACE(" ") 43..44
-          NODE_PATH_WITH_INTERPOL 44..53 {
-            TOKEN_PATH("./baz") 44..49
-            NODE_STRING_INTERPOL 49..53 {
-              TOKEN_INTERPOL_START("${") 49..51
-              NODE_IDENT 51..52 {
-                TOKEN_IDENT("x") 51..52
+          TOKEN_WHITESPACE(" ") 53..54
+          NODE_PATH_WITH_INTERPOL 54..66 {
+            TOKEN_PATH("./snens") 54..61
+            NODE_STRING_INTERPOL 61..65 {
+              TOKEN_INTERPOL_START("${") 61..63
+              NODE_IDENT 63..64 {
+                TOKEN_IDENT("x") 63..64
              }
-              TOKEN_INTERPOL_END("}") 52..53
+              TOKEN_INTERPOL_END("}") 64..65
            }
+            TOKEN_PATH("y") 65..66
          }
        }
-        TOKEN_WHITESPACE(" ") 53..54
-        NODE_PATH_WITH_INTERPOL 54..66 {
-          TOKEN_PATH("./snens") 54..61
-          NODE_STRING_INTERPOL 61..65 {
-            TOKEN_INTERPOL_START("${") 61..63
-            NODE_IDENT 63..64 {
-              TOKEN_IDENT("x") 63..64
+        TOKEN_WHITESPACE(" ") 66..67
+        NODE_PATH_WITH_INTERPOL 67..85 {
+          TOKEN_PATH("./qux") 67..72
+          NODE_STRING_INTERPOL 72..76 {
+            TOKEN_INTERPOL_START("${") 72..74
+            NODE_IDENT 74..75 {
+              TOKEN_IDENT("x") 74..75
            }
-            TOKEN_INTERPOL_END("}") 64..65
+            TOKEN_INTERPOL_END("}") 75..76
+          }
+          TOKEN_PATH(".") 76..77
+          NODE_STRING_INTERPOL 77..81 {
+            TOKEN_INTERPOL_START("${") 77..79
+            NODE_IDENT 79..80 {
+              TOKEN_IDENT("y") 79..80
+            }
+            TOKEN_INTERPOL_END("}") 80..81
          }
-          TOKEN_PATH("y") 65..66
+          TOKEN_PATH(".z/w") 81..85
        }
      }
-      TOKEN_SEMICOLON(";") 66..67
+      TOKEN_SEMICOLON(";") 85..86
    }
-    TOKEN_WHITESPACE("\n") 67..68
-    TOKEN_IN("in") 68..70
-    TOKEN_WHITESPACE(" ") 70..71
-    NODE_IDENT 71..72 {
-      TOKEN_IDENT("b") 71..72
+    TOKEN_WHITESPACE("\n") 86..87
+    TOKEN_IN("in") 87..89
+    TOKEN_WHITESPACE(" ") 89..90
+    NODE_IDENT 90..91 {
+      TOKEN_IDENT("b") 90..91
    }
  }
}
2 changes: 1 addition & 1 deletion test_data/parser/paths/2.nix
@@ -1,4 +1,4 @@
 let
   a = f: ./foo${bar}/baz;
-  b = a ./bar ./baz${x} ./snens${x}y;
+  b = a ./bar ./baz${x} ./snens${x}y ./qux${x}.${y}.z/w;
 in b
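The new argument in this test, `./qux${x}.${y}.z/w`, chains literal segments and two interpolations in a single path; per the updated 2.expect above it lexes as:

    TOKEN_PATH("./qux"), TOKEN_INTERPOL_START("${"), TOKEN_IDENT("x"), TOKEN_INTERPOL_END("}"),
    TOKEN_PATH("."), TOKEN_INTERPOL_START("${"), TOKEN_IDENT("y"), TOKEN_INTERPOL_END("}"),
    TOKEN_PATH(".z/w")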
