From ddfb944b2458d242216b23e87b19401745be8da6 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Thu, 16 May 2024 11:32:53 +1000 Subject: [PATCH] Change AST for iterations to use `iteration` kind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `=` node which has traditionally been used for iteration specifications in `for` loops and generators doesn't have normal assignment semantics. Let's consider for x in xs body end which has been parsed as `(for (= x xs) (block body))`. Problems: * The iteration does create a binding for `x`, but not to the expression on the right hand side of the `=`. * The user may use `in` or `∈` in the source code rather than `=`. The parser still uses a `=` node for consistency but this only emphasizes that there's something a bit weird going on. So this use of `=` is not assignment; merely assignment-like. In this change, we use `in` instead of `=` and wrap this in an `iteration` node so that all iteration (including over multiple iterators) has the same structure. Thus the `for` loop above parses as `(for (iteration (in x xs)) (block body))` instead. The `cartesian_iteration` head naturally becomes `iteration` instead - being less specific here with the naming seems appropriate in trying to represent the surface syntax; cartesian semantics come later in lowering and a macro may decide to do something else with the iteration spec. These changes are also used for generators. 
After the changes we have tree structures such as julia> parsestmt(SyntaxNode, "for i in is body end") line:col│ tree │ file_name 1:1 │[for] 1:4 │ [iteration] 1:4 │ [in] 1:5 │ i 1:10 │ is 1:12 │ [block] 1:13 │ body julia> parsestmt(SyntaxNode, "for i in is, j in js body end") line:col│ tree │ file_name 1:1 │[for] 1:4 │ [iteration] 1:4 │ [in] 1:5 │ i 1:10 │ is 1:13 │ [in] 1:14 │ j 1:19 │ js 1:21 │ [block] 1:22 │ body julia> parsestmt(SyntaxNode, "[a for i = is, j = js if z]") line:col│ tree │ file_name 1:1 │[comprehension] 1:2 │ [generator] 1:2 │ a 1:7 │ [filter] 1:7 │ [iteration] 1:7 │ [in] 1:8 │ i 1:12 │ is 1:15 │ [in] 1:16 │ j 1:20 │ js 1:26 │ z julia> parsestmt(SyntaxNode, "[a for i = is for j = js if z]") line:col│ tree │ file_name 1:1 │[comprehension] 1:2 │ [generator] 1:2 │ a 1:7 │ [iteration] 1:7 │ [in] 1:8 │ i 1:12 │ is 1:18 │ [filter] 1:18 │ [iteration] 1:18 │ [in] 1:19 │ j 1:23 │ js 1:29 │ z --- docs/src/reference.md | 20 +++++++------- src/expr.jl | 34 +++++++++++------------- src/kinds.jl | 2 +- src/parser.jl | 43 ++++++++++++++---------------- test/parser.jl | 62 +++++++++++++++++++++---------------------- 5 files changed, 78 insertions(+), 83 deletions(-) diff --git a/docs/src/reference.md b/docs/src/reference.md index 2ae2cef1..a98662ee 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -76,7 +76,7 @@ class of tokenization errors and lets the parser deal with them. 
* Using `try catch else finally end` is parsed with `K"catch"` `K"else"` and `K"finally"` children to avoid the awkwardness of the optional child nodes in the `Expr` representation (#234) * The dotted import path syntax as in `import A.b.c` is parsed with a `K"importpath"` kind rather than `K"."`, because a bare `A.b.c` has a very different nested/quoted expression representation (#244) * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) -* Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). +* Iterations are represented with the `iteration` and `in` heads rather than `=` within the header of a `for`. Thus `for i=is ; body end` parses to `(for (iteration (in i is)) (block body))`. Cartesian iteration as in `for a=as, b=bs body end` is represented with a longer `iteration` block rather than a `block` containing `=` because these lists of iterators are neither semantically nor syntactically a sequence of statements, unlike other uses of `block`. Generators also use the `iteration` head - see information on that below. ## More detail on tree differences @@ -90,8 +90,10 @@ mean ``` for x in xs -for y in ys - push!(xy, collection) + for y in ys + push!(xy, collection) + end +end ``` so the `xy` prefix is in the *body* of the innermost for loop. Following this, @@ -112,8 +114,8 @@ source order. However, our green tree is strictly source-ordered, so we must deviate from the Julia AST. 
We deal with this by grouping cartesian products of iterators -(separated by commas) within `cartesian_iterator` blocks as in `for` loops, and -use the presence of multiple iterator blocks rather than the `flatten` head to +(separated by commas) within `iteration` blocks as in `for` loops, and +use the presence of multiple `iteration` blocks rather than the `flatten` head to distinguish flattened iterators. The nested flattens and generators of `Expr` forms are reconstructed later. In this form the tree structure resembles the source much more closely. For example, `(xy for x in xs for y in ys)` is parsed as @@ -121,8 +123,8 @@ source much more closely. For example, `(xy for x in xs for y in ys)` is parsed ``` (generator xy - (= x xs) - (= y ys)) + (iteration (in x xs)) + (iteration (in y ys))) ``` And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as @@ -130,9 +132,7 @@ And the cartesian iteration `(xy for x in xs, y in ys)` is parsed as ``` (generator xy - (cartesian_iterator - (= x xs) - (= y ys))) + (iteration (in x xs) (in y ys))) ``` ### Whitespace trivia inside strings diff --git a/src/expr.jl b/src/expr.jl index d600a99b..adda1463 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -198,6 +198,17 @@ function _extract_do_lambda!(args) end end +function _append_iterspec!(args, ex) + if @isexpr(ex, :iteration) + for iter in ex.args::Vector{Any} + push!(args, Expr(:(=), iter.args...)) + end + else + push!(args, ex) + end + return args +end + # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) @@ -301,10 +312,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # Move parameters blocks to args[2] _reorder_parameters!(args, 2) elseif k == K"for" - a1 = args[1] - if @isexpr(a1, :cartesian_iterator) - args[1] = Expr(:block, a1.args...) - end + iters = _append_iterspec!([], args[1]) + args[1] = length(iters) == 1 ? 
only(iters) : Expr(:block, iters...) # Add extra line number node for the `end` of the block. This may seem # useless but it affects code coverage. push!(args[2].args, endloc) @@ -360,12 +369,8 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # source-ordered `generator` format. gen = args[1] for j = length(args):-1:2 - aj = args[j] - if @isexpr(aj, :cartesian_iterator) - gen = Expr(:generator, gen, aj.args...) - else - gen = Expr(:generator, gen, aj) - end + gen = Expr(:generator, gen) + _append_iterspec!(gen.args, args[j]) if j < length(args) # Additional `for`s flatten the inner generator gen = Expr(:flatten, gen) @@ -374,14 +379,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, return gen elseif k == K"filter" @assert length(args) == 2 - iterspec = args[1] - outargs = Any[args[2]] - if @isexpr(iterspec, :cartesian_iterator) - append!(outargs, iterspec.args) - else - push!(outargs, iterspec) - end - args = outargs + args = _append_iterspec!(Any[args[2]], args[1]) elseif k == K"nrow" || k == K"ncat" # For lack of a better place, the dimension argument to nrow/ncat # is stored in the flags diff --git a/src/kinds.jl b/src/kinds.jl index bf837716..0ef0b815 100644 --- a/src/kinds.jl +++ b/src/kinds.jl @@ -1099,7 +1099,7 @@ register_kinds!(JuliaSyntax, 0, [ # Comprehensions "generator" "filter" - "cartesian_iterator" + "iteration" "comprehension" "typed_comprehension" # Container for a single statement/atom plus any trivia and errors diff --git a/src/parser.jl b/src/parser.jl index 8401002c..9a2ac703 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1075,7 +1075,7 @@ function parse_where_chain(ps0::ParseState, mark) # x where {T,S} ==> (where x (braces T S)) # Also various nonsensical forms permitted # x where {T S} ==> (where x (bracescat (row T S))) - # x where {y for y in ys} ==> (where x (braces (generator y (= y ys)))) + # x where {y for y in ys} ==> (where x (braces (generator y (iteration 
(in y ys))))) m = position(ps) bump(ps, TRIVIA_FLAG) ckind, cflags = parse_cat(ps, K"}", ps.end_symbol) @@ -1578,7 +1578,7 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) # T[x y] ==> (typed_hcat T x y) # T[x ; y] ==> (typed_vcat T x y) # T[a b; c d] ==> (typed_vcat T (row a b) (row c d)) - # T[x for x in xs] ==> (typed_comprehension T (generator x (= x xs))) + # T[x for x in xs] ==> (typed_comprehension T (generator x (iteration (in x xs)))) #v1.8: T[a ; b ;; c ; d] ==> (typed_ncat-2 T (nrow-1 a b) (nrow-1 c d)) outk = ckind == K"vect" ? K"ref" : ckind == K"hcat" ? K"typed_hcat" : @@ -1798,8 +1798,8 @@ function parse_resword(ps::ParseState) bump_closing_token(ps, K"end") emit(ps, mark, K"while") elseif word == K"for" - # for x in xs end ==> (for (= x xs) (block)) - # for x in xs, y in ys \n a \n end ==> (for (cartesian_iterator (= x xs) (= y ys)) (block a)) + # for x in xs end ==> (for (iteration (in x xs)) (block)) + # for x in xs, y in ys \n a \n end ==> (for (iteration (in x xs) (in y ys)) (block a)) bump(ps, TRIVIA_FLAG) parse_iteration_specs(ps) parse_block(ps) @@ -2621,11 +2621,11 @@ function parse_iteration_spec(ps::ParseState) if peek_behind(ps).orig_kind == K"outer" if peek_skip_newline_in_gen(ps) in KSet"= in ∈" # Not outer keyword - # outer = rhs ==> (= outer rhs) - # outer <| x = rhs ==> (= (call-i outer <| x) rhs) + # outer = rhs ==> (iteration (in outer rhs)) + # outer <| x = rhs ==> (iteration (in (call-i outer <| x) rhs)) else - # outer i = rhs ==> (= (outer i) rhs) - # outer (x,y) = rhs ==> (= (outer (tuple-p x y)) rhs) + # outer i = rhs ==> (iteration (in (outer i) rhs)) + # outer (x,y) = rhs ==> (iteration (in (outer (tuple-p x y)) rhs)) reset_node!(ps, position(ps), kind=K"outer", flags=TRIVIA_FLAG) parse_pipe_lt(ps) emit(ps, mark, K"outer") @@ -2641,7 +2641,7 @@ function parse_iteration_spec(ps::ParseState) end # Or try parse_pipe_lt ??? 
end - emit(ps, mark, K"=") + emit(ps, mark, K"in") end # Parse an iteration spec, or a comma separate list of such for for loops and @@ -2649,9 +2649,7 @@ end function parse_iteration_specs(ps::ParseState) mark = position(ps) n_iters = parse_comma_separated(ps, parse_iteration_spec) - if n_iters > 1 - emit(ps, mark, K"cartesian_iterator") - end + emit(ps, mark, K"iteration") end # flisp: parse-space-separated-exprs @@ -2701,19 +2699,19 @@ end # Parse generators # # We represent generators quite differently from `Expr`: -# * Cartesian products of iterators are grouped within cartesian_iterator +# * Iteration variables and their iterators are grouped within K"iteration" # nodes, as in the short form of `for` loops. # * The `generator` kind is used for both cartesian and flattened generators # -# (x for a in as for b in bs) ==> (parens (generator x (= a as) (= b bs))) -# (x for a in as, b in bs) ==> (parens (generator x (cartesian_iterator (= a as) (= b bs)))) -# (x for a in as, b in bs if z) ==> (parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z))) +# (x for a in as for b in bs) ==> (parens (generator x (iteration (in a as)) (iteration (in b bs)))) +# (x for a in as, b in bs) ==> (parens (generator x (iteration (in a as) (in b bs)))) +# (x for a in as, b in bs if z) ==> (parens (generator x (filter (iteration (in a as) (in b bs)) z))) # # flisp: parse-generator function parse_generator(ps::ParseState, mark) while (t = peek_token(ps); kind(t) == K"for") if !preceding_whitespace(t) - # ((x)for x in xs) ==> (parens (generator (parens x) (error) (= x xs))) + # ((x)for x in xs) ==> (parens (generator (parens x) (error) (iteration (in x xs)))) bump_invisible(ps, K"error", TRIVIA_FLAG, error="Expected space before `for` in generator") end @@ -2721,7 +2719,7 @@ function parse_generator(ps::ParseState, mark) iter_mark = position(ps) parse_iteration_specs(ps) if peek(ps) == K"if" - # (x for a in as if z) ==> (parens (generator x (filter (= a as) z))) + # (x 
for a in as if z) ==> (parens (generator x (filter (iteration (in a as)) z))) bump(ps, TRIVIA_FLAG) parse_cond(ps) emit(ps, iter_mark, K"filter") @@ -2732,7 +2730,7 @@ end # flisp: parse-comprehension function parse_comprehension(ps::ParseState, mark, closer) - # [x for a in as] ==> (comprehension (generator x a in as)) + # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) ps = ParseState(ps, whitespace_newline=true, space_sensitive=false, end_symbol=false) @@ -2982,8 +2980,8 @@ function parse_cat(ps::ParseState, closer, end_is_symbol) # [x ==> (vect x (error-t)) parse_vect(ps, closer) elseif k == K"for" - # [x for a in as] ==> (comprehension (generator x (= a as))) - # [x \n\n for a in as] ==> (comprehension (generator x (= a as))) + # [x for a in as] ==> (comprehension (generator x (iteration (in a as)))) + # [x \n\n for a in as] ==> (comprehension (generator x (iteration (in a as)))) parse_comprehension(ps, mark, closer) else # [x y] ==> (hcat x y) @@ -3139,8 +3137,7 @@ function parse_brackets(after_parse::Function, continue elseif k == K"for" # Generator syntax - # (x for a in as) ==> (parens (generator x (= a as))) - # (x \n\n for a in as) ==> (parens (generator x (= a as))) + # (x for a in as) ==> (parens (generator x (iteration (in a as)))) parse_generator(ps, mark) else # Error - recovery done when consuming closing_kind diff --git a/test/parser.jl b/test/parser.jl index 1e4baa66..1a157e3d 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -300,7 +300,7 @@ tests = [ "x where \n {T}" => "(where x (braces T))" "x where {T,S}" => "(where x (braces T S))" "x where {T S}" => "(where x (bracescat (row T S)))" - "x where {y for y in ys}" => "(where x (braces (generator y (= y ys))))" + "x where {y for y in ys}" => "(where x (braces (generator y (iteration (in y ys)))))" "x where T" => "(where x T)" "x where \n T" => "(where x T)" "x where T<:S" => "(where x (<: T S))" @@ -389,7 +389,7 @@ tests = [ "T[x y]" => "(typed_hcat T x y)" "T[x ; y]" 
=> "(typed_vcat T x y)" "T[a b; c d]" => "(typed_vcat T (row a b) (row c d))" - "T[x for x in xs]" => "(typed_comprehension T (generator x (= x xs)))" + "T[x for x in xs]" => "(typed_comprehension T (generator x (iteration (in x xs))))" ((v=v"1.8",), "T[a ; b ;; c ; d]") => "(typed_ncat-2 T (nrow-1 a b) (nrow-1 c d))" # Dotted forms @@ -461,8 +461,8 @@ tests = [ "while cond body end" => "(while cond (block body))" "while x < y \n a \n b \n end" => "(while (call-i x < y) (block a b))" # for - "for x in xs end" => "(for (= x xs) (block))" - "for x in xs, y in ys \n a \n end" => "(for (cartesian_iterator (= x xs) (= y ys)) (block a))" + "for x in xs end" => "(for (iteration (in x xs)) (block))" + "for x in xs, y in ys \n a \n end" => "(for (iteration (in x xs) (in y ys)) (block a))" # let "let x=1\n end" => "(let (block (= x 1)) (block))" "let x=1 ; end" => "(let (block (= x 1)) (block))" @@ -670,16 +670,16 @@ tests = [ "import A..." => "(import (importpath A ..))" "import A; B" => "(import (importpath A))" ], - JuliaSyntax.parse_iteration_spec => [ - "i = rhs" => "(= i rhs)" - "i in rhs" => "(= i rhs)" - "i ∈ rhs" => "(= i rhs)" - "i = 1:10" => "(= i (call-i 1 : 10))" - "(i,j) in iter" => "(= (tuple-p i j) iter)" - "outer = rhs" => "(= outer rhs)" - "outer <| x = rhs" => "(= (call-i outer <| x) rhs)" - "outer i = rhs" => "(= (outer i) rhs)" - "outer (x,y) = rhs" => "(= (outer (tuple-p x y)) rhs)" + JuliaSyntax.parse_iteration_specs => [ + "i = rhs" => "(iteration (in i rhs))" + "i in rhs" => "(iteration (in i rhs))" + "i ∈ rhs" => "(iteration (in i rhs))" + "i = 1:10" => "(iteration (in i (call-i 1 : 10)))" + "(i,j) in iter" => "(iteration (in (tuple-p i j) iter))" + "outer = rhs" => "(iteration (in outer rhs))" + "outer <| x = rhs" => "(iteration (in (call-i outer <| x) rhs))" + "outer i = rhs" => "(iteration (in (outer i) rhs))" + "outer (x,y) = rhs" => "(iteration (in (outer (tuple-p x y)) rhs))" ], JuliaSyntax.parse_paren => [ # Tuple syntax with commas @@ -707,8 
+707,8 @@ tests = [ "(x)" => "(parens x)" "(a...)" => "(parens (... a))" # Generators - "(x for a in as)" => "(parens (generator x (= a as)))" - "(x \n\n for a in as)" => "(parens (generator x (= a as)))" + "(x for a in as)" => "(parens (generator x (iteration (in a as))))" + "(x \n\n for a in as)" => "(parens (generator x (iteration (in a as))))" # Range parsing in parens "(1:\n2)" => "(parens (call-i 1 : 2))" "(1:2)" => "(parens (call-i 1 : 2))" @@ -776,19 +776,19 @@ tests = [ "[x \n, ]" => "(vect x)" "[x" => "(vect x (error-t))" "[x \n\n ]" => "(vect x)" - "[x for a in as]" => "(comprehension (generator x (= a as)))" - "[x \n\n for a in as]" => "(comprehension (generator x (= a as)))" + "[x for a in as]" => "(comprehension (generator x (iteration (in a as))))" + "[x \n\n for a in as]" => "(comprehension (generator x (iteration (in a as))))" # parse_generator - "(x for a in as for b in bs)" => "(parens (generator x (= a as) (= b bs)))" - "(x for a in as, b in bs)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs))))" - "(x for a in as, b in bs if z)" => "(parens (generator x (filter (cartesian_iterator (= a as) (= b bs)) z)))" - "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (cartesian_iterator (= a as) (= b bs)) (cartesian_iterator (= c cs) (= d ds))))" - "(x for a in as for b in bs if z)" => "(parens (generator x (= a as) (filter (= b bs) z)))" - "(x for a in as if z for b in bs)" => "(parens (generator x (filter (= a as) z) (= b bs)))" - "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (= a as) (filter (= b bs) cond1) (filter (= c cs) cond2)))" - "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (= a as) (block cond2))))" - "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (= x xs)))" - "(x for a in as if z)" => "(parens (generator x (filter (= a as) z)))" + "(x for a in as for b in bs)" => "(parens (generator x (iteration (in a as)) 
(iteration (in b bs))))" + "(x for a in as, b in bs)" => "(parens (generator x (iteration (in a as) (in b bs))))" + "(x for a in as, b in bs if z)" => "(parens (generator x (filter (iteration (in a as) (in b bs)) z)))" + "(x for a in as, b in bs for c in cs, d in ds)" => "(parens (generator x (iteration (in a as) (in b bs)) (iteration (in c cs) (in d ds))))" + "(x for a in as for b in bs if z)" => "(parens (generator x (iteration (in a as)) (filter (iteration (in b bs)) z)))" + "(x for a in as if z for b in bs)" => "(parens (generator x (filter (iteration (in a as)) z) (iteration (in b bs))))" + "[x for a = as for b = bs if cond1 for c = cs if cond2]" => "(comprehension (generator x (iteration (in a as)) (filter (iteration (in b bs)) cond1) (filter (iteration (in c cs)) cond2)))" + "[x for a = as if begin cond2 end]" => "(comprehension (generator x (filter (iteration (in a as)) (block cond2))))" + "[(x)for x in xs]" => "(comprehension (generator (parens x) (error-t) (iteration (in x xs))))" + "(x for a in as if z)" => "(parens (generator x (filter (iteration (in a as)) z)))" # parse_vect "[x, y]" => "(vect x y)" "[x, y]" => "(vect x y)" @@ -876,8 +876,8 @@ tests = [ "\"hi\$(\"ho\")\"" => "(string \"hi\" (parens (string \"ho\")))" "\"\$(x,y)\"" => "(string (parens (error x y)))" "\"\$(x;y)\"" => "(string (parens (error x y)))" - "\"\$(x for y in z)\"" => "(string (parens (error (generator x (= y z)))))" - "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (= y z)))))" + "\"\$(x for y in z)\"" => "(string (parens (error (generator x (iteration (in y z))))))" + "\"\$((x for y in z))\"" => "(string (parens (parens (generator x (iteration (in y z))))))" "\"\$(xs...)\"" => "(string (parens (... 
xs)))" "\"a \$foo b\"" => "(string \"a \" foo \" b\")" "\"\$var\"" => "(string var)" @@ -996,7 +996,7 @@ parsestmt_test_specs = [ ":+'y'" => "(juxtapose (call-post (quote-: +) ') (call-post y '))" # unary subtype ops and newlines "a +\n\n<:" => "(call-i a + <:)" - "for\n\n<:" => "(for (= <: (error (error-t))) (block (error)) (error-t))" + "for\n\n<:" => "(for (iteration (in <: (error (error-t)))) (block (error)) (error-t))" # Empty character consumes trailing ' delimiter (ideally this could be # tested above but we don't require the input stream to be consumed in the # unit tests there.