From 1170bb6b12456c5d14693078c38620272a0a7910 Mon Sep 17 00:00:00 2001 From: Claire Foster Date: Sat, 1 Jul 2023 05:31:19 +1000 Subject: [PATCH] AST: Rearrange `do` to sit inside `call`/`macrocall` `do` syntax is represented in `Expr` with the `do` outside the call. This makes some sense syntactically (do appears as "an operator" after the function call). However semantically this nesting is awkward because the lambda represented by the do block is passed to the call. This same problem occurs for the macro form `@f(x) do \n body end` where the macro expander needs a special rule to expand nestings of the form `Expr(:do, Expr(:macrocall ...), ...)`, rearranging the expressions which are passed to this macro call rather than passing the expressions up the tree. In this PR, we change the parsing of @f(x, y) do a, b\n body\n end f(x, y) do a, b\n body\n end to tack the `do` onto the end of the call argument list: (macrocall @f x y (do (tuple a b) body)) (call f x y (do (tuple a b) body)) This achieves the following desirable properties 1. Content of `do` is nested inside the call which improves the match between AST and semantics 2. Macro can be passed the syntax as-is rather than the macro expander rearranging syntax before passing it to the macro 3. In the future, a macro can detect when it's being passed do syntax rather than lambda syntax 4. `do` head is used uniformly for both call and macrocall 5. We preserve the source ordering properties we need for the green tree. --- docs/src/reference.md | 42 ++++++++++++++++++++++++++++-------------- src/expr.jl | 23 +++++++++++++++++++++-- src/parser.jl | 17 +++++++++-------- test/expr.jl | 32 ++++++++++++++++++++++++++++++-- test/parser.jl | 9 +++++---- 5 files changed, 93 insertions(+), 30 deletions(-) diff --git a/docs/src/reference.md b/docs/src/reference.md index 67ced3f1..bb9d3959 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -43,7 +43,7 @@ the source text more closely. 
* The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`. * Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85) * `global const` and `const global` are not normalized by the parser. This is done in `Expr` conversion (#130) -* The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) +* [`do` syntax](#Do-blocks) is nested as the last child of the call which the `do` lambda will be passed to (#98, #322) * `@.` is not lowered to `@__dot__` inside the parser (#146) * Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) * Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) @@ -78,7 +78,6 @@ class of tokenization errors and lets the parser deal with them. * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) * Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). - ## More detail on tree differences ### Generators @@ -196,23 +195,38 @@ The same goes for command strings which are always wrapped in `K"cmdstring"` regardless of whether they have multiple pieces (due to triple-quoted dedenting) or otherwise. -### No desugaring of the closure in do blocks +### Do blocks -The reference parser represents `do` syntax with a closure for the second -argument. That is, +`do` syntax is represented in the `Expr` AST with the `do` outside the call. +This makes some sense syntactically (do appears as "an operator" after the +function call). 
-```julia -f(x) do y - body -end -``` +However semantically this nesting is awkward because the lambda represented by +the do block is passed to the call. This same problem occurs for the macro form +`@f(x) do \n body end` where the macro expander needs a special rule to expand +nestings of the form `Expr(:do, Expr(:macrocall ...), ...)`, rearranging the +expressions which are passed to this macro call rather than passing the +expressions up the tree. + +The implied closure is also lowered to a nested `Expr(:->)` expression, though +it is somewhat premature to do this during parsing. + +To resolve these problems we parse + + @f(x, y) do a, b\n body\n end + f(x, y) do a, b\n body\n end -becomes `(do (call f x) (-> (tuple y) (block body)))` in the reference parser. +by tacking the `do` onto the end of the call argument list: -However, the nested closure with `->` head is implied here rather than present -in the surface syntax, which suggests this is a premature desugaring step. -Instead we emit the flatter structure `(do (call f x) (tuple y) (block body))`. + (macrocall @f x y (do (tuple a b) body)) + (call f x y (do (tuple a b) body)) +This achieves the following desirable properties +1. Content of `do` is nested inside the call which improves the match between AST and semantics +2. Macro can be passed the syntax as-is rather than the macro expander rearranging syntax before passing it to the macro +3. In the future, a macro can detect when it's being passed do syntax rather than lambda syntax +4. `do` head is used uniformly for both call and macrocall +5. We preserve the source ordering properties we need for the green tree. 
## Tree structure reference diff --git a/src/expr.jl b/src/expr.jl index f674b984..d5cef886 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -184,6 +184,16 @@ function _fixup_Expr_children!(head, loc, args) return args end +# Remove the `do` block from the final position in a function/macro call arg list +function _extract_do_lambda!(args) + if length(args) > 1 && Meta.isexpr(args[end], :do_lambda) + do_ex = pop!(args)::Expr + return Expr(:->, do_ex.args...) + else + return nothing + end +end + # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) @@ -217,8 +227,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end end elseif k == K"macrocall" + do_lambda = _extract_do_lambda!(args) _reorder_parameters!(args, 2) insert!(args, 2, loc) + if do_lambda isa Expr + return Expr(:do, Expr(headsym, args...), do_lambda) + end elseif k == K"block" || (k == K"toplevel" && !has_flags(head, TOPLEVEL_SEMICOLONS_FLAG)) if isempty(args) push!(args, loc) @@ -247,6 +261,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, popfirst!(args) headsym = Symbol("'") end + do_lambda = _extract_do_lambda!(args) # Move parameters blocks to args[2] _reorder_parameters!(args, 2) if headsym === :dotcall @@ -259,6 +274,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args[1] = Symbol(".", args[1]) end end + if do_lambda isa Expr + return Expr(:do, Expr(headsym, args...), do_lambda) + end elseif k == K"." 
if length(args) == 2 a2 = args[2] @@ -402,8 +420,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # as inert QuoteNode rather than in `Expr(:quote)` quasiquote return QuoteNode(a1) end - elseif k == K"do" && length(args) == 3 - return Expr(:do, args[1], Expr(:->, args[2], args[3])) + elseif k == K"do" + # Temporary head which is picked up by _extract_do_lambda + headsym = :do_lambda elseif k == K"let" a1 = args[1] if @isexpr(a1, :block) diff --git a/src/parser.jl b/src/parser.jl index 042e811d..0cc9383f 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1510,12 +1510,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K")") - emit(ps, mark, is_macrocall ? K"macrocall" : K"call", - is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) if peek(ps) == K"do" - # f(x) do y body end ==> (do (call f x) (tuple y) (block body)) - parse_do(ps, mark) + # f(x) do y body end ==> (call f x (do (tuple y) (block body))) + parse_do(ps) end + emit(ps, mark, is_macrocall ? K"macrocall" : K"call", + is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) if is_macrocall # @x(a, b) ==> (macrocall-p @x a b) # A.@x(y) ==> (macrocall-p (. A @x) y) @@ -2266,18 +2266,19 @@ function parse_catch(ps::ParseState) end # flisp: parse-do -function parse_do(ps::ParseState, mark) +function parse_do(ps::ParseState) + mark = position(ps) bump(ps, TRIVIA_FLAG) # do ps = normal_context(ps) m = position(ps) if peek(ps) in KSet"NewlineWs ;" - # f() do\nend ==> (do (call f) (tuple) (block)) - # f() do ; body end ==> (do (call f) (tuple) (block body)) + # f() do\nend ==> (call f (do (tuple) (block))) + # f() do ; body end ==> (call f (do (tuple) (block body))) # this trivia needs to go into the tuple due to the way position() # works. 
bump(ps, TRIVIA_FLAG) else - # f() do x, y\n body end ==> (do (call f) (tuple x y) (block body)) + # f() do x, y\n body end ==> (call f (do (tuple x y) (block body))) parse_comma_separated(ps, parse_range) end emit(ps, m, K"tuple") diff --git a/test/expr.jl b/test/expr.jl index 96e711d7..810390c8 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -296,11 +296,39 @@ @testset "do block conversion" begin @test parsestmt("f(x) do y\n body end") == - Expr(:do, Expr(:call, :f, :x), + Expr(:do, + Expr(:call, :f, :x), Expr(:->, Expr(:tuple, :y), Expr(:block, LineNumberNode(2), :body))) + + @test parsestmt("@f(x) do y body end") == + Expr(:do, + Expr(:macrocall, Symbol("@f"), LineNumberNode(1), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + @test parsestmt("f(x; a=1) do y body end") == + Expr(:do, + Expr(:call, :f, Expr(:parameters, Expr(:kw, :a, 1)), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + # Test calls with do inside them + @test parsestmt("g(f(x) do y\n body end)") == + Expr(:call, + :g, + Expr(:do, + Expr(:call, :f, :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(2), + :body)))) end @testset "= to Expr(:kw) conversion" begin @@ -708,7 +736,7 @@ @test parsestmt("(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) @test parsestmt("x do", ignore_errors=true) == - Expr(:block, :x, Expr(:error, Expr(:do))) + Expr(:block, :x, Expr(:error, Expr(:do_lambda))) end @testset "import" begin diff --git a/test/parser.jl b/test/parser.jl index 9291c7f6..992284c9 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -355,10 +355,11 @@ tests = [ "A.@x(y)" => "(macrocall-p (. A @x) y)" "A.@x(y).z" => "(. (macrocall-p (. 
A @x) y) z)" # do - "f() do\nend" => "(do (call f) (tuple) (block))" - "f() do ; body end" => "(do (call f) (tuple) (block body))" - "f() do x, y\n body end" => "(do (call f) (tuple x y) (block body))" - "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" + "f() do\nend" => "(call f (do (tuple) (block)))" + "f() do ; body end" => "(call f (do (tuple) (block body)))" + "f() do x, y\n body end" => "(call f (do (tuple x y) (block body)))" + "f(x) do y body end" => "(call f x (do (tuple y) (block body)))" + "@f(x) do y body end" => "(macrocall-p @f x (do (tuple y) (block body)))" # square brackets "@S[a,b]" => "(macrocall @S (vect a b))"