diff --git a/docs/src/reference.md b/docs/src/reference.md index 67ced3f1..bb9d3959 100644 --- a/docs/src/reference.md +++ b/docs/src/reference.md @@ -43,7 +43,7 @@ the source text more closely. * The right hand side of `x where {T}` retains the `K"braces"` node around the `T` to distinguish it from `x where T`. * Ternary syntax is not immediately lowered to an `if` node: `a ? b : c` parses as `(? a b c)` rather than `Expr(:if, :a, :b, :c)` (#85) * `global const` and `const global` are not normalized by the parser. This is done in `Expr` conversion (#130) -* The AST for `do` is flatter and not lowered to a lambda by the parser: `f(x) do y ; body end` is parsed as `(do (call f x) (tuple y) (block body))` (#98) +* [`do` syntax](#Do-blocks) is nested as the last child of the call which the `do` lambda will be passed to (#98, #322) * `@.` is not lowered to `@__dot__` inside the parser (#146) * Docstrings use the `K"doc"` kind, and are not lowered to `Core.@doc` until later (#217) * Juxtaposition uses the `K"juxtapose"` kind rather than lowering immediately to `*` (#220) @@ -78,7 +78,6 @@ class of tokenization errors and lets the parser deal with them. * We use flags rather than child nodes to represent the difference between `struct` and `mutable struct`, `module` and `baremodule` (#220) * Multiple iterations within the header of a `for`, as in `for a=as, b=bs body end` are represented with a `cartesian_iterator` head rather than a `block`, as these lists of iterators are neither semantically nor syntactically a sequence of statements. Unlike other uses of `block` (see also generators). - ## More detail on tree differences ### Generators @@ -196,23 +195,38 @@ The same goes for command strings which are always wrapped in `K"cmdstring"` regardless of whether they have multiple pieces (due to triple-quoted dedenting) or otherwise. 
-### No desugaring of the closure in do blocks +### Do blocks -The reference parser represents `do` syntax with a closure for the second -argument. That is, +`do` syntax is represented in the `Expr` AST with the `do` outside the call. +This makes some sense syntactically (do appears as "an operator" after the +function call). -```julia -f(x) do y - body -end -``` +However semantically this nesting is awkward because the lambda represented by +the do block is passed to the call. This same problem occurs for the macro form +`@f(x) do \n body end` where the macro expander needs a special rule to expand +nestings of the form `Expr(:do, Expr(:macrocall ...), ...)`, rearranging the +expressions which are passed to this macro call rather than passing the +expressions up the tree. + +The implied closure is also lowered to a nested `Expr(:->)` expression, though +it is somewhat premature to do this during parsing. + +To resolve these problems we parse + + @f(x, y) do a, b\n body\n end + f(x, y) do a, b\n body\n end -becomes `(do (call f x) (-> (tuple y) (block body)))` in the reference parser. +by tacking the `do` onto the end of the call argument list: -However, the nested closure with `->` head is implied here rather than present -in the surface syntax, which suggests this is a premature desugaring step. -Instead we emit the flatter structure `(do (call f x) (tuple y) (block body))`. + (macrocall @f x y (do (tuple a b) body)) + (call f x y (do (tuple a b) body)) +This achieves the following desirable properties +1. Content of `do` is nested inside the call which improves the match between AST and semantics +2. Macro can be passed the syntax as-is rather than the macro expander rearranging syntax before passing it to the macro +3. In the future, a macro can detect when it's being passed do syntax rather than lambda syntax +4. `do` head is used uniformly for both call and macrocall +5. We preserve the source ordering properties we need for the green tree. 
## Tree structure reference diff --git a/src/expr.jl b/src/expr.jl index f674b984..d5cef886 100644 --- a/src/expr.jl +++ b/src/expr.jl @@ -184,6 +184,16 @@ function _fixup_Expr_children!(head, loc, args) return args end +# Remove the `do` block from the final position in a function/macro call arg list, returning it as an `Expr(:->, ...)` built from its children, or `nothing` when `args` does not end in a `:do_lambda` node +function _extract_do_lambda!(args) + if length(args) > 1 && Meta.isexpr(args[end], :do_lambda) + do_ex = pop!(args)::Expr + return Expr(:->, do_ex.args...) + else + return nothing + end +end + # Convert internal node of the JuliaSyntax parse tree to an Expr function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args) k = kind(head) @@ -217,8 +227,12 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, end end elseif k == K"macrocall" + do_lambda = _extract_do_lambda!(args) _reorder_parameters!(args, 2) insert!(args, 2, loc) + if do_lambda isa Expr + return Expr(:do, Expr(headsym, args...), do_lambda) + end elseif k == K"block" || (k == K"toplevel" && !has_flags(head, TOPLEVEL_SEMICOLONS_FLAG)) if isempty(args) push!(args, loc) @@ -247,6 +261,7 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, popfirst!(args) headsym = Symbol("'") end + do_lambda = _extract_do_lambda!(args) # Move parameters blocks to args[2] _reorder_parameters!(args, 2) if headsym === :dotcall @@ -259,6 +274,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, args[1] = Symbol(".", args[1]) end end + if do_lambda isa Expr + return Expr(:do, Expr(headsym, args...), do_lambda) + end elseif k == K"." 
if length(args) == 2 a2 = args[2] @@ -402,8 +420,9 @@ function _internal_node_to_Expr(source, srcrange, head, childranges, childheads, # as inert QuoteNode rather than in `Expr(:quote)` quasiquote return QuoteNode(a1) end - elseif k == K"do" && length(args) == 3 - return Expr(:do, args[1], Expr(:->, args[2], args[3])) + elseif k == K"do" + # Temporary head which is picked up by _extract_do_lambda + headsym = :do_lambda elseif k == K"let" a1 = args[1] if @isexpr(a1, :block) diff --git a/src/parser.jl b/src/parser.jl index 042e811d..0cc9383f 100644 --- a/src/parser.jl +++ b/src/parser.jl @@ -1510,12 +1510,12 @@ function parse_call_chain(ps::ParseState, mark, is_macrocall=false) bump_disallowed_space(ps) bump(ps, TRIVIA_FLAG) parse_call_arglist(ps, K")") - emit(ps, mark, is_macrocall ? K"macrocall" : K"call", - is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) if peek(ps) == K"do" - # f(x) do y body end ==> (do (call f x) (tuple y) (block body)) - parse_do(ps, mark) + # f(x) do y body end ==> (call f x (do (tuple y) (block body))) + parse_do(ps) end + emit(ps, mark, is_macrocall ? K"macrocall" : K"call", + is_macrocall ? PARENS_FLAG : EMPTY_FLAGS) if is_macrocall # @x(a, b) ==> (macrocall-p @x a b) # A.@x(y) ==> (macrocall-p (. A @x) y) @@ -2266,18 +2266,19 @@ function parse_catch(ps::ParseState) end # flisp: parse-do -function parse_do(ps::ParseState, mark) +function parse_do(ps::ParseState) + mark = position(ps) bump(ps, TRIVIA_FLAG) # do ps = normal_context(ps) m = position(ps) if peek(ps) in KSet"NewlineWs ;" - # f() do\nend ==> (do (call f) (tuple) (block)) - # f() do ; body end ==> (do (call f) (tuple) (block body)) + # f() do\nend ==> (call f (do (tuple) (block))) + # f() do ; body end ==> (call f (do (tuple) (block body))) # this trivia needs to go into the tuple due to the way position() # works. 
bump(ps, TRIVIA_FLAG) else - # f() do x, y\n body end ==> (do (call f) (tuple x y) (block body)) + # f() do x, y\n body end ==> (call f (do (tuple x y) (block body))) parse_comma_separated(ps, parse_range) end emit(ps, m, K"tuple") diff --git a/test/expr.jl b/test/expr.jl index 96e711d7..810390c8 100644 --- a/test/expr.jl +++ b/test/expr.jl @@ -296,11 +296,39 @@ @testset "do block conversion" begin @test parsestmt("f(x) do y\n body end") == - Expr(:do, Expr(:call, :f, :x), + Expr(:do, + Expr(:call, :f, :x), Expr(:->, Expr(:tuple, :y), Expr(:block, LineNumberNode(2), :body))) + + @test parsestmt("@f(x) do y body end") == + Expr(:do, + Expr(:macrocall, Symbol("@f"), LineNumberNode(1), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + @test parsestmt("f(x; a=1) do y body end") == + Expr(:do, + Expr(:call, :f, Expr(:parameters, Expr(:kw, :a, 1)), :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(1), + :body))) + + # Test calls with do inside them + @test parsestmt("g(f(x) do y\n body end)") == + Expr(:call, + :g, + Expr(:do, + Expr(:call, :f, :x), + Expr(:->, Expr(:tuple, :y), + Expr(:block, + LineNumberNode(2), + :body)))) end @testset "= to Expr(:kw) conversion" begin @@ -708,7 +736,7 @@ @test parsestmt("(x", ignore_errors=true) == Expr(:block, :x, Expr(:error)) @test parsestmt("x do", ignore_errors=true) == - Expr(:block, :x, Expr(:error, Expr(:do))) + Expr(:block, :x, Expr(:error, Expr(:do_lambda))) end @testset "import" begin diff --git a/test/parser.jl b/test/parser.jl index 9291c7f6..992284c9 100644 --- a/test/parser.jl +++ b/test/parser.jl @@ -355,10 +355,11 @@ tests = [ "A.@x(y)" => "(macrocall-p (. A @x) y)" "A.@x(y).z" => "(. (macrocall-p (. 
A @x) y) z)" # do - "f() do\nend" => "(do (call f) (tuple) (block))" - "f() do ; body end" => "(do (call f) (tuple) (block body))" - "f() do x, y\n body end" => "(do (call f) (tuple x y) (block body))" - "f(x) do y body end" => "(do (call f x) (tuple y) (block body))" + "f() do\nend" => "(call f (do (tuple) (block)))" + "f() do ; body end" => "(call f (do (tuple) (block body)))" + "f() do x, y\n body end" => "(call f (do (tuple x y) (block body)))" + "f(x) do y body end" => "(call f x (do (tuple y) (block body)))" + "@f(x) do y body end" => "(macrocall-p @f x (do (tuple y) (block body)))" # square brackets "@S[a,b]" => "(macrocall @S (vect a b))"