From 18bd85e7b4114dc8307cf2f76ba19ac8604ca51b Mon Sep 17 00:00:00 2001 From: Shuhei Kadowaki Date: Tue, 6 Sep 2022 23:23:53 +0900 Subject: [PATCH] inlining: relax `finalizer` inlining control-flow restriction Eager `finalizer` inlining (#45272) currently has a restriction that requires all the def/uses to be in a same basic block. This commit relaxes that restriction a bit by allowing def/uses to involve control flow when all of them are dominated by a `finalizer` call to be inlined, since in that case it is safe to insert the body of `finalizer` at the end of all the def/uses, e.g. ```julia const FINALIZATION_COUNT = Ref(0) init_finalization_count!() = FINALIZATION_COUNT[] = 0 get_finalization_count() = FINALIZATION_COUNT[] @noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x @noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...)) mutable struct DoAllocWithFieldInter x::Int end function register_finalizer!(obj::DoAllocWithFieldInter) finalizer(obj) do this add_finalization_count!(this.x) end end function cfg_finalization3(io) for i = -999:1000 o = DoAllocWithFieldInter(i) register_finalizer!(o) if i == 1000 safeprint(io, o.x, '\n') elseif i > 0 safeprint(io, o.x) end end end let src = code_typed1(cfg_finalization3, (IO,)) @test count(isinvoke(:add_finalization_count!), src.code) == 1 end let init_finalization_count!() cfg_finalization3(IOBuffer()) @test get_finalization_count() == 1000 end ``` To support this transformation, the domtree code also gains the ability to represent post-dominator trees, which is generally useful. Co-authored-by: Keno Fischer --- base/compiler/ssair/domtree.jl | 137 ++++++++++++++++++++++------- base/compiler/ssair/passes.jl | 152 +++++++++++++++++++++++++-------- test/compiler/inline.jl | 110 ++++++++++++++++++++++++ test/compiler/irutils.jl | 7 +- test/compiler/ssair.jl | 5 +- 5 files changed, 340 insertions(+), 71 deletions(-) diff --git a/base/compiler/ssair/domtree.jl b/base/compiler/ssair/domtree.jl index 59016080b52040..b1fd68483041d6 100644 --- a/base/compiler/ssair/domtree.jl +++ b/base/compiler/ssair/domtree.jl @@ -109,10 +109,16 @@ end length(D::DFSTree) = length(D.from_pre) -function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) +function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool) copy!(D, DFSTree(length(blocks))) - to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] - pre_num = 1 + if is_post_dominator + # TODO: We're using -1 as the virtual exit node here. Would it make + # sense to actually have a real BB for the exit always? + to_visit = Tuple{BBNumber, PreNumber, Bool}[(-1, 0, false)] + else + to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)] + end + pre_num = is_post_dominator ? 0 : 1 post_num = 1 while !isempty(to_visit) # Because we want the postorder number as well as the preorder number, @@ -123,12 +129,14 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) if pushed_children # Going up the DFS tree, so all we need to do is record the # postorder number, then move on - D.to_post[current_node_bb] = post_num - D.from_post[post_num] = current_node_bb + if current_node_bb != -1 + D.to_post[current_node_bb] = post_num + D.from_post[post_num] = current_node_bb + end post_num += 1 pop!(to_visit) - elseif D.to_pre[current_node_bb] != 0 + elseif current_node_bb != -1 && D.to_pre[current_node_bb] != 0 # Node has already been visited, move on pop!(to_visit) continue @@ -136,15 +144,24 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) # Going down the DFS tree # Record preorder number - D.to_pre[current_node_bb] = pre_num - D.from_pre[pre_num] = current_node_bb - D.to_parent_pre[pre_num] = parent_pre + if current_node_bb != -1 + D.to_pre[current_node_bb] = pre_num + D.from_pre[pre_num] = current_node_bb + D.to_parent_pre[pre_num] = parent_pre + end # Record that children (will) have been pushed to_visit[end] = (current_node_bb, parent_pre, true) + if is_post_dominator && current_node_bb == -1 + edges = Int[bb for bb in 1:length(blocks) if isempty(blocks[bb].succs)] + else + edges = is_post_dominator ? blocks[current_node_bb].preds : + blocks[current_node_bb].succs + end + # Push children to the stack - for succ_bb in blocks[current_node_bb].succs + for succ_bb in edges push!(to_visit, (succ_bb, pre_num, false)) end @@ -161,7 +178,7 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock}) return D end -DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks) +DFS(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) = DFS!(DFSTree(0), blocks, is_post_dominator) """ Keeps the per-BB state of the Semi NCA algorithm. In the original formulation, @@ -184,7 +201,7 @@ end DomTreeNode() = DomTreeNode(1, Vector{BBNumber}()) "Data structure that encodes which basic block dominates which." -struct DomTree +struct GenericDomTree{IsPostDom} # These can be reused when updating domtree dynamically dfs_tree::DFSTree snca_state::Vector{SNCAData} @@ -195,19 +212,25 @@ struct DomTree # The nodes in the tree (ordered by BB indices) nodes::Vector{DomTreeNode} end +const DomTree = GenericDomTree{false} +const PostDomTree = GenericDomTree{true} -function DomTree() - return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) +function (T::Type{<:GenericDomTree})() + return T(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[]) end function construct_domtree(blocks::Vector{BasicBlock}) return update_domtree!(blocks, DomTree(), true, 0) end -function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, - recompute_dfs::Bool, max_pre::PreNumber) +function construct_postdomtree(blocks::Vector{BasicBlock}) + return update_domtree!(blocks, PostDomTree(), true, 0) +end + +function update_domtree!(blocks::Vector{BasicBlock}, domtree::GenericDomTree{IsPostDom}, + recompute_dfs::Bool, max_pre::PreNumber) where {IsPostDom} if recompute_dfs - DFS!(domtree.dfs_tree, blocks) + DFS!(domtree.dfs_tree, blocks, IsPostDom) end if max_pre == 0 @@ -219,17 +242,24 @@ function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree, return domtree end -function compute_domtree_nodes!(domtree::DomTree) +function compute_domtree_nodes!(domtree::GenericDomTree{IsPostDom}) where {IsPostDom} # Compute children copy!(domtree.nodes, DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)]) for (idx, idom) in Iterators.enumerate(domtree.idoms_bb) - (idx == 1 || idom == 0) && continue + ((!IsPostDom && idx == 1) || idom == 0) && continue push!(domtree.nodes[idom].children, idx) end # n.b. now issorted(domtree.nodes[*].children) since idx is sorted above # Recursively set level - update_level!(domtree.nodes, 1, 1) + if IsPostDom + for (node, idom) in enumerate(domtree.idoms_bb) + idom == 0 || continue + update_level!(domtree.nodes, node, 1) + end + else + update_level!(domtree.nodes, 1, 1) + end return domtree.nodes end @@ -244,13 +274,18 @@ function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int) end end +dom_edges(domtree::DomTree, blocks::Vector{BasicBlock}, idx::BBNumber) = + blocks[idx].preds +dom_edges(domtree::PostDomTree, blocks::Vector{BasicBlock}, idx::BBNumber) = + blocks[idx].succs + """ The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05]. Note that the pseudocode in [LG05] is not entirely accurate. The best way to understand what's happening is to read [LT79], then the description of SLT in [LG05] (warning: inconsistent notation), then the description of Semi-NCA. """ -function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) +function SNCA!(domtree::GenericDomTree{IsPostDom}, blocks::Vector{BasicBlock}, max_pre::PreNumber) where {IsPostDom} D = domtree.dfs_tree state = domtree.snca_state # There may be more blocks than are reachable in the DFS / dominator tree @@ -289,13 +324,14 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # Calculate semidominators, but only for blocks with preorder number up to # max_pre ancestors = copy(D.to_parent_pre) - for w::PreNumber in reverse(2:max_pre) + relevant_blocks = IsPostDom ? (1:max_pre) : (2:max_pre) + for w::PreNumber in reverse(relevant_blocks) # LLVM initializes this to the parent, the paper initializes this to # `w`, but it doesn't really matter (the parent is a predecessor, so at # worst we'll discover it below). Save a memory reference here. semi_w = typemax(PreNumber) last_linked = PreNumber(w + 1) - for v ∈ blocks[D.from_pre[w]].preds + for v ∈ dom_edges(domtree, blocks, D.from_pre[w]) # For the purpose of the domtree, ignore virtual predecessors into # catch blocks. v == 0 && continue @@ -331,7 +367,7 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # ancestor in the (immediate) dominator tree between its semidominator and # its parent (see Lemma 2.6 in [LG05]). idoms_pre = copy(D.to_parent_pre) - for v in 2:n_nodes + for v in (IsPostDom ? (1:n_nodes) : (2:n_nodes)) idom = idoms_pre[v] vsemi = state[v].semi while idom > vsemi @@ -343,10 +379,11 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber) # Express idoms in BB indexing resize!(domtree.idoms_bb, n_blocks) for i::BBNumber in 1:n_blocks - if i == 1 || D.to_pre[i] == 0 + if (!IsPostDom && i == 1) || D.to_pre[i] == 0 domtree.idoms_bb[i] = 0 else - domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]] + ip = idoms_pre[D.to_pre[i]] + domtree.idoms_bb[i] = ip == 0 ? 0 : D.from_pre[ip] end end end @@ -549,7 +586,12 @@ Checks if `bb1` dominates `bb2`. `bb1` dominates `bb2` if the only way to enter `bb2` is via `bb1`. (Other blocks may be in between, e.g `bb1->bbx->bb2`). """ -function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) +dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) = + _dominates(domtree, bb1, bb2) +postdominates(domtree::PostDomTree, bb1::BBNumber, bb2::BBNumber) = + _dominates(domtree, bb1, bb2) + +function _dominates(domtree::GenericDomTree, bb1::BBNumber, bb2::BBNumber) bb1 == bb2 && return true target_level = domtree.nodes[bb1].level source_level = domtree.nodes[bb2].level @@ -584,19 +626,48 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing) return (bb, nothing) end -function naive_idoms(blocks::Vector{BasicBlock}) +""" + nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber) + +Compute the nearest common (post-)dominator of `a` and `b`. +""" +function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber) + alevel = domtree.nodes[a].level + blevel = domtree.nodes[b].level + # W.l.g. assume blevel <= alevel + if alevel < blevel + a, b = b, a + alevel, blevel = blevel, alevel + end + while alevel > blevel + a = domtree.idoms_bb[a] + alevel -= 1 + end + while a != b && a != 0 + a = domtree.idoms_bb[a] + b = domtree.idoms_bb[b] + end + @assert a == b + return a +end + +function naive_idoms(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) nblocks = length(blocks) # The extra +1 helps us detect unreachable blocks below dom_all = BitSet(1:nblocks+1) - dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks] + dominators = is_post_dominator ? + BitSet[isempty(blocks[n].succs) ? BitSet(n) : copy(dom_all) for n = 1:nblocks] : + BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks] changed = true + relevant_blocks = (is_post_dominator ? (1:nblocks) : (2:nblocks)) while changed changed = false - for n = 2:nblocks - if isempty(blocks[n].preds) + for n in relevant_blocks + edges = is_post_dominator ? blocks[n].succs : blocks[n].preds + if isempty(edges) continue end - firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))::NTuple{2,Any} + firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, edges))::NTuple{2,Any} new_doms = copy(dominators[firstp]) for p in rest intersect!(new_doms, dominators[p]) @@ -608,7 +679,7 @@ function naive_idoms(blocks::Vector{BasicBlock}) end # Compute idoms idoms = fill(0, nblocks) - for i = 2:nblocks + for i in relevant_blocks if dominators[i] == dom_all idoms[i] = 0 continue diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index 70ae94e611a1fb..00577526ac2eb2 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -595,16 +595,21 @@ function is_old(compact, @nospecialize(old_node_ssa)) !already_inserted(compact, old_node_ssa) end -mutable struct LazyDomtree +mutable struct LazyGenericDomtree{IsPostDom} ir::IRCode - domtree::DomTree - LazyDomtree(ir::IRCode) = new(ir) + domtree::GenericDomTree{IsPostDom} + LazyGenericDomtree{IsPostDom}(ir::IRCode) where {IsPostDom} = new{IsPostDom}(ir) end -function get!(x::LazyDomtree) +function get!(x::LazyGenericDomtree{IsPostDom}) where {IsPostDom} isdefined(x, :domtree) && return x.domtree - return @timeit "domtree 2" x.domtree = construct_domtree(x.ir.cfg.blocks) + return @timeit "domtree 2" x.domtree = IsPostDom ? + construct_postdomtree(x.ir.cfg.blocks) : + construct_domtree(x.ir.cfg.blocks) end +const LazyDomtree = LazyGenericDomtree{false} +const LazyPostDomtree = LazyGenericDomtree{true} + function perform_lifting!(compact::IncrementalCompact, visited_phinodes::Vector{AnySSAValue}, @nospecialize(cache_key), lifting_cache::IdDict{Pair{AnySSAValue, Any}, AnySSAValue}, @@ -1051,7 +1056,7 @@ end # NOTE we resolve the inlining source here as we don't want to serialize `Core.Compiler` # data structure into the global cache (see the comment in `handle_finalizer_call!`) -function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState) +function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState, attach_after::Bool) code = get(inlining.mi_cache, mi, nothing) et = InliningEdgeTracker(inlining.et) if code isa CodeInstance @@ -1091,7 +1096,7 @@ function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi:: ssa_rename[ssa.id] end stmt′ = ssa_substitute_op!(InsertBefore(ir, SSAValue(idx)), inst, stmt′, argexprs, mi.specTypes, mi.sparam_vals, sp_ssa, :default) - ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt′, inst; line = inst[:line] + linetable_offset), true) + ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt′, inst; line = inst[:line] + linetable_offset), attach_after) end return true @@ -1099,39 +1104,112 @@ end is_nothrow(ir::IRCode, pc::Int) = (ir.stmts[pc][:flag] & IR_FLAG_NOTHROW) ≠ 0 -function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, inlining::InliningState, info::Union{FinalizerInfo, Nothing}) - # For now: Require that all uses and defs are in the same basic block, - # so that live range calculations are easy. - bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)] - minval::Int = typemax(Int) - maxval::Int = 0 +function reachable_blocks(cfg::CFG, from_bb, to_bb = nothing) + worklist = Int[] + visited = BitSet() + if to_bb !== nothing + push!(visited, to_bb) + end + function visit!(bb) + if !(bb in visited) + push!(visited, bb) + push!(worklist, bb) + end + end + visit!(from_bb) + while !isempty(worklist) + foreach(visit!, cfg.blocks[pop!(worklist)].succs) + end + return visited +end - function check_in_range(x::Union{Int,SSAUse}) - if isa(x, SSAUse) - didx = x.idx +function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse::SSADefUse, + inlining::InliningState, lazydomtree::LazyDomtree, + lazypostdomtree::LazyPostDomtree, info::Union{FinalizerInfo, Nothing}) + # For now, require that: + # 1. The allocation dominates the finalizer registration + # 2. The finalizer registration dominates all uses reachable from the + # finalizer registration. + # 3. The insertion block for the finalizer is the post-dominator of all + # uses and the finalizer registration block. The insertion block must + # be dominated by the finalizer registration block. + # 4. The path from the finalizer registration to the finalizer inlining + # location is nothrow + # + # TODO: We could relax item 3, by inlining the finalizer multiple times. + + # Check #1: The allocation dominates the finalizer registration + domtree = get!(lazydomtree) + finalizer_bb = block_for_inst(ir, finalizer_idx) + alloc_bb = block_for_inst(ir, idx) + dominates(domtree, alloc_bb, finalizer_bb) || return nothing + + bb_insert_block = finalizer_bb + bb_insert_idx = finalizer_idx + function note_block_use!(bb, idx) + new_bb_insert_block = nearest_common_dominator(get!(lazypostdomtree), bb_insert_block, bb) + bb_insert_idx = new_bb_insert_block == bb_insert_block ? max(idx, bb_insert_idx) : nothing + bb_insert_block = new_bb_insert_block + nothing + end + + # Collect all reachable blocks between the finalizer registration and the + # insertion point + blocks = reachable_blocks(ir.cfg, finalizer_bb, alloc_bb) + + # Check #2 + all_in_finalizer_bb::Bool = true + maxval::Int = 0 + function check_defuse(x::Union{Int,SSAUse}) + duidx = x isa SSAUse ? x.idx : x + duidx == finalizer_idx && return true + bb = block_for_inst(ir, duidx) + # Not reachable from finalizer registration - we're ok + bb ∉ blocks && return true + note_block_use!(bb, idx) + if dominates(domtree, finalizer_bb, bb) + return true else - didx = x - end - didx in bb.stmts || return false - if didx < minval - minval = didx + @show duidx + return false end - if didx > maxval - maxval = didx + end + all(check_defuse, defuse.uses) || return nothing + all(check_defuse, defuse.defs) || return nothing + + function check_range_nothrow(range) + all(finalizer_idx:maxval) do sidx::Int + r = is_nothrow(ir, idx) || sidx == finalizer_idx || sidx == idx + r || @show sidx + return r end - return true end - check_in_range(idx) || return nothing - all(check_in_range, defuse.uses) || return nothing - all(check_in_range, defuse.defs) || return nothing + # Check #3 + dominates(domtree, finalizer_bb, bb_insert_block) || return nothing + + # Collect all reachable blocks between the finalizer registration and the + # insertion point + blocks = reachable_blocks(ir.cfg, finalizer_bb, bb_insert_block) + + # Check #4 + for bb in blocks + range = ir.cfg.blocks[bb].stmts + if bb == bb_insert_block + bb_insert_idx === nothing && continue + range = first(range):bb_insert_idx + end + if bb == finalizer_bb + range = finalizer_idx:last(range) + end + check_range_nothrow(range) || return nothing + end - # For now: Require all statements in the basic block range to be nothrow. - all(minval:maxval) do sidx::Int - return is_nothrow(ir, idx) || sidx == finalizer_idx || sidx == idx - end || return nothing + # Ok, legality check complete. Figure out the exact statement where we're + # gonna inline the finalizer. + loc = bb_insert_idx === nothing ? first(ir.cfg.blocks[bb_insert_block].stmts) : bb_insert_idx + attach_after = bb_insert_idx !== nothing - # Ok, `finalizer` rewrite is legal. finalizer_stmt = ir[SSAValue(finalizer_idx)][:inst] argexprs = Any[finalizer_stmt.args[2], finalizer_stmt.args[3]] flags = info === nothing ? UInt8(0) : flags_for_effects(info.effects) @@ -1141,14 +1219,14 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse # No code in the function - Nothing to do else mi = finalizer_stmt.args[5]::MethodInstance - if inline::Bool && try_inline_finalizer!(ir, argexprs, maxval, mi, inlining) + if inline::Bool && try_inline_finalizer!(ir, argexprs, loc, mi, inlining, attach_after) # the finalizer body has been inlined else - insert_node!(ir, maxval, with_flags(NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), flags), true) + insert_node!(ir, loc, with_flags(NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), flags), attach_after) end end else - insert_node!(ir, maxval, with_flags(NewInstruction(Expr(:call, argexprs...), Nothing), flags), true) + insert_node!(ir, loc, with_flags(NewInstruction(Expr(:call, argexprs...), Nothing), flags), attach_after) end # Erase the call to `finalizer` ir[SSAValue(finalizer_idx)][:inst] = nothing @@ -1156,6 +1234,7 @@ function try_resolve_finalizer!(ir::IRCode, idx::Int, finalizer_idx::Int, defuse end function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) + lazypostdomtree = LazyPostDomtree(ir) for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) # Check if there are any uses we did not account for. If so, the variable @@ -1188,7 +1267,8 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse end end if finalizer_idx !== nothing && inlining !== nothing - try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining, ir[SSAValue(finalizer_idx)][:info]) + try_resolve_finalizer!(ir, idx, finalizer_idx, defuse, inlining, + lazydomtree, lazypostdomtree, ir[SSAValue(finalizer_idx)][:info]) continue end # Partition defuses by field diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index a70bfff62d5eea..72e4b34331edda 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -1380,6 +1380,14 @@ mutable struct DoAllocWithField end end end +mutable struct DoAllocWithFieldInter + x::Int +end +function register_finalizer!(obj::DoAllocWithFieldInter) + finalizer(obj) do this + add_finalization_count!(this.x) + end +end function const_finalization(io) for i = 1:1000 @@ -1409,6 +1417,108 @@ let src = code_typed1(useless_finalizer, ()) @test length(src.code) == 2 end +# tests finalizer inlining when def/uses involve control flow +function cfg_finalization1(io) + for i = -999:1000 + o = DoAllocWithField(i) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization1, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization1(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization2(io) + for i = -999:1000 + o = DoAllocWithField(1) + o.x = i # with `setfield!` + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization2, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization2(IOBuffer()) + @test get_finalization_count() == 2000 +end + +function cfg_finalization3(io) + for i = -999:1000 + o = DoAllocWithFieldInter(i) + register_finalizer!(o) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization3, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization3(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization4(io) + for i = -999:1000 + o = DoAllocWithFieldInter(1) + o.x = i # with `setfield!` + register_finalizer!(o) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + end +end +let src = code_typed1(cfg_finalization4, (IO,)) + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization4(IOBuffer()) + @test get_finalization_count() == 1000 +end + +function cfg_finalization5(io) + for i = -999:1000 + o = DoAllocWithFieldInter(i) + if i == 1000 + safeprint(io, o.x, '\n') + elseif i > 0 + safeprint(io, o.x) + end + register_finalizer!(o) + end +end +let src = code_typed1(cfg_finalization5, (IO,)) + # TODO we can fix this case by checking a case when a finalizer block is post-dominated by all the def/uses + @test count(isinvoke(:add_finalization_count!), src.code) == 1 +end +let + init_finalization_count!() + cfg_finalization5(IOBuffer()) + @test get_finalization_count() == 1000 +end + # optimize `[push!|pushfirst!](::Vector{Any}, x...)` @testset "optimize `$f(::Vector{Any}, x...)`" for f = Any[push!, pushfirst!] @eval begin diff --git a/test/compiler/irutils.jl b/test/compiler/irutils.jl index b44a656ea7b341..76f883d6cea2c3 100644 --- a/test/compiler/irutils.jl +++ b/test/compiler/irutils.jl @@ -17,7 +17,12 @@ function iscall((src, f)::Tuple{IR,Base.Callable}, @nospecialize(x)) where IR<:U singleton_type(argextype(x, src)) === f end end -iscall(pred::Base.Callable, @nospecialize(x)) = isexpr(x, :call) && pred(x.args[1]) +function iscall(pred::Base.Callable, @nospecialize(x)) + if isexpr(x, :(=)) + x = x.args[2] + end + return isexpr(x, :call) && pred(x.args[1]) +end # check if `x` is a statically-resolved call of a function whose name is `sym` isinvoke(y) = @nospecialize(x) -> isinvoke(y, x) diff --git a/test/compiler/ssair.jl b/test/compiler/ssair.jl index 1acd490a472954..2474c5994b52e7 100644 --- a/test/compiler/ssair.jl +++ b/test/compiler/ssair.jl @@ -69,8 +69,10 @@ let cfg = CFG(BasicBlock[ ], Int[]) dfs = Compiler.DFS(cfg.blocks) @test dfs.from_pre[dfs.to_parent_pre[dfs.to_pre[5]]] == 4 - let correct_idoms = Compiler.naive_idoms(cfg.blocks) + let correct_idoms = Compiler.naive_idoms(cfg.blocks), + correct_pidoms = Compiler.naive_idoms(cfg.blocks, true) @test Compiler.construct_domtree(cfg.blocks).idoms_bb == correct_idoms + @test Compiler.construct_postdomtree(cfg.blocks).idoms_bb == correct_pidoms # For completeness, reverse the order of pred/succ in the CFG and verify # the answer doesn't change (it does change the which node is chosen # as the semi-dominator, since it changes the DFS numbering). @@ -82,6 +84,7 @@ let cfg = CFG(BasicBlock[ d && (blocks[5] = make_bb(reverse(blocks[5].preds), blocks[5].succs)) cfg′ = CFG(blocks, cfg.index) @test Compiler.construct_domtree(cfg′.blocks).idoms_bb == correct_idoms + @test Compiler.construct_postdomtree(cfg′.blocks).idoms_bb == correct_pidoms end end end