Skip to content

Commit

Permalink
wip: inlining: relax finalizer inlining control-flow restriction
Browse files Browse the repository at this point in the history
Eager `finalizer` inlining (#45272) currently has a restriction that
requires all the def/uses to be in a same basic block.

This commit relaxes that restriction a bit by allowing def/uses to
involve control flow when all of them are dominated by a `finalizer`
call to be inlined, since in that case it is safe to insert the body of
`finalizer` at the end of all the def/uses, e.g.
```julia
const FINALIZATION_COUNT = Ref(0)
init_finalization_count!() = FINALIZATION_COUNT[] = 0
get_finalization_count() = FINALIZATION_COUNT[]
@noinline add_finalization_count!(x) = FINALIZATION_COUNT[] += x
@noinline Base.@assume_effects :nothrow safeprint(io::IO, x...) = (@nospecialize; print(io, x...))
mutable struct DoAllocWithFieldInter
    x::Int
end
function register_finalizer!(obj::DoAllocWithFieldInter)
    finalizer(obj) do this
        add_finalization_count!(this.x)
    end
end

function cfg_finalization3(io)
    for i = -999:1000
        o = DoAllocWithFieldInter(i)
        register_finalizer!(o)
        if i == 1000
            safeprint(io, o.x, '\n')
        elseif i > 0
            safeprint(io, o.x)
        end
    end
end
let src = code_typed1(cfg_finalization3, (IO,))
    @test count(isinvoke(:add_finalization_count!), src.code) == 1
end
let
    init_finalization_count!()
    cfg_finalization3(IOBuffer())
    @test get_finalization_count() == 1000
end
```

To support this transformation, the domtree code also gains the ability
to represent post-dominator trees, which is generally useful.

Co-authored-by: Keno Fischer <keno@juliacomputing.com>
  • Loading branch information
aviatesk and Keno committed Sep 21, 2022
1 parent 67f994c commit 6d143ca
Show file tree
Hide file tree
Showing 5 changed files with 340 additions and 71 deletions.
137 changes: 104 additions & 33 deletions base/compiler/ssair/domtree.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,10 +109,16 @@ end

length(D::DFSTree) = length(D.from_pre)

function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
function DFS!(D::DFSTree, blocks::Vector{BasicBlock}, is_post_dominator::Bool)
copy!(D, DFSTree(length(blocks)))
to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
pre_num = 1
if is_post_dominator
# TODO: We're using -1 as the virtual exit node here. Would it make
# sense to actually have a real BB for the exit always?
to_visit = Tuple{BBNumber, PreNumber, Bool}[(-1, 0, false)]
else
to_visit = Tuple{BBNumber, PreNumber, Bool}[(1, 0, false)]
end
pre_num = is_post_dominator ? 0 : 1
post_num = 1
while !isempty(to_visit)
# Because we want the postorder number as well as the preorder number,
Expand All @@ -123,28 +129,39 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
if pushed_children
# Going up the DFS tree, so all we need to do is record the
# postorder number, then move on
D.to_post[current_node_bb] = post_num
D.from_post[post_num] = current_node_bb
if current_node_bb != -1
D.to_post[current_node_bb] = post_num
D.from_post[post_num] = current_node_bb
end
post_num += 1
pop!(to_visit)

elseif D.to_pre[current_node_bb] != 0
elseif current_node_bb != -1 && D.to_pre[current_node_bb] != 0
# Node has already been visited, move on
pop!(to_visit)
continue
else
# Going down the DFS tree

# Record preorder number
D.to_pre[current_node_bb] = pre_num
D.from_pre[pre_num] = current_node_bb
D.to_parent_pre[pre_num] = parent_pre
if current_node_bb != -1
D.to_pre[current_node_bb] = pre_num
D.from_pre[pre_num] = current_node_bb
D.to_parent_pre[pre_num] = parent_pre
end

# Record that children (will) have been pushed
to_visit[end] = (current_node_bb, parent_pre, true)

if is_post_dominator && current_node_bb == -1
edges = Int[bb for bb in 1:length(blocks) if isempty(blocks[bb].succs)]
else
edges = is_post_dominator ? blocks[current_node_bb].preds :
blocks[current_node_bb].succs
end

# Push children to the stack
for succ_bb in blocks[current_node_bb].succs
for succ_bb in edges
push!(to_visit, (succ_bb, pre_num, false))
end

Expand All @@ -161,7 +178,7 @@ function DFS!(D::DFSTree, blocks::Vector{BasicBlock})
return D
end

DFS(blocks::Vector{BasicBlock}) = DFS!(DFSTree(0), blocks)
DFS(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false) = DFS!(DFSTree(0), blocks, is_post_dominator)

"""
Keeps the per-BB state of the Semi NCA algorithm. In the original formulation,
Expand All @@ -184,7 +201,7 @@ end
DomTreeNode() = DomTreeNode(1, Vector{BBNumber}())

"Data structure that encodes which basic block dominates which."
struct DomTree
struct GenericDomTree{IsPostDom}
# These can be reused when updating domtree dynamically
dfs_tree::DFSTree
snca_state::Vector{SNCAData}
Expand All @@ -195,19 +212,25 @@ struct DomTree
# The nodes in the tree (ordered by BB indices)
nodes::Vector{DomTreeNode}
end
const DomTree = GenericDomTree{false}
const PostDomTree = GenericDomTree{true}

function DomTree()
return DomTree(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
function (T::Type{<:GenericDomTree})()
return T(DFSTree(0), SNCAData[], BBNumber[], DomTreeNode[])
end

function construct_domtree(blocks::Vector{BasicBlock})
return update_domtree!(blocks, DomTree(), true, 0)
end

function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree,
recompute_dfs::Bool, max_pre::PreNumber)
function construct_postdomtree(blocks::Vector{BasicBlock})
return update_domtree!(blocks, PostDomTree(), true, 0)
end

function update_domtree!(blocks::Vector{BasicBlock}, domtree::GenericDomTree{IsPostDom},
recompute_dfs::Bool, max_pre::PreNumber) where {IsPostDom}
if recompute_dfs
DFS!(domtree.dfs_tree, blocks)
DFS!(domtree.dfs_tree, blocks, IsPostDom)
end

if max_pre == 0
Expand All @@ -219,17 +242,24 @@ function update_domtree!(blocks::Vector{BasicBlock}, domtree::DomTree,
return domtree
end

function compute_domtree_nodes!(domtree::DomTree)
function compute_domtree_nodes!(domtree::GenericDomTree{IsPostDom}) where {IsPostDom}
# Compute children
copy!(domtree.nodes,
DomTreeNode[DomTreeNode() for _ in 1:length(domtree.idoms_bb)])
for (idx, idom) in Iterators.enumerate(domtree.idoms_bb)
(idx == 1 || idom == 0) && continue
((!IsPostDom && idx == 1) || idom == 0) && continue
push!(domtree.nodes[idom].children, idx)
end
# n.b. now issorted(domtree.nodes[*].children) since idx is sorted above
# Recursively set level
update_level!(domtree.nodes, 1, 1)
if IsPostDom
for (node, idom) in enumerate(domtree.idoms_bb)
idom == 0 || continue
update_level!(domtree.nodes, node, 1)
end
else
update_level!(domtree.nodes, 1, 1)
end
return domtree.nodes
end

Expand All @@ -244,13 +274,18 @@ function update_level!(nodes::Vector{DomTreeNode}, node::BBNumber, level::Int)
end
end

dom_edges(domtree::DomTree, blocks::Vector{BasicBlock}, idx::BBNumber) =
blocks[idx].preds
dom_edges(domtree::PostDomTree, blocks::Vector{BasicBlock}, idx::BBNumber) =
blocks[idx].succs

"""
The main Semi-NCA algorithm. Matches Figure 2.8 in [LG05]. Note that the
pseudocode in [LG05] is not entirely accurate. The best way to understand
what's happening is to read [LT79], then the description of SLT in [LG05]
(warning: inconsistent notation), then the description of Semi-NCA.
"""
function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
function SNCA!(domtree::GenericDomTree{IsPostDom}, blocks::Vector{BasicBlock}, max_pre::PreNumber) where {IsPostDom}
D = domtree.dfs_tree
state = domtree.snca_state
# There may be more blocks than are reachable in the DFS / dominator tree
Expand Down Expand Up @@ -289,13 +324,14 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
# Calculate semidominators, but only for blocks with preorder number up to
# max_pre
ancestors = copy(D.to_parent_pre)
for w::PreNumber in reverse(2:max_pre)
relevant_blocks = IsPostDom ? (1:max_pre) : (2:max_pre)
for w::PreNumber in reverse(relevant_blocks)
# LLVM initializes this to the parent, the paper initializes this to
# `w`, but it doesn't really matter (the parent is a predecessor, so at
# worst we'll discover it below). Save a memory reference here.
semi_w = typemax(PreNumber)
last_linked = PreNumber(w + 1)
for v blocks[D.from_pre[w]].preds
for v dom_edges(domtree, blocks, D.from_pre[w])
# For the purpose of the domtree, ignore virtual predecessors into
# catch blocks.
v == 0 && continue
Expand Down Expand Up @@ -331,7 +367,7 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
# ancestor in the (immediate) dominator tree between its semidominator and
# its parent (see Lemma 2.6 in [LG05]).
idoms_pre = copy(D.to_parent_pre)
for v in 2:n_nodes
for v in (IsPostDom ? (1:n_nodes) : (2:n_nodes))
idom = idoms_pre[v]
vsemi = state[v].semi
while idom > vsemi
Expand All @@ -343,10 +379,11 @@ function SNCA!(domtree::DomTree, blocks::Vector{BasicBlock}, max_pre::PreNumber)
# Express idoms in BB indexing
resize!(domtree.idoms_bb, n_blocks)
for i::BBNumber in 1:n_blocks
if i == 1 || D.to_pre[i] == 0
if (!IsPostDom && i == 1) || D.to_pre[i] == 0
domtree.idoms_bb[i] = 0
else
domtree.idoms_bb[i] = D.from_pre[idoms_pre[D.to_pre[i]]]
ip = idoms_pre[D.to_pre[i]]
domtree.idoms_bb[i] = ip == 0 ? 0 : D.from_pre[ip]
end
end
end
Expand Down Expand Up @@ -549,7 +586,12 @@ Checks if `bb1` dominates `bb2`.
`bb1` dominates `bb2` if the only way to enter `bb2` is via `bb1`.
(Other blocks may be in between, e.g `bb1->bbx->bb2`).
"""
function dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber)
dominates(domtree::DomTree, bb1::BBNumber, bb2::BBNumber) =
_dominates(domtree, bb1, bb2)
postdominates(domtree::PostDomTree, bb1::BBNumber, bb2::BBNumber) =
_dominates(domtree, bb1, bb2)

function _dominates(domtree::GenericDomTree, bb1::BBNumber, bb2::BBNumber)
bb1 == bb2 && return true
target_level = domtree.nodes[bb1].level
source_level = domtree.nodes[bb2].level
Expand Down Expand Up @@ -584,19 +626,48 @@ function iterate(doms::DominatedBlocks, state::Nothing=nothing)
return (bb, nothing)
end

function naive_idoms(blocks::Vector{BasicBlock})
"""
nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
Compute the nearest common (post-)dominator of `a` and `b`.
"""
function nearest_common_dominator(domtree::GenericDomTree, a::BBNumber, b::BBNumber)
alevel = domtree.nodes[a].level
blevel = domtree.nodes[b].level
# W.l.g. assume blevel <= alevel
if alevel < blevel
a, b = b, a
alevel, blevel = blevel, alevel
end
while alevel > blevel
a = domtree.idoms_bb[a]
alevel -= 1
end
while a != b && a != 0
a = domtree.idoms_bb[a]
b = domtree.idoms_bb[b]
end
@assert a == b
return a
end

function naive_idoms(blocks::Vector{BasicBlock}, is_post_dominator::Bool=false)
nblocks = length(blocks)
# The extra +1 helps us detect unreachable blocks below
dom_all = BitSet(1:nblocks+1)
dominators = BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks]
dominators = is_post_dominator ?
BitSet[isempty(blocks[n].succs) ? BitSet(n) : copy(dom_all) for n = 1:nblocks] :
BitSet[n == 1 ? BitSet(1) : copy(dom_all) for n = 1:nblocks]
changed = true
relevant_blocks = (is_post_dominator ? (1:nblocks) : (2:nblocks))
while changed
changed = false
for n = 2:nblocks
if isempty(blocks[n].preds)
for n in relevant_blocks
edges = is_post_dominator ? blocks[n].succs : blocks[n].preds
if isempty(edges)
continue
end
firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, blocks[n].preds))::NTuple{2,Any}
firstp, rest = Iterators.peel(Iterators.filter(p->p != 0, edges))::NTuple{2,Any}
new_doms = copy(dominators[firstp])
for p in rest
intersect!(new_doms, dominators[p])
Expand All @@ -608,7 +679,7 @@ function naive_idoms(blocks::Vector{BasicBlock})
end
# Compute idoms
idoms = fill(0, nblocks)
for i = 2:nblocks
for i in relevant_blocks
if dominators[i] == dom_all
idoms[i] = 0
continue
Expand Down
Loading

0 comments on commit 6d143ca

Please sign in to comment.