From 3334f7fd54c31cf92dda2298e90f6db5da4bc3e7 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 1 Mar 2017 16:49:44 -0500 Subject: [PATCH 1/6] improve ability to inline away `typeassert` and `===` this was affected by using `Const` in more cases instead of `Type{}` --- base/inference.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/base/inference.jl b/base/inference.jl index ca2ca0b8c5251..8b2dba8b85bd6 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -3467,8 +3467,7 @@ struct InvokeData texpr end -function inline_as_constant(val::ANY, argexprs, sv::InferenceState, - invoke_data::ANY) +function inline_as_constant(val::ANY, argexprs, sv::InferenceState, invoke_data::ANY) if invoke_data === nothing invoke_fexpr = nothing invoke_texpr = nothing @@ -3661,8 +3660,9 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes)==3 # typeassert(x::S, T) => x, when S<:T - if isType(atypes[3]) && isleaftype(atypes[3]) && - atypes[2] ⊑ atypes[3].parameters[1] + a3 = atypes[3] + if (isType(a3) && isleaftype(a3) && atypes[2] ⊑ a3.parameters[1]) || + (isa(a3,Const) && isa(a3.val,Type) && atypes[2] ⊑ a3.val) return (argexprs[2], ()) end end @@ -3670,7 +3670,7 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference # special-case inliners for known pure functions that compute types if sv.params.inlining if isa(e.typ, Const) # || isconstType(e.typ) - if (f === apply_type || f === fieldtype || f === typeof || + if (f === apply_type || f === fieldtype || f === typeof || f === (===) || istopfunction(topmod, f, :typejoin) || istopfunction(topmod, f, :isbits) || istopfunction(topmod, f, :promote_type) || From e8c12ade3b3a2f3d43c3c8f6e96f4a33d012f7f1 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 1 Mar 2017 16:52:56 -0500 Subject: [PATCH 2/6] remove effect-free statements in `void_use_elim_pass!` --- base/inference.jl | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/base/inference.jl b/base/inference.jl index 8b2dba8b85bd6..8580b63d03a26 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -2114,7 +2114,7 @@ issubstate(a::VarState, b::VarState) = (a.typ ⊑ b.typ && a.undef <= b.undef) # in a dead branch but can be ignored when analyzing uses/liveness. is_meta_expr_head(head::Symbol) = (head === :inbounds || head === :boundscheck || head === :meta || - head === :line) + head === :line || head === :simdloop) is_meta_expr(ex::Expr) = is_meta_expr_head(ex.head) function tmerge(typea::ANY, typeb::ANY) @@ -3359,7 +3359,8 @@ function is_pure_builtin(f::ANY) f === Intrinsics.checked_srem_int || f === Intrinsics.checked_urem_int || f === Intrinsics.check_top_bit || - f === Intrinsics.sqrt_llvm) + f === Intrinsics.sqrt_llvm || + f === Intrinsics.cglobal) # cglobal throws an error for symbol-not-found return true end end @@ -3390,12 +3391,17 @@ function effect_free(e::ANY, src::CodeInfo, mod::Module, allow_volatile::Bool) return (isdefined(e.mod, e.name) && (allow_volatile || isconst(e.mod, e.name))) elseif isa(e, Symbol) return allow_volatile + elseif isa(e, Slot) + return src.slotflags[slot_id(e)] & Slot_UsedUndef == 0 elseif isa(e, Expr) e = e::Expr head = e.head if head === :static_parameter || is_meta_expr_head(head) return true end + if e.typ === Bottom + return false + end ea = e.args if head === :call if is_known_call_p(e, is_pure_builtin, src, mod) @@ -4646,7 +4652,13 @@ function void_use_elim_pass!(sv::InferenceState) elseif isa(ex, GlobalRef) ex = ex::GlobalRef return !isdefined(ex.mod, ex.name) - elseif (isa(ex, Expr) || isa(ex, GotoNode) || isa(ex, LineNumberNode) || + elseif isa(ex, Expr) + h = ex.head + if h === :return || h === :(=) || h === :gotoifnot || is_meta_expr_head(h) + return true + end + return !effect_free(ex, sv.src, sv.mod, false) + elseif (isa(ex, GotoNode) || isa(ex, LineNumberNode) || isa(ex, NewvarNode) || isa(ex, Symbol) || isa(ex, LabelNode)) # This is a list of special type handled by the compiler return true From 6d6218fa74f6b4b80956df6e1da853f98bd31a15 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 1 Mar 2017 16:55:32 -0500 Subject: [PATCH 3/6] add a pass to remove variables whose values are never read --- base/inference.jl | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/base/inference.jl b/base/inference.jl index 8580b63d03a26..a9a81279d993c 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -4505,6 +4505,37 @@ function is_known_call_p(e::Expr, pred::ANY, src::CodeInfo, mod::Module) return (isa(f, Const) && pred(f.val)) || (isType(f) && pred(f.parameters[1])) end +function record_used(e::ANY, T::ANY, used::Vector{Bool}) + if isa(e,T) + used[e.id+1] = true + elseif isa(e,Expr) + i0 = e.head === :(=) ? 2 : 1 + for i = i0:length(e.args) + record_used(e.args[i], T, used) + end + end +end + +function remove_unused_vars!(src::CodeInfo) + used = fill(false, length(src.slotnames)+1) + used_ssa = fill(false, length(src.ssavaluetypes)+1) + for i = 1:length(src.code) + record_used(src.code[i], Slot, used) + record_used(src.code[i], SSAValue, used_ssa) + end + for i = 1:length(src.code) + e = src.code[i] + if isa(e,NewvarNode) && !used[e.slot.id+1] + src.code[i] = nothing + elseif isa(e,Expr) && e.head === :(=) + if (isa(e.args[1],Slot) && !used[e.args[1].id+1]) || + (isa(e.args[1],SSAValue) && !used_ssa[e.args[1].id+1]) + src.code[i] = e.args[2] + end + end + end +end + function delete_var!(src::CodeInfo, id, T) filter!(x->!(isa(x,Expr) && (x.head === :(=) || x.head === :const) && isa(x.args[1],T) && x.args[1].id == id), @@ -5038,6 +5069,7 @@ function alloc_elim_pass!(sv::InferenceState) vs, gs = find_sa_vars(sv.src, sv.nargs) remove_redundant_temp_vars(sv.src, sv.nargs, vs, Slot) remove_redundant_temp_vars(sv.src, sv.nargs, gs, SSAValue) + remove_unused_vars!(sv.src) i = 1 while i < length(body) e = body[i] From c703c4dd63621e51389ea6e4d0a5ef90dcf7ee57 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 1 Mar 2017 17:00:14 -0500 Subject: [PATCH 4/6] overhaul `remove_redundant_temp_vars` pass to remove more variables --- base/inference.jl | 119 +++++++++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 44 deletions(-) diff --git a/base/inference.jl b/base/inference.jl index a9a81279d993c..ad52c02a2efcb 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -4536,62 +4536,90 @@ function remove_unused_vars!(src::CodeInfo) end end -function delete_var!(src::CodeInfo, id, T) +var_matches(a::ANY, b::ANY) = false +var_matches(a::SSAValue, b::SSAValue) = a.id == b.id +var_matches(a::Slot, b::Slot) = a.id == b.id + +function delete_var!(src::CodeInfo, v::Union{Slot,SSAValue}) filter!(x->!(isa(x,Expr) && (x.head === :(=) || x.head === :const) && - isa(x.args[1],T) && x.args[1].id == id), + var_matches(x.args[1], v)), src.code) return src end -function slot_replace!(src::CodeInfo, id::Int, rhs::ANY, T::ANY) +function slot_replace!(src::CodeInfo, v::Union{Slot,SSAValue}, rhs::ANY) for i = 1:length(src.code) - src.code[i] = _slot_replace!(src.code[i], id, rhs, T) + src.code[i] = _slot_replace!(src.code[i], v, rhs) end return src end -function _slot_replace!(e, id::Int, rhs::ANY, T::ANY) - if isa(e,T) && e.id == id +function _slot_replace!(e::ANY, v::Union{Slot,SSAValue}, rhs::ANY) + if var_matches(e, v) return rhs end if isa(e,Expr) for i = 1:length(e.args) - e.args[i] = _slot_replace!(e.args[i], id, rhs, T) + e.args[i] = _slot_replace!(e.args[i], v, rhs) end end return e end -occurs_undef(var::Int, expr, flags) = - flags[var] & Slot_UsedUndef != 0 && occurs_more(expr, e -> (isa(e, Slot) && slot_id(e) == var), 0) > 0 - is_argument(nargs::Int, v::Slot) = slot_id(v) <= nargs +normslot(s::SlotNumber) = s +normslot(s::TypedSlot) = SlotNumber(slot_id(s)) + +# given a single-assigned var and its initializer `init`, return what we can +# replace `var` with, or `var` itself if we shouldn't replace it +function get_replacement(table, var::Union{SlotNumber, SSAValue}, init::ANY, nargs, slottypes, ssavaluetypes) + #if isa(init, QuoteNode) # this can cause slight code size increases + # return init + if isa(init, Expr) && init.head === :static_parameter + return init + elseif isa(init, Slot) && is_argument(nargs, init::Slot) + # the transformation is not ideal if the assignment + # is present for the auto-unbox functionality + # (from inlining improved type inference information) + # and this transformation would worsen the type information + # everywhere later in the function + ityp = isa(init, TypedSlot) ? init.typ : slottypes[(init::SlotNumber).id] + if ityp ⊑ (isa(var,SSAValue) ? ssavaluetypes[var.id + 1] : slottypes[var.id]) + return init + end + elseif isa(init, SSAValue) + if haskey(table, init) + return get_replacement(table, init, table[init], nargs, slottypes, ssavaluetypes) + end + return init + elseif isa(init, SlotNumber) && haskey(table, init) + return get_replacement(table, init, table[init], nargs, slottypes, ssavaluetypes) + elseif isa(init, TypedSlot) + sl = normslot(init) + if haskey(table, sl) + rep = get_replacement(table, sl, table[sl], nargs, slottypes, ssavaluetypes) + if isa(rep, SlotNumber) + rep = TypedSlot(rep.id, init.typ) + end + return rep + end + end + return var +end + # remove all single-assigned vars v in "v = x" where x is an argument. # "sa" is the result of find_sa_vars -# T: Slot or SSAValue -function remove_redundant_temp_vars(src::CodeInfo, nargs::Int, sa, T) +function remove_redundant_temp_vars!(src::CodeInfo, nargs::Int, sa) flags = src.slotflags - ssavalue_types = src.ssavaluetypes - bexpr = Expr(:block) - bexpr.args = src.code + slottypes = src.slottypes + ssavaluetypes = src.ssavaluetypes for (v, init) in sa - if isa(init, Slot) && is_argument(nargs, init::Slot) - # this transformation is not valid for vars used before def. - # we need to preserve the point of assignment to know where to - # throw errors (issue #4645). - if T === SSAValue || !occurs_undef(v, bexpr, flags) - # the transformation is not ideal if the assignment - # is present for the auto-unbox functionality - # (from inlining improved type inference information) - # and this transformation would worsen the type information - # everywhere later in the function - ityp = isa(init, TypedSlot) ? init.typ : src.slottypes[(init::SlotNumber).id] - if ityp ⊑ (T === SSAValue ? ssavalue_types[v + 1] : src.slottypes[v]) - delete_var!(src, v, T) - slot_replace!(src, v, init, T) - end - end + repl = get_replacement(sa, v, init, nargs, slottypes, ssavaluetypes) + compare = isa(repl,TypedSlot) ? normslot(repl) : repl + if compare !== v + delete_var!(src, v) + slot_replace!(src, v, repl) end end return src @@ -4602,27 +4630,31 @@ function find_sa_vars(src::CodeInfo, nargs::Int) body = src.code av = ObjectIdDict() av2 = ObjectIdDict() - gss = ObjectIdDict() for i = 1:length(body) e = body[i] if isa(e,Expr) && e.head === :(=) lhs = e.args[1] if isa(lhs, SSAValue) - gss[lhs.id] = e.args[2] + av[lhs] = e.args[2] elseif isa(lhs, Slot) - id = slot_id(lhs) - if id > nargs # exclude args - if !haskey(av, id) - av[id] = e.args[2] + lhs = normslot(lhs) + id = lhs.id + # exclude args and used undef vars + # this transformation is not valid for vars used before def. + # we need to preserve the point of assignment to know where to + # throw errors (issue #4645). + if id > nargs && (src.slotflags[id] & Slot_UsedUndef == 0) + if !haskey(av, lhs) + av[lhs] = e.args[2] else - av2[id] = true + av2[lhs] = true end end end end end - filter!((id, _) -> !haskey(av2, id), av) - return (av, gss) + filter!((v, _) -> !haskey(av2, v), av) + return av end symequal(x::SSAValue, y::SSAValue) = x.id === y.id @@ -5066,9 +5098,8 @@ function alloc_elim_pass!(sv::InferenceState) body = sv.src.code bexpr = Expr(:block) bexpr.args = body - vs, gs = find_sa_vars(sv.src, sv.nargs) - remove_redundant_temp_vars(sv.src, sv.nargs, vs, Slot) - remove_redundant_temp_vars(sv.src, sv.nargs, gs, SSAValue) + vs = find_sa_vars(sv.src, sv.nargs) + remove_redundant_temp_vars!(sv.src, sv.nargs, vs) remove_unused_vars!(sv.src) i = 1 while i < length(body) @@ -5079,7 +5110,7 @@ function alloc_elim_pass!(sv::InferenceState) end e = e::Expr if e.head === :(=) && (isa(e.args[1], SSAValue) || - (isa(e.args[1], Slot) && haskey(vs, slot_id(e.args[1])))) + (isa(e.args[1], Slot) && haskey(vs, normslot(e.args[1])))) var = e.args[1] rhs = e.args[2] # Need to make sure LLVM can recognize this as LLVM ssa value too From 763f36c6780f74a81196a50d7a79b0f236e22cef Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Wed, 8 Mar 2017 01:49:32 -0500 Subject: [PATCH 5/6] add an inlining heuristic that helps avoid allocations don't inline into a function `f` if doing so would put it over the inlining threshhold, and if inlining `f` itself would help avoid tuple allocations. so far this is only used on `promote`, to limit the effects as much as possible. --- base/inference.jl | 154 +++++++++++++++++++++++++--------------------- 1 file changed, 85 insertions(+), 69 deletions(-) diff --git a/base/inference.jl b/base/inference.jl index ad52c02a2efcb..298a84d679dab 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -2875,9 +2875,7 @@ function isinlineable(m::Method, src::CodeInfo) end end if !inlineable - body = Expr(:block) - body.args = src.code - inlineable = inline_worthy(body, cost) + inlineable = inline_worthy_stmts(src.code, cost) end return inlineable end @@ -3661,7 +3659,10 @@ end # static parameters are ok if all the static parameter values are leaf types, # meaning they are fully known. # `ft` is the type of the function. `f` is the exact function if known, or else `nothing`. -function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::InferenceState) +# `pending_stmts` is an array of statements from functions inlined so far, so +# we can estimate the total size of the enclosing function after inlining. +function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::InferenceState, + pending_stmts) argexprs = e.args if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes)==3 @@ -3932,6 +3933,34 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference invoke_data) end + if !isa(ast, Array{Any,1}) + ast = ccall(:jl_uncompress_ast, Any, (Any, Any), method, ast) + else + ast = copy_exprargs(ast) + end + ast = ast::Array{Any,1} + + # `promote` is a tuple-returning function that is very important to inline + if isdefined(Main, :Base) && isdefined(Main.Base, :promote) && + length(sv.src.slottypes) > 0 && sv.src.slottypes[1] ⊑ typeof(getfield(Main.Base, :promote)) + # check for non-isbits Tuple return + if sv.bestguess ⊑ Tuple && !isbits(widenconst(sv.bestguess)) + # See if inlining this call would change the enclosing function + # from inlineable to not inlineable. + # This heuristic is applied to functions that return non-bits + # tuples, since we want to be able to inline those functions to + # avoid the tuple allocation. + current_stmts = vcat(sv.src.code, pending_stmts) + if inline_worthy_stmts(current_stmts) + append!(current_stmts, ast) + if !inline_worthy_stmts(current_stmts) + return invoke_NF(argexprs0, e.typ, atypes, sv, atype_unlimited, + invoke_data) + end + end + end + end + # create the backedge if isa(frame, InferenceState) && !frame.inferred && frame.cached # in this case, the actual backedge linfo hasn't been computed @@ -3954,13 +3983,6 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference nm = length(unwrap_unionall(metharg).parameters) - if !isa(ast, Array{Any,1}) - ast = ccall(:jl_uncompress_ast, Any, (Any, Any), method, ast) - else - ast = copy_exprargs(ast) - end - ast = ast::Array{Any,1} - body = Expr(:block) body.args = ast propagate_inbounds = src.propagate_inbounds @@ -4187,10 +4209,13 @@ function inline_ignore(ex::ANY) return isa(ex, Expr) && is_meta_expr(ex::Expr) end +function inline_worthy_stmts(stmts::Vector{Any}, cost::Integer = 1000) + body = Expr(:block) + body.args = stmts + return inline_worthy(body, cost) +end + function inline_worthy(body::Expr, cost::Integer=1000) # precondition: 0 < cost; nominal cost = 1000 - if popmeta!(body, :noinline)[1] - return false - end symlim = 1000 + 5_000_000 ÷ cost nstmt = 0 for stmt in body.args @@ -4238,17 +4263,15 @@ end function inlining_pass!(sv::InferenceState) eargs = sv.src.code i = 1 + stmtbuf = [] while i <= length(eargs) ei = eargs[i] if isa(ei, Expr) - res = inlining_pass(ei, sv) - eargs[i] = res[1] - if isa(res[2], Array) - sts = res[2]::Array{Any,1} - for j = 1:length(sts) - insert!(eargs, i, sts[j]) - i += 1 - end + eargs[i] = inlining_pass(ei, sv, stmtbuf, 1) + if !isempty(stmtbuf) + splice!(eargs, i:i-1, stmtbuf) + i += length(stmtbuf) + empty!(stmtbuf) end end i += 1 @@ -4257,16 +4280,17 @@ end const corenumtype = Union{Int32, Int64, Float32, Float64} -function inlining_pass(e::Expr, sv::InferenceState) +# return inlined replacement for `e`, inserting new needed statements +# at index `ins` in `stmts`. +function inlining_pass(e::Expr, sv::InferenceState, stmts, ins) if e.head === :method # avoid running the inlining pass on function definitions - return (e, ()) + return e end eargs = e.args if length(eargs) < 1 - return (e, ()) + return e end - stmts = [] arg1 = eargs[1] isccall = false i0 = 1 @@ -4281,6 +4305,7 @@ function inlining_pass(e::Expr, sv::InferenceState) i0 = 5 end has_stmts = false # needed to preserve order-of-execution + prev_stmts_length = length(stmts) for _i = length(eargs):-1:i0 if isccall && _i == 3 i = 1 @@ -4303,40 +4328,33 @@ function inlining_pass(e::Expr, sv::InferenceState) else argloc = eargs end - res = inlining_pass(ei, sv) - res1 = res[1] - res2 = res[2] - has_new_stmts = isa(res2, Array) && !isempty(res2::Array{Any,1}) + sl0 = length(stmts) + res = inlining_pass(ei, sv, stmts, ins) + ns = length(stmts) - sl0 # number of new statements just added if isccallee - restype = exprtype(res1, sv.src, sv.mod) + restype = exprtype(res, sv.src, sv.mod) if isa(restype, Const) argloc[i] = restype.val - if !effect_free(res1, sv.src, sv.mod, false) - insert!(stmts, 1, res1) - end - if has_new_stmts - prepend!(stmts, res2::Array{Any,1}) + if !effect_free(res, sv.src, sv.mod, false) + insert!(stmts, ins+ns, res) end # Assume this is the last argument to process break end end - if has_stmts && !effect_free(res1, sv.src, sv.mod, false) - restype = exprtype(res1, sv.src, sv.mod) + if has_stmts && !effect_free(res, sv.src, sv.mod, false) + restype = exprtype(res, sv.src, sv.mod) vnew = newvar!(sv, restype) argloc[i] = vnew - unshift!(stmts, Expr(:(=), vnew, res1)) + insert!(stmts, ins+ns, Expr(:(=), vnew, res)) else - argloc[i] = res1 - end - if has_new_stmts - res2 = res2::Array{Any,1} - prepend!(stmts, res2) - if !has_stmts && !(_i == i0) - for stmt in res2 - if !effect_free(stmt, sv.src, sv.mod, true) - has_stmts = true - end + argloc[i] = res + end + if !has_stmts && ns > 0 && !(_i == i0) + for s = ins:ins+ns-1 + stmt = stmts[s] + if !effect_free(stmt, sv.src, sv.mod, true) + has_stmts = true; break end end end @@ -4351,7 +4369,7 @@ function inlining_pass(e::Expr, sv::InferenceState) end end if e.head !== :call - return (e, stmts) + return e end ft = exprtype(arg1, sv.src, sv.mod) @@ -4363,10 +4381,12 @@ function inlining_pass(e::Expr, sv::InferenceState) else f = nothing if !( isleaftype(ft) || ft<:Type ) - return (e, stmts) + return e end end + ins += (length(stmts) - prev_stmts_length) + if sv.params.inlining if isdefined(Main, :Base) && ((isdefined(Main.Base, :^) && f === Main.Base.:^) || @@ -4390,19 +4410,13 @@ function inlining_pass(e::Expr, sv::InferenceState) exprtype(a1, sv.src, sv.mod) ⊑ basenumtype) if square e.args = Any[GlobalRef(Main.Base,:*), a1, a1] - res = inlining_pass(e, sv) + res = inlining_pass(e, sv, stmts, ins) else e.args = Any[GlobalRef(Main.Base,:*), Expr(:call, GlobalRef(Main.Base,:*), a1, a1), a1] e.args[2].typ = e.typ - res = inlining_pass(e, sv) - end - if isa(res, Tuple) - if isa(res[2], Array) && !isempty(res[2]) - append!(stmts, res[2]) - end - res = res[1] + res = inlining_pass(e, sv, stmts, ins) end - return (res, stmts) + return res end end end @@ -4413,13 +4427,14 @@ function inlining_pass(e::Expr, sv::InferenceState) ata[1] = ft for i = 2:length(e.args) a = exprtype(e.args[i], sv.src, sv.mod) - (a === Bottom || isvarargtype(a)) && return (e, stmts) + (a === Bottom || isvarargtype(a)) && return e ata[i] = a end - res = inlineable(f, ft, e, ata, sv) + res = inlineable(f, ft, e, ata, sv, stmts) if isa(res,Tuple) if isa(res[2],Array) && !isempty(res[2]) - append!(stmts,res[2]) + splice!(stmts, ins:ins-1, res[2]) + ins += length(res[2]) end res = res[1] end @@ -4431,7 +4446,7 @@ function inlining_pass(e::Expr, sv::InferenceState) e = res::Expr f = _apply; ft = abstract_eval_constant(f) else - return (res,stmts) + return res end end @@ -4453,7 +4468,7 @@ function inlining_pass(e::Expr, sv::InferenceState) newargs[i-2] = Any[ mk_getfield(aarg,j,tp[j]) for j=1:length(tp) ] else # not all args expandable - return (e,stmts) + return e end end e.args = [Any[e.args[2]]; newargs...] @@ -4468,14 +4483,14 @@ function inlining_pass(e::Expr, sv::InferenceState) else f = nothing if !( isleaftype(ft) || ft<:Type ) - return (e,stmts) + return e end end else - return (e,stmts) + return e end end - return (e,stmts) + return e end const compiler_temp_sym = Symbol("#temp#") @@ -4576,7 +4591,8 @@ normslot(s::TypedSlot) = SlotNumber(slot_id(s)) function get_replacement(table, var::Union{SlotNumber, SSAValue}, init::ANY, nargs, slottypes, ssavaluetypes) #if isa(init, QuoteNode) # this can cause slight code size increases # return init - if isa(init, Expr) && init.head === :static_parameter + if (isa(init, Expr) && init.head === :static_parameter) || isa(init, corenumtype) || + init === () || init === nothing return init elseif isa(init, Slot) && is_argument(nargs, init::Slot) # the transformation is not ideal if the assignment From 3e53392feaa9be3cf6733d4d80f929fbfe7e4870 Mon Sep 17 00:00:00 2001 From: Jeff Bezanson Date: Thu, 9 Mar 2017 20:02:13 -0500 Subject: [PATCH 6/6] fix regressions due to replacing too many slots with SSAValues --- base/inference.jl | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/base/inference.jl b/base/inference.jl index 298a84d679dab..402d167cb4ba2 100644 --- a/base/inference.jl +++ b/base/inference.jl @@ -4605,6 +4605,12 @@ function get_replacement(table, var::Union{SlotNumber, SSAValue}, init::ANY, nar return init end elseif isa(init, SSAValue) + if isa(var, SlotNumber) && slottypes[var.id] ⊑ Tuple + # Here we avoid replacing a Slot with an SSAValue when the type is an + # aggregate. That can cause LLVM to generate a bunch of extra memcpys + # if the data ever needs to be stack allocated later. + return var + end if haskey(table, init) return get_replacement(table, init, table[init], nargs, slottypes, ssavaluetypes) end