From 3334f7fd54c31cf92dda2298e90f6db5da4bc3e7 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Wed, 1 Mar 2017 16:49:44 -0500
Subject: [PATCH 1/6] improve ability to inline away `typeassert` and `===`

this was affected by using `Const` in more cases instead of `Type{}`
---
 base/inference.jl | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/base/inference.jl b/base/inference.jl
index ca2ca0b8c5251..8b2dba8b85bd6 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -3467,8 +3467,7 @@ struct InvokeData
     texpr
 end
 
-function inline_as_constant(val::ANY, argexprs, sv::InferenceState,
-                            invoke_data::ANY)
+function inline_as_constant(val::ANY, argexprs, sv::InferenceState, invoke_data::ANY)
     if invoke_data === nothing
         invoke_fexpr = nothing
         invoke_texpr = nothing
@@ -3661,8 +3660,9 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
 
     if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes)==3
         # typeassert(x::S, T) => x, when S<:T
-        if isType(atypes[3]) && isleaftype(atypes[3]) &&
-            atypes[2] ⊑ atypes[3].parameters[1]
+        a3 = atypes[3]
+        if (isType(a3) && isleaftype(a3) && atypes[2] ⊑ a3.parameters[1]) ||
+            (isa(a3,Const) && isa(a3.val,Type) && atypes[2] ⊑ a3.val)
             return (argexprs[2], ())
         end
     end
@@ -3670,7 +3670,7 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
     # special-case inliners for known pure functions that compute types
     if sv.params.inlining
         if isa(e.typ, Const) # || isconstType(e.typ)
-            if (f === apply_type || f === fieldtype || f === typeof ||
+            if (f === apply_type || f === fieldtype || f === typeof || f === (===) ||
                 istopfunction(topmod, f, :typejoin) ||
                 istopfunction(topmod, f, :isbits) ||
                 istopfunction(topmod, f, :promote_type) ||

From e8c12ade3b3a2f3d43c3c8f6e96f4a33d012f7f1 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Wed, 1 Mar 2017 16:52:56 -0500
Subject: [PATCH 2/6] remove effect-free statements in `void_use_elim_pass!`

---
 base/inference.jl | 18 +++++++++++++++---
 1 file changed, 15 insertions(+), 3 deletions(-)

diff --git a/base/inference.jl b/base/inference.jl
index 8b2dba8b85bd6..8580b63d03a26 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -2114,7 +2114,7 @@ issubstate(a::VarState, b::VarState) = (a.typ ⊑ b.typ && a.undef <= b.undef)
 # in a dead branch but can be ignored when analyzing uses/liveness.
 is_meta_expr_head(head::Symbol) =
     (head === :inbounds || head === :boundscheck || head === :meta ||
-     head === :line)
+     head === :line || head === :simdloop)
 is_meta_expr(ex::Expr) = is_meta_expr_head(ex.head)
 
 function tmerge(typea::ANY, typeb::ANY)
@@ -3359,7 +3359,8 @@ function is_pure_builtin(f::ANY)
              f === Intrinsics.checked_srem_int ||
              f === Intrinsics.checked_urem_int ||
              f === Intrinsics.check_top_bit ||
-             f === Intrinsics.sqrt_llvm)
+             f === Intrinsics.sqrt_llvm ||
+             f === Intrinsics.cglobal)  # cglobal throws an error for symbol-not-found
             return true
         end
     end
@@ -3390,12 +3391,17 @@ function effect_free(e::ANY, src::CodeInfo, mod::Module, allow_volatile::Bool)
         return (isdefined(e.mod, e.name) && (allow_volatile || isconst(e.mod, e.name)))
     elseif isa(e, Symbol)
         return allow_volatile
+    elseif isa(e, Slot)
+        return src.slotflags[slot_id(e)] & Slot_UsedUndef == 0
     elseif isa(e, Expr)
         e = e::Expr
         head = e.head
         if head === :static_parameter || is_meta_expr_head(head)
             return true
         end
+        if e.typ === Bottom
+            return false
+        end
         ea = e.args
         if head === :call
             if is_known_call_p(e, is_pure_builtin, src, mod)
@@ -4646,7 +4652,13 @@ function void_use_elim_pass!(sv::InferenceState)
         elseif isa(ex, GlobalRef)
             ex = ex::GlobalRef
             return !isdefined(ex.mod, ex.name)
-        elseif (isa(ex, Expr) || isa(ex, GotoNode) || isa(ex, LineNumberNode) ||
+        elseif isa(ex, Expr)
+            h = ex.head
+            if h === :return || h === :(=) || h === :gotoifnot || is_meta_expr_head(h)
+                return true
+            end
+            return !effect_free(ex, sv.src, sv.mod, false)
+        elseif (isa(ex, GotoNode) || isa(ex, LineNumberNode) ||
                 isa(ex, NewvarNode) || isa(ex, Symbol) || isa(ex, LabelNode))
             # This is a list of special type handled by the compiler
             return true

From 6d6218fa74f6b4b80956df6e1da853f98bd31a15 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Wed, 1 Mar 2017 16:55:32 -0500
Subject: [PATCH 3/6] add a pass to remove variables whose values are never
 read

---
 base/inference.jl | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/base/inference.jl b/base/inference.jl
index 8580b63d03a26..a9a81279d993c 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -4505,6 +4505,37 @@ function is_known_call_p(e::Expr, pred::ANY, src::CodeInfo, mod::Module)
     return (isa(f, Const) && pred(f.val)) || (isType(f) && pred(f.parameters[1]))
 end
 
+function record_used(e::ANY, T::ANY, used::Vector{Bool})
+    if isa(e,T)
+        used[e.id+1] = true
+    elseif isa(e,Expr)
+        i0 = e.head === :(=) ? 2 : 1
+        for i = i0:length(e.args)
+            record_used(e.args[i], T, used)
+        end
+    end
+end
+
+function remove_unused_vars!(src::CodeInfo)
+    used = fill(false, length(src.slotnames)+1)
+    used_ssa = fill(false, length(src.ssavaluetypes)+1)
+    for i = 1:length(src.code)
+        record_used(src.code[i], Slot, used)
+        record_used(src.code[i], SSAValue, used_ssa)
+    end
+    for i = 1:length(src.code)
+        e = src.code[i]
+        if isa(e,NewvarNode) && !used[e.slot.id+1]
+            src.code[i] = nothing
+        elseif isa(e,Expr) && e.head === :(=)
+            if (isa(e.args[1],Slot) && !used[e.args[1].id+1]) ||
+                (isa(e.args[1],SSAValue) && !used_ssa[e.args[1].id+1])
+                src.code[i] = e.args[2]
+            end
+        end
+    end
+end
+
 function delete_var!(src::CodeInfo, id, T)
     filter!(x->!(isa(x,Expr) && (x.head === :(=) || x.head === :const) &&
                  isa(x.args[1],T) && x.args[1].id == id),
@@ -5038,6 +5069,7 @@ function alloc_elim_pass!(sv::InferenceState)
     vs, gs = find_sa_vars(sv.src, sv.nargs)
     remove_redundant_temp_vars(sv.src, sv.nargs, vs, Slot)
     remove_redundant_temp_vars(sv.src, sv.nargs, gs, SSAValue)
+    remove_unused_vars!(sv.src)
     i = 1
     while i < length(body)
         e = body[i]

From c703c4dd63621e51389ea6e4d0a5ef90dcf7ee57 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Wed, 1 Mar 2017 17:00:14 -0500
Subject: [PATCH 4/6] overhaul `remove_redundant_temp_vars` pass to remove more
 variables

---
 base/inference.jl | 119 +++++++++++++++++++++++++++++-----------------
 1 file changed, 75 insertions(+), 44 deletions(-)

diff --git a/base/inference.jl b/base/inference.jl
index a9a81279d993c..ad52c02a2efcb 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -4536,62 +4536,90 @@ function remove_unused_vars!(src::CodeInfo)
     end
 end
 
-function delete_var!(src::CodeInfo, id, T)
+var_matches(a::ANY,      b::ANY)      = false
+var_matches(a::SSAValue, b::SSAValue) = a.id == b.id
+var_matches(a::Slot,     b::Slot)     = a.id == b.id
+
+function delete_var!(src::CodeInfo, v::Union{Slot,SSAValue})
     filter!(x->!(isa(x,Expr) && (x.head === :(=) || x.head === :const) &&
-                 isa(x.args[1],T) && x.args[1].id == id),
+                 var_matches(x.args[1], v)),
             src.code)
     return src
 end
 
-function slot_replace!(src::CodeInfo, id::Int, rhs::ANY, T::ANY)
+function slot_replace!(src::CodeInfo, v::Union{Slot,SSAValue}, rhs::ANY)
     for i = 1:length(src.code)
-        src.code[i] = _slot_replace!(src.code[i], id, rhs, T)
+        src.code[i] = _slot_replace!(src.code[i], v, rhs)
     end
     return src
 end
 
-function _slot_replace!(e, id::Int, rhs::ANY, T::ANY)
-    if isa(e,T) && e.id == id
+function _slot_replace!(e::ANY, v::Union{Slot,SSAValue}, rhs::ANY)
+    if var_matches(e, v)
         return rhs
     end
     if isa(e,Expr)
         for i = 1:length(e.args)
-            e.args[i] = _slot_replace!(e.args[i], id, rhs, T)
+            e.args[i] = _slot_replace!(e.args[i], v, rhs)
         end
     end
     return e
 end
 
-occurs_undef(var::Int, expr, flags) =
-    flags[var] & Slot_UsedUndef != 0 && occurs_more(expr, e -> (isa(e, Slot) && slot_id(e) == var), 0) > 0
-
 is_argument(nargs::Int, v::Slot) = slot_id(v) <= nargs
 
+normslot(s::SlotNumber) = s
+normslot(s::TypedSlot) = SlotNumber(slot_id(s))
+
+# given a single-assigned var and its initializer `init`, return what we can
+# replace `var` with, or `var` itself if we shouldn't replace it
+function get_replacement(table, var::Union{SlotNumber, SSAValue}, init::ANY, nargs, slottypes, ssavaluetypes)
+    #if isa(init, QuoteNode)  # this can cause slight code size increases
+    #    return init
+    if isa(init, Expr) && init.head === :static_parameter
+        return init
+    elseif isa(init, Slot) && is_argument(nargs, init::Slot)
+        # the transformation is not ideal if the assignment
+        # is present for the auto-unbox functionality
+        # (from inlining improved type inference information)
+        # and this transformation would worsen the type information
+        # everywhere later in the function
+        ityp = isa(init, TypedSlot) ? init.typ : slottypes[(init::SlotNumber).id]
+        if ityp ⊑ (isa(var,SSAValue) ? ssavaluetypes[var.id + 1] : slottypes[var.id])
+            return init
+        end
+    elseif isa(init, SSAValue)
+        if haskey(table, init)
+            return get_replacement(table, init, table[init], nargs, slottypes, ssavaluetypes)
+        end
+        return init
+    elseif isa(init, SlotNumber) && haskey(table, init)
+        return get_replacement(table, init, table[init], nargs, slottypes, ssavaluetypes)
+    elseif isa(init, TypedSlot)
+        sl = normslot(init)
+        if haskey(table, sl)
+            rep = get_replacement(table, sl, table[sl], nargs, slottypes, ssavaluetypes)
+            if isa(rep, SlotNumber)
+                rep = TypedSlot(rep.id, init.typ)
+            end
+            return rep
+        end
+    end
+    return var
+end
+
 # remove all single-assigned vars v in "v = x" where x is an argument.
 # "sa" is the result of find_sa_vars
-# T: Slot or SSAValue
-function remove_redundant_temp_vars(src::CodeInfo, nargs::Int, sa, T)
+function remove_redundant_temp_vars!(src::CodeInfo, nargs::Int, sa)
     flags = src.slotflags
-    ssavalue_types = src.ssavaluetypes
-    bexpr = Expr(:block)
-    bexpr.args = src.code
+    slottypes = src.slottypes
+    ssavaluetypes = src.ssavaluetypes
     for (v, init) in sa
-        if isa(init, Slot) && is_argument(nargs, init::Slot)
-            # this transformation is not valid for vars used before def.
-            # we need to preserve the point of assignment to know where to
-            # throw errors (issue #4645).
-            if T === SSAValue || !occurs_undef(v, bexpr, flags)
-                # the transformation is not ideal if the assignment
-                # is present for the auto-unbox functionality
-                # (from inlining improved type inference information)
-                # and this transformation would worsen the type information
-                # everywhere later in the function
-                ityp = isa(init, TypedSlot) ? init.typ : src.slottypes[(init::SlotNumber).id]
-                if ityp ⊑ (T === SSAValue ? ssavalue_types[v + 1] : src.slottypes[v])
-                    delete_var!(src, v, T)
-                    slot_replace!(src, v, init, T)
-                end
-            end
+        repl = get_replacement(sa, v, init, nargs, slottypes, ssavaluetypes)
+        compare = isa(repl,TypedSlot) ? normslot(repl) : repl
+        if compare !== v
+            delete_var!(src, v)
+            slot_replace!(src, v, repl)
         end
     end
     return src
@@ -4602,27 +4630,31 @@ function find_sa_vars(src::CodeInfo, nargs::Int)
     body = src.code
     av = ObjectIdDict()
     av2 = ObjectIdDict()
-    gss = ObjectIdDict()
     for i = 1:length(body)
         e = body[i]
         if isa(e,Expr) && e.head === :(=)
             lhs = e.args[1]
             if isa(lhs, SSAValue)
-                gss[lhs.id] = e.args[2]
+                av[lhs] = e.args[2]
             elseif isa(lhs, Slot)
-                id = slot_id(lhs)
-                if id > nargs  # exclude args
-                    if !haskey(av, id)
-                        av[id] = e.args[2]
+                lhs = normslot(lhs)
+                id = lhs.id
+                # exclude args and used undef vars
+                # this transformation is not valid for vars used before def.
+                # we need to preserve the point of assignment to know where to
+                # throw errors (issue #4645).
+                if id > nargs && (src.slotflags[id] & Slot_UsedUndef == 0)
+                    if !haskey(av, lhs)
+                        av[lhs] = e.args[2]
                     else
-                        av2[id] = true
+                        av2[lhs] = true
                     end
                 end
             end
         end
     end
-    filter!((id, _) -> !haskey(av2, id), av)
-    return (av, gss)
+    filter!((v, _) -> !haskey(av2, v), av)
+    return av
 end
 
 symequal(x::SSAValue, y::SSAValue) = x.id === y.id
@@ -5066,9 +5098,8 @@ function alloc_elim_pass!(sv::InferenceState)
     body = sv.src.code
     bexpr = Expr(:block)
     bexpr.args = body
-    vs, gs = find_sa_vars(sv.src, sv.nargs)
-    remove_redundant_temp_vars(sv.src, sv.nargs, vs, Slot)
-    remove_redundant_temp_vars(sv.src, sv.nargs, gs, SSAValue)
+    vs = find_sa_vars(sv.src, sv.nargs)
+    remove_redundant_temp_vars!(sv.src, sv.nargs, vs)
     remove_unused_vars!(sv.src)
     i = 1
     while i < length(body)
@@ -5079,7 +5110,7 @@ function alloc_elim_pass!(sv::InferenceState)
         end
         e = e::Expr
         if e.head === :(=) && (isa(e.args[1], SSAValue) ||
-                               (isa(e.args[1], Slot) && haskey(vs, slot_id(e.args[1]))))
+                               (isa(e.args[1], Slot) && haskey(vs, normslot(e.args[1]))))
             var = e.args[1]
             rhs = e.args[2]
             # Need to make sure LLVM can recognize this as LLVM ssa value too

From 763f36c6780f74a81196a50d7a79b0f236e22cef Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Wed, 8 Mar 2017 01:49:32 -0500
Subject: [PATCH 5/6] add an inlining heuristic that helps avoid allocations

don't inline into a function `f` if doing so would put it over the
inlining threshhold, and if inlining `f` itself would help
avoid tuple allocations.

so far this is only used on `promote`, to limit the effects as
much as possible.
---
 base/inference.jl | 154 +++++++++++++++++++++++++---------------------
 1 file changed, 85 insertions(+), 69 deletions(-)

diff --git a/base/inference.jl b/base/inference.jl
index ad52c02a2efcb..298a84d679dab 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -2875,9 +2875,7 @@ function isinlineable(m::Method, src::CodeInfo)
         end
     end
     if !inlineable
-        body = Expr(:block)
-        body.args = src.code
-        inlineable = inline_worthy(body, cost)
+        inlineable = inline_worthy_stmts(src.code, cost)
     end
     return inlineable
 end
@@ -3661,7 +3659,10 @@ end
 # static parameters are ok if all the static parameter values are leaf types,
 # meaning they are fully known.
 # `ft` is the type of the function. `f` is the exact function if known, or else `nothing`.
-function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::InferenceState)
+# `pending_stmts` is an array of statements from functions inlined so far, so
+# we can estimate the total size of the enclosing function after inlining.
+function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::InferenceState,
+                    pending_stmts)
     argexprs = e.args
 
     if (f === typeassert || ft ⊑ typeof(typeassert)) && length(atypes)==3
@@ -3932,6 +3933,34 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
                          invoke_data)
     end
 
+    if !isa(ast, Array{Any,1})
+        ast = ccall(:jl_uncompress_ast, Any, (Any, Any), method, ast)
+    else
+        ast = copy_exprargs(ast)
+    end
+    ast = ast::Array{Any,1}
+
+    # `promote` is a tuple-returning function that is very important to inline
+    if isdefined(Main, :Base) && isdefined(Main.Base, :promote) &&
+        length(sv.src.slottypes) > 0 && sv.src.slottypes[1] ⊑ typeof(getfield(Main.Base, :promote))
+        # check for non-isbits Tuple return
+        if sv.bestguess ⊑ Tuple && !isbits(widenconst(sv.bestguess))
+            # See if inlining this call would change the enclosing function
+            # from inlineable to not inlineable.
+            # This heuristic is applied to functions that return non-bits
+            # tuples, since we want to be able to inline those functions to
+            # avoid the tuple allocation.
+            current_stmts = vcat(sv.src.code, pending_stmts)
+            if inline_worthy_stmts(current_stmts)
+                append!(current_stmts, ast)
+                if !inline_worthy_stmts(current_stmts)
+                    return invoke_NF(argexprs0, e.typ, atypes, sv, atype_unlimited,
+                                     invoke_data)
+                end
+            end
+        end
+    end
+
     # create the backedge
     if isa(frame, InferenceState) && !frame.inferred && frame.cached
         # in this case, the actual backedge linfo hasn't been computed
@@ -3954,13 +3983,6 @@ function inlineable(f::ANY, ft::ANY, e::Expr, atypes::Vector{Any}, sv::Inference
 
     nm = length(unwrap_unionall(metharg).parameters)
 
-    if !isa(ast, Array{Any,1})
-        ast = ccall(:jl_uncompress_ast, Any, (Any, Any), method, ast)
-    else
-        ast = copy_exprargs(ast)
-    end
-    ast = ast::Array{Any,1}
-
     body = Expr(:block)
     body.args = ast
     propagate_inbounds = src.propagate_inbounds
@@ -4187,10 +4209,13 @@ function inline_ignore(ex::ANY)
     return isa(ex, Expr) && is_meta_expr(ex::Expr)
 end
 
+function inline_worthy_stmts(stmts::Vector{Any}, cost::Integer = 1000)
+    body = Expr(:block)
+    body.args = stmts
+    return inline_worthy(body, cost)
+end
+
 function inline_worthy(body::Expr, cost::Integer=1000) # precondition: 0 < cost; nominal cost = 1000
-    if popmeta!(body, :noinline)[1]
-        return false
-    end
     symlim = 1000 + 5_000_000 ÷ cost
     nstmt = 0
     for stmt in body.args
@@ -4238,17 +4263,15 @@ end
 function inlining_pass!(sv::InferenceState)
     eargs = sv.src.code
     i = 1
+    stmtbuf = []
     while i <= length(eargs)
         ei = eargs[i]
         if isa(ei, Expr)
-            res = inlining_pass(ei, sv)
-            eargs[i] = res[1]
-            if isa(res[2], Array)
-                sts = res[2]::Array{Any,1}
-                for j = 1:length(sts)
-                    insert!(eargs, i, sts[j])
-                    i += 1
-                end
+            eargs[i] = inlining_pass(ei, sv, stmtbuf, 1)
+            if !isempty(stmtbuf)
+                splice!(eargs, i:i-1, stmtbuf)
+                i += length(stmtbuf)
+                empty!(stmtbuf)
             end
         end
         i += 1
@@ -4257,16 +4280,17 @@ end
 
 const corenumtype = Union{Int32, Int64, Float32, Float64}
 
-function inlining_pass(e::Expr, sv::InferenceState)
+# return inlined replacement for `e`, inserting new needed statements
+# at index `ins` in `stmts`.
+function inlining_pass(e::Expr, sv::InferenceState, stmts, ins)
     if e.head === :method
         # avoid running the inlining pass on function definitions
-        return (e, ())
+        return e
     end
     eargs = e.args
     if length(eargs) < 1
-        return (e, ())
+        return e
     end
-    stmts = []
     arg1 = eargs[1]
     isccall = false
     i0 = 1
@@ -4281,6 +4305,7 @@ function inlining_pass(e::Expr, sv::InferenceState)
         i0 = 5
     end
     has_stmts = false # needed to preserve order-of-execution
+    prev_stmts_length = length(stmts)
     for _i = length(eargs):-1:i0
         if isccall && _i == 3
             i = 1
@@ -4303,40 +4328,33 @@ function inlining_pass(e::Expr, sv::InferenceState)
             else
                 argloc = eargs
             end
-            res = inlining_pass(ei, sv)
-            res1 = res[1]
-            res2 = res[2]
-            has_new_stmts = isa(res2, Array) && !isempty(res2::Array{Any,1})
+            sl0 = length(stmts)
+            res = inlining_pass(ei, sv, stmts, ins)
+            ns = length(stmts) - sl0  # number of new statements just added
             if isccallee
-                restype = exprtype(res1, sv.src, sv.mod)
+                restype = exprtype(res, sv.src, sv.mod)
                 if isa(restype, Const)
                     argloc[i] = restype.val
-                    if !effect_free(res1, sv.src, sv.mod, false)
-                        insert!(stmts, 1, res1)
-                    end
-                    if has_new_stmts
-                        prepend!(stmts, res2::Array{Any,1})
+                    if !effect_free(res, sv.src, sv.mod, false)
+                        insert!(stmts, ins+ns, res)
                     end
                     # Assume this is the last argument to process
                     break
                 end
             end
-            if has_stmts && !effect_free(res1, sv.src, sv.mod, false)
-                restype = exprtype(res1, sv.src, sv.mod)
+            if has_stmts && !effect_free(res, sv.src, sv.mod, false)
+                restype = exprtype(res, sv.src, sv.mod)
                 vnew = newvar!(sv, restype)
                 argloc[i] = vnew
-                unshift!(stmts, Expr(:(=), vnew, res1))
+                insert!(stmts, ins+ns, Expr(:(=), vnew, res))
             else
-                argloc[i] = res1
-            end
-            if has_new_stmts
-                res2 = res2::Array{Any,1}
-                prepend!(stmts, res2)
-                if !has_stmts && !(_i == i0)
-                    for stmt in res2
-                        if !effect_free(stmt, sv.src, sv.mod, true)
-                            has_stmts = true
-                        end
+                argloc[i] = res
+            end
+            if !has_stmts && ns > 0 && !(_i == i0)
+                for s = ins:ins+ns-1
+                    stmt = stmts[s]
+                    if !effect_free(stmt, sv.src, sv.mod, true)
+                        has_stmts = true; break
                     end
                 end
             end
@@ -4351,7 +4369,7 @@ function inlining_pass(e::Expr, sv::InferenceState)
         end
     end
     if e.head !== :call
-        return (e, stmts)
+        return e
     end
 
     ft = exprtype(arg1, sv.src, sv.mod)
@@ -4363,10 +4381,12 @@ function inlining_pass(e::Expr, sv::InferenceState)
     else
         f = nothing
         if !( isleaftype(ft) || ft<:Type )
-            return (e, stmts)
+            return e
         end
     end
 
+    ins += (length(stmts) - prev_stmts_length)
+
     if sv.params.inlining
         if isdefined(Main, :Base) &&
             ((isdefined(Main.Base, :^) && f === Main.Base.:^) ||
@@ -4390,19 +4410,13 @@ function inlining_pass(e::Expr, sv::InferenceState)
                                            exprtype(a1, sv.src, sv.mod) ⊑ basenumtype)
                     if square
                         e.args = Any[GlobalRef(Main.Base,:*), a1, a1]
-                        res = inlining_pass(e, sv)
+                        res = inlining_pass(e, sv, stmts, ins)
                     else
                         e.args = Any[GlobalRef(Main.Base,:*), Expr(:call, GlobalRef(Main.Base,:*), a1, a1), a1]
                         e.args[2].typ = e.typ
-                        res = inlining_pass(e, sv)
-                    end
-                    if isa(res, Tuple)
-                        if isa(res[2], Array) && !isempty(res[2])
-                            append!(stmts, res[2])
-                        end
-                        res = res[1]
+                        res = inlining_pass(e, sv, stmts, ins)
                     end
-                    return (res, stmts)
+                    return res
                 end
             end
         end
@@ -4413,13 +4427,14 @@ function inlining_pass(e::Expr, sv::InferenceState)
         ata[1] = ft
         for i = 2:length(e.args)
             a = exprtype(e.args[i], sv.src, sv.mod)
-            (a === Bottom || isvarargtype(a)) && return (e, stmts)
+            (a === Bottom || isvarargtype(a)) && return e
             ata[i] = a
         end
-        res = inlineable(f, ft, e, ata, sv)
+        res = inlineable(f, ft, e, ata, sv, stmts)
         if isa(res,Tuple)
             if isa(res[2],Array) && !isempty(res[2])
-                append!(stmts,res[2])
+                splice!(stmts, ins:ins-1, res[2])
+                ins += length(res[2])
             end
             res = res[1]
         end
@@ -4431,7 +4446,7 @@ function inlining_pass(e::Expr, sv::InferenceState)
                 e = res::Expr
                 f = _apply; ft = abstract_eval_constant(f)
             else
-                return (res,stmts)
+                return res
             end
         end
 
@@ -4453,7 +4468,7 @@ function inlining_pass(e::Expr, sv::InferenceState)
                     newargs[i-2] = Any[ mk_getfield(aarg,j,tp[j]) for j=1:length(tp) ]
                 else
                     # not all args expandable
-                    return (e,stmts)
+                    return e
                 end
             end
             e.args = [Any[e.args[2]]; newargs...]
@@ -4468,14 +4483,14 @@ function inlining_pass(e::Expr, sv::InferenceState)
             else
                 f = nothing
                 if !( isleaftype(ft) || ft<:Type )
-                    return (e,stmts)
+                    return e
                 end
             end
         else
-            return (e,stmts)
+            return e
         end
     end
-    return (e,stmts)
+    return e
 end
 
 const compiler_temp_sym = Symbol("#temp#")
@@ -4576,7 +4591,8 @@ normslot(s::TypedSlot) = SlotNumber(slot_id(s))
 function get_replacement(table, var::Union{SlotNumber, SSAValue}, init::ANY, nargs, slottypes, ssavaluetypes)
     #if isa(init, QuoteNode)  # this can cause slight code size increases
     #    return init
-    if isa(init, Expr) && init.head === :static_parameter
+    if (isa(init, Expr) && init.head === :static_parameter) || isa(init, corenumtype) ||
+        init === () || init === nothing
         return init
     elseif isa(init, Slot) && is_argument(nargs, init::Slot)
         # the transformation is not ideal if the assignment

From 3e53392feaa9be3cf6733d4d80f929fbfe7e4870 Mon Sep 17 00:00:00 2001
From: Jeff Bezanson <jeff.bezanson@gmail.com>
Date: Thu, 9 Mar 2017 20:02:13 -0500
Subject: [PATCH 6/6] fix regressions due to replacing too many slots with
 SSAValues

---
 base/inference.jl | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/base/inference.jl b/base/inference.jl
index 298a84d679dab..402d167cb4ba2 100644
--- a/base/inference.jl
+++ b/base/inference.jl
@@ -4605,6 +4605,12 @@ function get_replacement(table, var::Union{SlotNumber, SSAValue}, init::ANY, nar
             return init
         end
     elseif isa(init, SSAValue)
+        if isa(var, SlotNumber) && slottypes[var.id] ⊑ Tuple
+            # Here we avoid replacing a Slot with an SSAValue when the type is an
+            # aggregate. That can cause LLVM to generate a bunch of extra memcpys
+            # if the data ever needs to be stack allocated later.
+            return var
+        end
         if haskey(table, init)
             return get_replacement(table, init, table[init], nargs, slottypes, ssavaluetypes)
         end