diff --git a/base/compiler/abstractinterpretation.jl b/base/compiler/abstractinterpretation.jl index 1449b911ee629c..d9714e581e607c 100644 --- a/base/compiler/abstractinterpretation.jl +++ b/base/compiler/abstractinterpretation.jl @@ -1589,6 +1589,16 @@ function invoke_rewrite(xs::Vector{Any}) return newxs end +function abstract_add_finalizer(interp::AbstractInterpreter, argtypes::Vector{Any}, sv::InferenceState) + if length(argtypes) == 3 + tt = argtypes[3] + finalizer_argvec = Any[argtypes[3], argtypes[2]] + call = abstract_call(interp, ArgInfo(nothing, finalizer_argvec), sv, 1) + return CallMeta(Nothing, Effects(), FinalizerInfo(call.info, call.effects)) + end + return CallMeta(Nothing, Effects(), false) +end + # call where the function is known exactly function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), arginfo::ArgInfo, sv::InferenceState, @@ -1603,6 +1613,8 @@ function abstract_call_known(interp::AbstractInterpreter, @nospecialize(f), return abstract_invoke(interp, arginfo, sv) elseif f === modifyfield! return abstract_modifyfield!(interp, argtypes, sv) + elseif f === Core._add_finalizer + return abstract_add_finalizer(interp, argtypes, sv) end rt = abstract_call_builtin(interp, f, arginfo, sv, max_methods) return CallMeta(rt, builtin_effects(f, argtypes, rt), false) @@ -1998,7 +2010,8 @@ function abstract_eval_statement(interp::AbstractInterpreter, @nospecialize(e), effects.effect_free ? ALWAYS_TRUE : TRISTATE_UNKNOWN, effects.nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN, effects.terminates_globally ? 
ALWAYS_TRUE : TRISTATE_UNKNOWN, - #=nonoverlayed=#true + #=nonoverlayed=#true, + TRISTATE_UNKNOWN )) else tristate_merge!(sv, EFFECTS_UNKNOWN) @@ -2089,6 +2102,19 @@ function abstract_eval_global(M::Module, s::Symbol, frame::InferenceState) return ty end +function abstract_eval_global_assignment(interp::AbstractInterpreter, frame::InferenceState, lhs::GlobalRef, @nospecialize(rhs)) + M = lhs.mod + s = lhs.name + nothrow = false + if isdefined(M, s) && !isconst(M, s) + ty = ccall(:jl_binding_type, Any, (Any, Any), M, s) + nothrow = ty === nothing || rhs ⊑ ty + end + tristate_merge!(frame, Effects(EFFECTS_TOTAL, + effect_free=TRISTATE_UNKNOWN, + nothrow=nothrow ? ALWAYS_TRUE : TRISTATE_UNKNOWN)) +end + abstract_eval_ssavalue(s::SSAValue, sv::InferenceState) = abstract_eval_ssavalue(s, sv.src) function abstract_eval_ssavalue(s::SSAValue, src::CodeInfo) typ = (src.ssavaluetypes::Vector{Any})[s.id] @@ -2321,9 +2347,7 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState) if isa(lhs, SlotNumber) changes = StateUpdate(lhs, VarState(t, false), changes, false) elseif isa(lhs, GlobalRef) - tristate_merge!(frame, Effects(EFFECTS_TOTAL, - effect_free=TRISTATE_UNKNOWN, - nothrow=TRISTATE_UNKNOWN)) + abstract_eval_global_assignment(interp, frame, lhs, t) elseif !isa(lhs, SSAValue) tristate_merge!(frame, EFFECTS_UNKNOWN) end diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl index 3c9e9cf4c21d6a..0dc6c7857b0361 100644 --- a/base/compiler/optimize.jl +++ b/base/compiler/optimize.jl @@ -27,6 +27,9 @@ const IR_FLAG_THROW_BLOCK = 0x01 << 3 # This statement may be removed if its result is unused. In particular it must # thus be both pure and effect free. 
const IR_FLAG_EFFECT_FREE = 0x01 << 4 +# This statement was proven not to throw +const IR_FLAG_NOTHROW = 0x01 << 5 + const TOP_TUPLE = GlobalRef(Core, :tuple) @@ -542,7 +545,7 @@ function run_passes(ci::CodeInfo, sv::OptimizationState, caller::InferenceResult @timeit "Inlining" ir = ssa_inlining_pass!(ir, ir.linetable, sv.inlining, ci.propagate_inbounds) # @timeit "verify 2" verify_ir(ir) @timeit "compact 2" ir = compact!(ir) - @timeit "SROA" ir = sroa_pass!(ir) + @timeit "SROA" ir = sroa_pass!(ir, sv.inlining) @timeit "ADCE" ir = adce_pass!(ir) @timeit "type lift" ir = type_lift_pass!(ir) @timeit "compact 3" ir = compact!(ir) diff --git a/base/compiler/ssair/inlining.jl b/base/compiler/ssair/inlining.jl index f07757eafc6e10..15f2e125d4ba87 100644 --- a/base/compiler/ssair/inlining.jl +++ b/base/compiler/ssair/inlining.jl @@ -306,21 +306,17 @@ function finish_cfg_inline!(state::CFGInliningState) end end -function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, - linetable::Vector{LineInfoNode}, item::InliningTodo, - boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) - # Ok, do the inlining here - spec = item.spec::ResolvedInliningSpec - sparam_vals = item.mi.sparam_vals - def = item.mi.def::Method +function ir_inline_linetable!(linetable::Vector{LineInfoNode}, inlinee_ir::IRCode, + inlinee::Method, + inlined_at::Int32) + coverage = coverage_enabled(inlinee.module) linetable_offset::Int32 = length(linetable) # Append the linetable of the inlined function to our line table - inlined_at = compact.result[idx][:line] topline::Int32 = linetable_offset + Int32(1) - coverage = coverage_enabled(def.module) coverage_by_path = JLOptions().code_coverage == 3 - push!(linetable, LineInfoNode(def.module, def.name, def.file, def.line, inlined_at)) - oldlinetable = spec.ir.linetable + push!(linetable, LineInfoNode(inlinee.module, inlinee.name, inlinee.file, inlinee.line, inlined_at)) + oldlinetable = inlinee_ir.linetable + extra_coverage_line = 0 
for oldline in 1:length(oldlinetable) entry = oldlinetable[oldline] if !coverage && coverage_by_path && is_file_tracked(entry.file) @@ -339,8 +335,25 @@ function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector end push!(linetable, newentry) end - if coverage && spec.ir.stmts[1][:line] + linetable_offset != topline - insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, topline)) + if coverage && inlinee_ir.stmts[1][:line] + linetable_offset != topline + extra_coverage_line = topline + end + return linetable_offset, extra_coverage_line +end + +function ir_inline_item!(compact::IncrementalCompact, idx::Int, argexprs::Vector{Any}, + linetable::Vector{LineInfoNode}, item::InliningTodo, + boundscheck::Symbol, todo_bbs::Vector{Tuple{Int, Int}}) + # Ok, do the inlining here + spec = item.spec::ResolvedInliningSpec + sparam_vals = item.mi.sparam_vals + def = item.mi.def::Method + inlined_at = compact.result[idx][:line] + linetable_offset::Int32 = length(linetable) + topline::Int32 = linetable_offset + Int32(1) + linetable_offset, extra_coverage_line = ir_inline_linetable!(linetable, item.spec.ir, def, inlined_at) + if extra_coverage_line != 0 + insert_node_here!(compact, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) end if def.isva nargs_def = Int(def.nargs::Int32) @@ -847,12 +860,8 @@ function resolve_todo(todo::InliningTodo, state::InliningState, flag::UInt8) return compileable_specialization(et, match, effects) end - if isa(src, IRCode) - src = copy(src) - end - et !== nothing && push!(et, mi) - return InliningTodo(mi, src, effects) + return InliningTodo(mi, retrieve_ir_for_inlining(mi, src), effects) end function resolve_todo((; fully_covered, atype, cases, #=bbs=#)::UnionSplit, state::InliningState, flag::UInt8) @@ -874,7 +883,8 @@ function validate_sparams(sparams::SimpleVector) end function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, - flag::UInt8, state::InliningState) + 
flag::UInt8, state::InliningState, + do_resolve::Bool = true) method = match.method spec_types = match.spec_types @@ -908,7 +918,7 @@ function analyze_method!(match::MethodMatch, argtypes::Vector{Any}, todo = InliningTodo(mi, match, argtypes) # If we don't have caches here, delay resolving this MethodInstance # until the batch inlining step (or an external post-processing pass) - state.mi_cache === nothing && return todo + do_resolve && state.mi_cache === nothing && return todo return resolve_todo(todo, state, flag) end @@ -916,15 +926,15 @@ function InliningTodo(mi::MethodInstance, ir::IRCode, effects::Effects) return InliningTodo(mi, ResolvedInliningSpec(ir, linear_inline_eligible(ir), effects)) end -function InliningTodo(mi::MethodInstance, src::Union{CodeInfo, Array{UInt8, 1}}, effects::Effects) - if !isa(src, CodeInfo) - src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo - end +function retrieve_ir_for_inlining(mi::MethodInstance, src::Array{UInt8, 1}) + src = ccall(:jl_uncompress_ir, Any, (Any, Ptr{Cvoid}, Any), mi.def, C_NULL, src::Vector{UInt8})::CodeInfo + return retrieve_ir_for_inlining(mi, src) +end - @timeit "inline IR inflation" begin; - return InliningTodo(mi, inflate_ir(src, mi)::IRCode, effects) - end +retrieve_ir_for_inlining(mi::MethodInstance, src::CodeInfo) = @timeit "inline IR inflation" begin; + inflate_ir(src, mi)::IRCode end +retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode) = copy(ir) function handle_single_case!( ir::IRCode, idx::Int, stmt::Expr, @@ -1206,7 +1216,7 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto end end - if sig.f !== Core.invoke && is_builtin(sig) + if sig.f !== Core.invoke && sig.f !== Core._add_finalizer && is_builtin(sig) # No inlining for builtins (other invoke/apply/typeassert) return nothing end @@ -1223,9 +1233,10 @@ function process_simple!(ir::IRCode, idx::Int, state::InliningState, todo::Vecto end # TODO inline 
non-`isdispatchtuple`, union-split callsites? -function analyze_single_call!( - ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, - sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) +function compute_inlining_cases( + infos::Vector{MethodMatchInfo}, flag::UInt8, + sig::Signature, state::InliningState, + do_resolve::Bool = true) argtypes = sig.argtypes cases = InliningCase[] local any_fully_covered = false @@ -1242,7 +1253,7 @@ function analyze_single_call!( continue end for match in meth - handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true) + handled_all_cases &= handle_match!(match, argtypes, flag, state, cases, true, do_resolve) any_fully_covered |= match.fully_covers end end @@ -1252,8 +1263,18 @@ function analyze_single_call!( filter!(case::InliningCase->isdispatchtuple(case.sig), cases) end - handle_cases!(ir, idx, stmt, argtypes_to_type(argtypes), cases, - handled_all_cases & any_fully_covered, todo, state.params) + return cases, handled_all_cases & any_fully_covered +end + +function analyze_single_call!( + ir::IRCode, idx::Int, stmt::Expr, infos::Vector{MethodMatchInfo}, flag::UInt8, + sig::Signature, state::InliningState, todo::Vector{Pair{Int, Any}}) + + r = compute_inlining_cases(infos, flag, sig, state) + r === nothing && return nothing + cases, all_covered = r + handle_cases!(ir, idx, stmt, argtypes_to_type(sig.argtypes), cases, + all_covered, todo, state.params) end # similar to `analyze_single_call!`, but with constant results @@ -1305,14 +1326,15 @@ end function handle_match!( match::MethodMatch, argtypes::Vector{Any}, flag::UInt8, state::InliningState, - cases::Vector{InliningCase}, allow_abstract::Bool = false) + cases::Vector{InliningCase}, allow_abstract::Bool = false, + do_resolve::Bool = true) spec_types = match.spec_types allow_abstract || isdispatchtuple(spec_types) || return false # we may see duplicated dispatch signatures here when a signature gets widened # during 
abstract interpretation: for the purpose of inlining, we can just skip # processing this dispatch candidate _any(case->case.sig === spec_types, cases) && return true - item = analyze_method!(match, argtypes, flag, state) + item = analyze_method!(match, argtypes, flag, state, do_resolve) item === nothing && return false push!(cases, InliningCase(spec_types, item)) return true @@ -1427,6 +1449,48 @@ function assemble_inline_todo!(ir::IRCode, state::InliningState) continue end + # Handle finalizer + if sig.f === Core._add_finalizer + if isa(info, FinalizerInfo) + # Only inline finalizers that are known nothrow and notls. + # This avoids having to set up state for finalizer isolation + (is_nothrow(info.effects) && is_notls(info.effects)) || continue + + info = info.info + if isa(info, MethodMatchInfo) + infos = MethodMatchInfo[info] + elseif isa(info, UnionSplitInfo) + infos = info.matches + else + continue + end + + ft = argextype(stmt.args[3], ir) + has_free_typevars(ft) && return nothing + f = singleton_type(ft) + argtypes = Vector{Any}(undef, 2) + argtypes[1] = ft + argtypes[2] = argextype(stmt.args[2], ir) + sig = Signature(f, ft, argtypes) + + cases, all_covered = compute_inlining_cases(infos, UInt8(0), sig, state, false) + length(cases) == 0 && continue + if all_covered && length(cases) == 1 + if isa(cases[1], InliningCase) + case1 = cases[1].item + if isa(case1, InliningTodo) + push!(stmt.args, true) + push!(stmt.args, case1.mi) + elseif isa(case1, InvokeCase) + push!(stmt.args, false) + push!(stmt.args, case1.invoke) + end + end + end + continue + end + end + # if inference arrived here with constant-prop'ed result(s), # we can perform a specialized analysis for just this case if isa(info, ConstCallInfo) diff --git a/base/compiler/ssair/ir.jl b/base/compiler/ssair/ir.jl index 2f1359e4002aea..5907e76caed7a6 100644 --- a/base/compiler/ssair/ir.jl +++ b/base/compiler/ssair/ir.jl @@ -166,36 +166,6 @@ const AnySSAValue = Union{SSAValue, OldSSAValue, NewSSAValue} # 
SSA-indexed nodes - -struct NewInstruction - stmt::Any - type::Any - info::Any - # If nothing, copy the line from previous statement - # in the insertion location - line::Union{Int32, Nothing} - flag::UInt8 - - ## Insertion options - - # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). - # Don't bother redoing so on insertion. - effect_free_computed::Bool - NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), - line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = - new(stmt, type, info, line, flag, effect_free_computed) -end -NewInstruction(@nospecialize(stmt), @nospecialize(type)) = - NewInstruction(stmt, type, nothing) -NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = - NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) - -effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) -non_effect_free(inst::NewInstruction) = - NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) - - struct InstructionStream inst::Vector{Any} type::Vector{Any} @@ -295,6 +265,36 @@ function add!(new::NewNodeStream, pos::Int, attach_after::Bool) end copy(nns::NewNodeStream) = NewNodeStream(copy(nns.stmts), copy(nns.info)) +struct NewInstruction + stmt::Any + type::Any + info::Any + # If nothing, copy the line from previous statement + # in the insertion location + line::Union{Int32, Nothing} + flag::UInt8 + + ## Insertion options + + # The IR_FLAG_EFFECT_FREE flag has already been computed (or forced). + # Don't bother redoing so on insertion. 
+ effect_free_computed::Bool + NewInstruction(@nospecialize(stmt), @nospecialize(type), @nospecialize(info), + line::Union{Int32, Nothing}, flag::UInt8, effect_free_computed::Bool) = + new(stmt, type, info, line, flag, effect_free_computed) +end +NewInstruction(@nospecialize(stmt), @nospecialize(type)) = + NewInstruction(stmt, type, nothing) +NewInstruction(@nospecialize(stmt), @nospecialize(type), line::Union{Nothing, Int32}) = + NewInstruction(stmt, type, nothing, line, IR_FLAG_NULL, false) +NewInstruction(@nospecialize(stmt), meta::Instruction; line::Union{Int32, Nothing}=nothing) = + NewInstruction(stmt, meta[:type], meta[:info], line === nothing ? meta[:line] : line, meta[:flag], true) + +effect_free(inst::NewInstruction) = + NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag | IR_FLAG_EFFECT_FREE, true) +non_effect_free(inst::NewInstruction) = + NewInstruction(inst.stmt, inst.type, inst.info, inst.line, inst.flag & ~IR_FLAG_EFFECT_FREE, true) + struct IRCode stmts::InstructionStream argtypes::Vector{Any} diff --git a/base/compiler/ssair/passes.jl b/base/compiler/ssair/passes.jl index c2597363df2824..9f3aacbc65326f 100644 --- a/base/compiler/ssair/passes.jl +++ b/base/compiler/ssair/passes.jl @@ -14,6 +14,7 @@ GetfieldUse(idx::Int) = SSAUse(:getfield, idx) PreserveUse(idx::Int) = SSAUse(:preserve, idx) NoPreserve() = SSAUse(:nopreserve, 0) IsdefinedUse(idx::Int) = SSAUse(:isdefined, idx) +AddFinalizerUse(idx::Int) = SSAUse(:add_finalizer, idx) """ du::SSADefUse @@ -735,7 +736,7 @@ its argument). In a case when all usages are fully eliminated, `struct` allocation may also be erased as a result of succeeding dead code elimination. 
""" -function sroa_pass!(ir::IRCode) +function sroa_pass!(ir::IRCode, inlining::Union{Nothing, InliningState} = nothing) compact = IncrementalCompact(ir) defuses = nothing # will be initialized once we encounter mutability in order to reduce dynamic allocations lifting_cache = IdDict{Pair{AnySSAValue, Any}, AnySSAValue}() @@ -744,7 +745,7 @@ function sroa_pass!(ir::IRCode) for ((_, idx), stmt) in compact # check whether this statement is `getfield` / `setfield!` (or other "interesting" statement) isa(stmt, Expr) || continue - is_setfield = is_isdefined = false + is_setfield = is_isdefined = is_add_finalizer = false field_ordering = :unspecified if is_known_call(stmt, setfield!, compact) 4 <= length(stmt.args) <= 5 || continue @@ -767,6 +768,13 @@ function sroa_pass!(ir::IRCode) field_ordering = argextype(stmt.args[4], compact) widenconst(field_ordering) === Bool && (field_ordering = :unspecified) end + elseif is_known_call(stmt, Core._add_finalizer, compact) + 3 <= length(stmt.args) <= 5 || continue + # Inlining performs legality checks on the finalizer to determine + # whether or not we may inline it. If so, it appends extra arguments + # at the end of the intrinsic. Detect that here. 
+ length(stmt.args) == 5 || continue + is_add_finalizer = true elseif isexpr(stmt, :foreigncall) nccallargs = length(stmt.args[3]::SimpleVector) preserved = Int[] @@ -824,9 +832,10 @@ function sroa_pass!(ir::IRCode) # analyze this `getfield` / `isdefined` / `setfield!` call - field = try_compute_field_stmt(compact, stmt) - field === nothing && continue - + if !is_add_finalizer + field = try_compute_field_stmt(compact, stmt) + field === nothing && continue + end val = stmt.args[2] struct_typ = unwrap_unionall(widenconst(argextype(val, compact))) @@ -864,14 +873,16 @@ function sroa_pass!(ir::IRCode) push!(defuse.defs, idx) elseif is_isdefined push!(defuse.uses, IsdefinedUse(idx)) + elseif is_add_finalizer + push!(defuse.uses, AddFinalizerUse(idx)) else push!(defuse.uses, GetfieldUse(idx)) end union!(mid, intermediaries) end continue - elseif is_setfield - continue # invalid `setfield!` call, but just ignore here + elseif is_setfield || is_add_finalizer + continue # invalid `setfield!` or `_add_finalizer` call, but just ignore here elseif is_isdefined continue # TODO? 
end @@ -921,7 +932,7 @@ function sroa_pass!(ir::IRCode) used_ssas = copy(compact.used_ssas) simple_dce!(compact, (x::SSAValue) -> used_ssas[x.id] -= 1) ir = complete(compact) - sroa_mutables!(ir, defuses, used_ssas, lazydomtree) + sroa_mutables!(ir, defuses, used_ssas, lazydomtree, inlining) return ir else simple_dce!(compact) @@ -929,7 +940,60 @@ function sroa_pass!(ir::IRCode) end end -function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree) +function try_inline_finalizer!(ir::IRCode, argexprs::Vector{Any}, idx::Int, mi::MethodInstance, inlining::InliningState) + code = get(inlining.mi_cache, mi, nothing) + if code isa CodeInstance + if use_const_api(code) + # No code in the function - Nothing to do + inlining.et !== nothing && push!(inlining.et, mi) + return true + end + src = code.inferred + else + src = code + end + + src = inlining_policy(inlining.interp, src, IR_FLAG_NULL, mi, Any[]) + src === nothing && return false + src = retrieve_ir_for_inlining(mi, src) + + # For now: Require finalizer to only have one basic block + length(src.cfg.blocks) == 1 || return false + + # Ok, we're committed to inlining the finalizer + inlining.et !== nothing && push!(inlining.et, mi) + + linetable_offset, extra_coverage_line = ir_inline_linetable!(ir.linetable, src, mi.def, ir[SSAValue(idx)][:line]) + if extra_coverage_line != 0 + insert_node!(ir, idx, NewInstruction(Expr(:code_coverage_effect), Nothing, extra_coverage_line)) + end + + # TODO: Use the actual inliner here rather than open coding this special + # purpose inliner. 
+ spvals = mi.sparam_vals + ssa_rename = Vector{Any}(undef, length(src.stmts)) + for idx′ = 1:length(src.stmts) + urs = userefs(src[SSAValue(idx′)][:inst]) + for ur in urs + if isa(ur[], SSAValue) + ur[] = ssa_rename[ur[].id] + elseif isa(ur[], Argument) + ur[] = argexprs[ur[].n] + elseif isexpr(ur[], :static_parameter) + ur[] = spvals[ur[].args[1]] + end + end + # TODO: Scan newly added statement into the sroa defuse struct + stmt = urs[] + isa(stmt, ReturnNode) && continue + inst = src[SSAValue(idx′)] + ssa_rename[idx′] = insert_node!(ir, idx, NewInstruction(stmt, inst; line = inst[:line] + linetable_offset), true) + end + return true +end + +is_nothrow(ir::IRCode, pc::Int) = ir.stmts[pc][:flag] & (IR_FLAG_EFFECT_FREE | IR_FLAG_NOTHROW) ≠ 0 +function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse}}, used_ssas::Vector{Int}, lazydomtree::LazyDomtree, inlining::Union{Nothing, InliningState}) for (idx, (intermediaries, defuse)) in defuses intermediaries = collect(intermediaries) # Check if there are any uses we did not account for. If so, the variable @@ -952,9 +1016,72 @@ function sroa_mutables!(ir::IRCode, defuses::IdDict{Int, Tuple{SPCSet, SSADefUse # error at runtime, but is not illegal to have in the IR. ismutabletype(typ) || continue typ = typ::DataType + # First check for any add_finalizer calls + add_finalizer_idx = nothing + for use in defuse.uses + if use.kind === :add_finalizer + # For now: Only allow one add_finalizer per allocation + add_finalizer_idx !== nothing && @goto skip + add_finalizer_idx = use.idx + end + end + if add_finalizer_idx !== nothing + # For now: Require that all uses and defs are in the same basic block, + # so that live range calculations are easy. 
+ bb = ir.cfg.blocks[block_for_inst(ir.cfg, first(defuse.uses).idx)] + minval = typemax(Int) + maxval = 0 + + check_in_range(defuse) = check_in_range(defuse.idx) + function check_in_range(didx::Int) + didx in bb.stmts || return false + if didx < minval + minval = didx + end + if didx > maxval + maxval = didx + end + return true + end + + check_in_range(idx) || continue + _all(check_in_range, defuse.uses) || continue + _all(check_in_range, defuse.defs) || continue + + # For now: Require all statements in the basic block range to be + # nothrow. + all_nothrow = _all(idx->is_nothrow(ir, idx) || idx == add_finalizer_idx, minval:maxval) + all_nothrow || continue + + # Ok, finalizer rewrite is legal. + add_finalizer_stmt = ir[SSAValue(add_finalizer_idx)][:inst] + argexprs = Any[add_finalizer_stmt.args[3], add_finalizer_stmt.args[2]] + may_inline = add_finalizer_stmt.args[4]::Bool + mi = add_finalizer_stmt.args[5]::Union{MethodInstance, Nothing} + if may_inline && mi !== nothing + if try_inline_finalizer!(ir, argexprs, maxval, add_finalizer_stmt.args[5], inlining) + @goto done_finalizer + end + mi = compileable_specialization(inlining.et, mi, Effects()).invoke + end + if mi !== nothing + insert_node!(ir, maxval, + NewInstruction(Expr(:invoke, mi, argexprs...), Nothing), + true) + else + insert_node!(ir, maxval, + NewInstruction(Expr(:call, argexprs...), Nothing), + true) + end + @label done_finalizer + # Erase call to add_finalizer + ir[SSAValue(add_finalizer_idx)][:inst] = nothing + continue + end # Partition defuses by field fielddefuse = SSADefUse[SSADefUse() for _ = 1:fieldcount(typ)] all_eliminated = all_forwarded = true + has_add_finalizer = false for use in defuse.uses if use.kind === :preserve for du in fielddefuse diff --git a/base/compiler/ssair/show.jl b/base/compiler/ssair/show.jl index f4c826a45156fd..4811125926c2c7 100644 --- a/base/compiler/ssair/show.jl +++ b/base/compiler/ssair/show.jl @@ -802,6 +802,8 @@ function Base.show(io::IO, 
e::Core.Compiler.Effects) printstyled(io, string(tristate_letter(e.nothrow), 'n'); color=tristate_color(e.nothrow)) print(io, ',') printstyled(io, string(tristate_letter(e.terminates), 't'); color=tristate_color(e.terminates)) + print(io, ',') + printstyled(io, string(tristate_letter(e.notls), 's'); color=tristate_color(e.notls)) print(io, ')') e.nonoverlayed || printstyled(io, '′'; color=:red) end diff --git a/base/compiler/stmtinfo.jl b/base/compiler/stmtinfo.jl index 3f9a562061a12c..99f39563946159 100644 --- a/base/compiler/stmtinfo.jl +++ b/base/compiler/stmtinfo.jl @@ -183,4 +183,15 @@ struct ReturnTypeCallInfo info::Any end +""" + info::FinalizerInfo + +Represents the information of a potential call to the finalizer on the given +object type. +""" +struct FinalizerInfo + info::Any + effects::Effects +end + @specialize diff --git a/base/compiler/tfuncs.jl b/base/compiler/tfuncs.jl index 87df43ec92224b..90c10c7e1aea38 100644 --- a/base/compiler/tfuncs.jl +++ b/base/compiler/tfuncs.jl @@ -559,6 +559,8 @@ add_tfunc(atomic_pointerswap, 3, 3, (a, v, order) -> (@nospecialize; pointer_elt add_tfunc(atomic_pointermodify, 4, 4, atomic_pointermodify_tfunc, 5) add_tfunc(atomic_pointerreplace, 5, 5, atomic_pointerreplace_tfunc, 5) add_tfunc(donotdelete, 0, INT_INF, (@nospecialize args...)->Nothing, 0) +add_tfunc(Core._add_finalizer, 2, 2, (@nospecialize args...)->Nothing, 5) + # more accurate typeof_tfunc for vararg tuples abstract only in length function typeof_concrete_vararg(t::DataType) diff --git a/base/compiler/types.jl b/base/compiler/types.jl index e594c233353d92..12efbf8e7207eb 100644 --- a/base/compiler/types.jl +++ b/base/compiler/types.jl @@ -45,6 +45,11 @@ The effects are composed of the following set of different properties: - `terminates::TriState`: this method is guaranteed to terminate - `nonoverlayed::Bool`: indicates that any methods that may be called within this method are not defined in an [overlayed method table](@ref OverlayMethodTable) +- 
`notls::TriState`: this method does not access any state bound to the current + task and may thus be moved to a different task without changing observable + behavior. Note that this currently implies `noyield` as well, since + yielding modifies the state of the current task, though this may be split + in the future. See [`Base.@assume_effects`](@ref) for more detailed explanation on the definitions of these properties. Along the abstract interpretation, `Effects` at each statement are analyzed locally and @@ -67,6 +72,7 @@ struct Effects nothrow::TriState terminates::TriState nonoverlayed::Bool + notls::TriState # This effect is currently only tracked in inference and modified # :consistent before caching. We may want to track it in the future. inbounds_taints_consistency::Bool @@ -76,20 +82,22 @@ function Effects( effect_free::TriState, nothrow::TriState, terminates::TriState, - nonoverlayed::Bool) + nonoverlayed::Bool, + notls::TriState) return Effects( consistent, effect_free, nothrow, terminates, nonoverlayed, + notls, false) end -const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, true) -const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, TRISTATE_UNKNOWN, ALWAYS_TRUE, true) -const EFFECTS_UNKNOWN = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, true) # mostly unknown, but it's not overlayed at least (e.g. it's not a call) -const EFFECTS_UNKNOWN′ = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, false) # unknown, really +const EFFECTS_TOTAL = Effects(ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, ALWAYS_TRUE, true, ALWAYS_TRUE) +const EFFECTS_THROWS = Effects(ALWAYS_TRUE, ALWAYS_TRUE, TRISTATE_UNKNOWN, ALWAYS_TRUE, true, ALWAYS_TRUE) +const EFFECTS_UNKNOWN = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, true, TRISTATE_UNKNOWN) # mostly unknown, but it's not overlayed at least (e.g. 
it's not a call) +const EFFECTS_UNKNOWN′ = Effects(TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, TRISTATE_UNKNOWN, false, TRISTATE_UNKNOWN) # unknown, really function Effects(e::Effects = EFFECTS_UNKNOWN′; consistent::TriState = e.consistent, @@ -97,6 +105,7 @@ function Effects(e::Effects = EFFECTS_UNKNOWN′; nothrow::TriState = e.nothrow, terminates::TriState = e.terminates, nonoverlayed::Bool = e.nonoverlayed, + notls::TriState = e.notls, inbounds_taints_consistency::Bool = e.inbounds_taints_consistency) return Effects( consistent, @@ -104,6 +113,7 @@ function Effects(e::Effects = EFFECTS_UNKNOWN′; nothrow, terminates, nonoverlayed, + notls, inbounds_taints_consistency) end @@ -111,6 +121,7 @@ is_consistent(effects::Effects) = effects.consistent === ALWAYS_TRUE is_effect_free(effects::Effects) = effects.effect_free === ALWAYS_TRUE is_nothrow(effects::Effects) = effects.nothrow === ALWAYS_TRUE is_terminates(effects::Effects) = effects.terminates === ALWAYS_TRUE +is_notls(effects::Effects) = effects.notls === ALWAYS_TRUE is_nonoverlayed(effects::Effects) = effects.nonoverlayed is_concrete_eval_eligible(effects::Effects) = @@ -132,7 +143,8 @@ function encode_effects(e::Effects) (e.effect_free.state << 2) | (e.nothrow.state << 4) | (e.terminates.state << 6) | - (UInt32(e.nonoverlayed) << 8) + (UInt32(e.nonoverlayed) << 8) | + (UInt32(e.notls.state) << 9) end function decode_effects(e::UInt32) return Effects( @@ -141,6 +153,7 @@ function decode_effects(e::UInt32) TriState((e >> 4) & 0x03), TriState((e >> 6) & 0x03), _Bool( (e >> 8) & 0x01), + TriState((e >> 9) & 0x03), false) end @@ -155,6 +168,8 @@ function tristate_merge(old::Effects, new::Effects) tristate_merge( old.terminates, new.terminates), old.nonoverlayed & new.nonoverlayed, + tristate_merge( + old.notls, new.notls), old.inbounds_taints_consistency | new.inbounds_taints_consistency) end diff --git a/base/gcutils.jl b/base/gcutils.jl index d17301a1be9b07..6918f547caa030 100644 --- a/base/gcutils.jl +++ 
b/base/gcutils.jl @@ -4,6 +4,12 @@ ==(w::WeakRef, v) = isequal(w.value, v) ==(w, v::WeakRef) = isequal(w, v.value) +function _check_mutable(@nospecialize(o)) @noinline + if !ismutable(o) + error("objects of type ", typeof(o), " cannot be finalized") + end +end + """ finalizer(f, x) @@ -42,18 +48,13 @@ end ``` """ function finalizer(@nospecialize(f), @nospecialize(o)) - if !ismutable(o) - error("objects of type ", typeof(o), " cannot be finalized") - end - ccall(:jl_gc_add_finalizer_th, Cvoid, (Ptr{Cvoid}, Any, Any), - Core.getptls(), o, f) + _check_mutable(o) + Core._add_finalizer(o, f) return o end function finalizer(f::Ptr{Cvoid}, o::T) where T @inline - if !ismutable(o) - error("objects of type ", typeof(o), " cannot be finalized") - end + _check_mutable(o) ccall(:jl_gc_add_ptr_finalizer, Cvoid, (Ptr{Cvoid}, Any, Ptr{Cvoid}), Core.getptls(), o, f) return o diff --git a/src/builtin_proto.h b/src/builtin_proto.h index c820751ab56e23..94f07a71642034 100644 --- a/src/builtin_proto.h +++ b/src/builtin_proto.h @@ -57,6 +57,7 @@ DECLARE_BUILTIN(_typevar); DECLARE_BUILTIN(donotdelete); DECLARE_BUILTIN(getglobal); DECLARE_BUILTIN(setglobal); +DECLARE_BUILTIN(_add_finalizer); JL_CALLABLE(jl_f_invoke_kwsorter); #ifdef DEFINE_BUILTIN_GLOBALS @@ -73,6 +74,7 @@ JL_CALLABLE(jl_f_get_binding_type); JL_CALLABLE(jl_f_set_binding_type); JL_CALLABLE(jl_f_donotdelete); JL_CALLABLE(jl_f_setglobal); +JL_CALLABLE(jl_f__add_finalizer); #ifdef __cplusplus } diff --git a/src/builtins.c b/src/builtins.c index 90dc0ec6a0e5c4..8d9bc01c8416bc 100644 --- a/src/builtins.c +++ b/src/builtins.c @@ -1591,6 +1591,14 @@ JL_CALLABLE(jl_f_donotdelete) return jl_nothing; } +JL_CALLABLE(jl_f__add_finalizer) +{ + JL_NARGS(_add_finalizer, 2, 3); + jl_task_t *ct = jl_current_task; + jl_gc_add_finalizer_(ct->ptls, args[0], args[1]); + return jl_nothing; +} + static int equiv_field_types(jl_value_t *old, jl_value_t *ft) { size_t nf = jl_svec_len(ft); @@ -1961,6 +1969,7 @@ void jl_init_primitives(void) 
JL_GC_DISABLED jl_builtin__typebody = add_builtin_func("_typebody!", jl_f__typebody); add_builtin_func("_equiv_typedef", jl_f__equiv_typedef); jl_builtin_donotdelete = add_builtin_func("donotdelete", jl_f_donotdelete); + add_builtin_func("_add_finalizer", jl_f__add_finalizer); // builtin types add_builtin("Any", (jl_value_t*)jl_any_type); diff --git a/src/codegen.cpp b/src/codegen.cpp index fdf422bb07a7d4..70a892d8e00416 100644 --- a/src/codegen.cpp +++ b/src/codegen.cpp @@ -1125,7 +1125,8 @@ static const auto &builtin_func_map() { { jl_f_arrayset_addr, new JuliaFunction{XSTR(jl_f_arrayset), get_func_sig, get_func_attrs} }, { jl_f_arraysize_addr, new JuliaFunction{XSTR(jl_f_arraysize), get_func_sig, get_func_attrs} }, { jl_f_apply_type_addr, new JuliaFunction{XSTR(jl_f_apply_type), get_func_sig, get_func_attrs} }, - { jl_f_donotdelete_addr, new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} } + { jl_f_donotdelete_addr, new JuliaFunction{XSTR(jl_f_donotdelete), get_donotdelete_sig, get_donotdelete_func_attrs} }, + { jl_f__add_finalizer_addr, new JuliaFunction{XSTR(jl_f__add_finalizer), get_func_sig, get_func_attrs} } }; return builtins; } diff --git a/src/gc.c b/src/gc.c index e299661db87d42..2d34df0edcd88b 100644 --- a/src/gc.c +++ b/src/gc.c @@ -488,7 +488,7 @@ void jl_gc_run_all_finalizers(jl_task_t *ct) run_finalizers(ct); } -static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT { assert(jl_atomic_load_relaxed(&ptls->gc_state) == 0); arraylist_t *a = &ptls->finalizers; @@ -518,7 +518,7 @@ static void gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT JL_DLLEXPORT void jl_gc_add_ptr_finalizer(jl_ptls_t ptls, jl_value_t *v, void *f) JL_NOTSAFEPOINT { - gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); + jl_gc_add_finalizer_(ptls, (void*)(((uintptr_t)v) | 1), f); } JL_DLLEXPORT void
jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_function_t *f) JL_NOTSAFEPOINT @@ -527,7 +527,7 @@ JL_DLLEXPORT void jl_gc_add_finalizer_th(jl_ptls_t ptls, jl_value_t *v, jl_funct jl_gc_add_ptr_finalizer(ptls, v, jl_unbox_voidpointer(f)); } else { - gc_add_finalizer_(ptls, v, f); + jl_gc_add_finalizer_(ptls, v, f); } } diff --git a/src/julia_internal.h b/src/julia_internal.h index 02130ef963198f..be5716be997288 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -466,6 +466,7 @@ void jl_gc_track_malloced_array(jl_ptls_t ptls, jl_array_t *a) JL_NOTSAFEPOINT; void jl_gc_count_allocd(size_t sz) JL_NOTSAFEPOINT; void jl_gc_run_all_finalizers(jl_task_t *ct); void jl_release_task_stack(jl_ptls_t ptls, jl_task_t *task); +void jl_gc_add_finalizer_(jl_ptls_t ptls, void *v, void *f) JL_NOTSAFEPOINT; JL_DLLEXPORT void jl_gc_queue_binding(jl_binding_t *bnd) JL_NOTSAFEPOINT; void gc_setmark_buf(jl_ptls_t ptls, void *buf, uint8_t, size_t) JL_NOTSAFEPOINT; diff --git a/src/staticdata.c b/src/staticdata.c index 27fbb0fb336cf1..a6a6b3d8e62ee6 100644 --- a/src/staticdata.c +++ b/src/staticdata.c @@ -314,7 +314,7 @@ static const jl_fptr_args_t id_to_fptrs[] = { &jl_f_ifelse, &jl_f__structtype, &jl_f__abstracttype, &jl_f__primitivetype, &jl_f__typebody, &jl_f__setsuper, &jl_f__equiv_typedef, &jl_f_get_binding_type, &jl_f_set_binding_type, &jl_f_opaque_closure_call, &jl_f_donotdelete, - &jl_f_getglobal, &jl_f_setglobal, + &jl_f_getglobal, &jl_f_setglobal, &jl_f__add_finalizer, NULL }; typedef struct { diff --git a/test/compiler/inline.jl b/test/compiler/inline.jl index 4f2e8f8783f584..75eed6dd772e5f 100644 --- a/test/compiler/inline.jl +++ b/test/compiler/inline.jl @@ -1279,3 +1279,89 @@ end # Test that inlining doesn't accidentally delete a bad return_type call f_bad_return_type() = Core.Compiler.return_type(+, 1, 2) @test_throws MethodError f_bad_return_type() + +# Test that we can inline a finalizer for a struct that does not otherwise escape +global 
total_deallocations::Int = 0 + +mutable struct DoAllocNoEscape + function DoAllocNoEscape() + finalizer(new()) do this + global total_deallocations += 1 + end + end +end + +let src = code_typed1() do + for i = 1:1000 + DoAllocNoEscape() + end + end + @test count(isnew, src.code) == 0 +end + +# Test that finalizer elision doesn't cause a throw to be inlined into a function +# that shouldn't have it +const finalizer_should_throw = Ref{Bool}(true) +mutable struct DoAllocFinalizerThrows + function DoAllocFinalizerThrows() + finalizer(new()) do this + finalizer_should_throw[] && error("Unexpected finalizer throw") + end + end +end + +function f_finalizer_throws() + prev = GC.enable(false) + for i = 1:100 + DoAllocFinalizerThrows() + end + finalizer_should_throw[] = false + GC.enable(prev) + GC.gc() + return true +end + +@test f_finalizer_throws() + +# Test finalizers with static parameters +global last_finalizer_type::Type = Any +mutable struct DoAllocNoEscapeSparam{T} + x::T + function finalizer_sparam(d::DoAllocNoEscapeSparam{T}) where {T} + global total_deallocations += 1 + global last_finalizer_type = T + end + function DoAllocNoEscapeSparam{T}(x::T) where {T} + finalizer(finalizer_sparam, new{T}(x)) + end +end +DoAllocNoEscapeSparam(x::T) where {T} = DoAllocNoEscapeSparam{T}(x) + +let src = code_typed1(Tuple{Any}) do x + for i = 1:1000 + DoAllocNoEscapeSparam(x) + end + end + # This requires more inlining enhancements. For now just make sure this + # doesn't error. + @test count(isnew, src.code) in (0, 1) # == 0 +end + +# Test noinline finalizer +@noinline function noinline_finalizer(d) + global total_deallocations += 1 +end +mutable struct DoAllocNoEscapeNoInline + function DoAllocNoEscapeNoInline() + finalizer(noinline_finalizer, new()) + end +end + +let src = code_typed1() do + for i = 1:1000 + DoAllocNoEscapeNoInline() + end + end + @test count(isnew, src.code) == 1 + @test count(isinvoke(:noinline_finalizer), src.code) == 1 +end