diff --git a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl index e08d08779e097..ddf202f378fb5 100644 --- a/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl +++ b/Compiler/extras/CompilerDevTools/src/CompilerDevTools.jl @@ -47,7 +47,7 @@ end function Compiler.transform_result_for_cache(interp::SplitCacheInterp, result::Compiler.InferenceResult, edges::Compiler.SimpleVector) opt = result.src::Compiler.OptimizationState - ir = opt.result.ir::Compiler.IRCode + ir = opt.optresult.ir::Compiler.IRCode override = with_new_compiler for inst in ir.stmts stmt = inst[:stmt] diff --git a/Compiler/src/optimize.jl b/Compiler/src/optimize.jl index 612105061c38d..fdf97c447559d 100644 --- a/Compiler/src/optimize.jl +++ b/Compiler/src/optimize.jl @@ -116,11 +116,14 @@ function inline_cost_clamp(x::Int) return convert(InlineCostType, x) end +const SRC_FLAG_DECLARED_INLINE = 0x1 +const SRC_FLAG_DECLARED_NOINLINE = 0x2 + is_declared_inline(@nospecialize src::MaybeCompressed) = - ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 1 + ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_INLINE is_declared_noinline(@nospecialize src::MaybeCompressed) = - ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == 2 + ccall(:jl_ir_flag_inlining, UInt8, (Any,), src) == SRC_FLAG_DECLARED_NOINLINE ##################### # OptimizationState # @@ -157,6 +160,7 @@ code_cache(state::InliningState) = WorldView(code_cache(state.interp), state.wor mutable struct OptimizationResult ir::IRCode + inline_flag::UInt8 simplified::Bool # indicates whether the IR was processed with `cfg_simplify!` end @@ -168,7 +172,7 @@ end mutable struct OptimizationState{Interp<:AbstractInterpreter} linfo::MethodInstance src::CodeInfo - result::Union{Nothing, OptimizationResult} + optresult::Union{Nothing, OptimizationResult} stmt_info::Vector{CallInfo} mod::Module sptypes::Vector{VarState} @@ -236,13 +240,29 @@ include("ssair/EscapeAnalysis.jl") include("ssair/passes.jl") include("ssair/irinterp.jl") +function ir_to_codeinf!(opt::OptimizationState, frame::InferenceState, edges::SimpleVector) + ir_to_codeinf!(opt, edges, compute_inlining_cost(frame.interp, frame.result, opt.optresult)) +end + +function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector, inlining_cost::InlineCostType) + src = ir_to_codeinf!(opt, edges) + src.inlining_cost = inlining_cost + src +end + +function ir_to_codeinf!(opt::OptimizationState, edges::SimpleVector) + src = ir_to_codeinf!(opt) + src.edges = edges + src +end + function ir_to_codeinf!(opt::OptimizationState) - (; linfo, src, result) = opt - if result === nothing + (; linfo, src, optresult) = opt + if optresult === nothing return src end - src = ir_to_codeinf!(src, result.ir) - opt.result = nothing + src = ir_to_codeinf!(src, optresult.ir) + opt.optresult = nothing opt.src = src maybe_validate_code(linfo, src, "optimized") return src @@ -485,63 +505,12 @@ end abstract_eval_ssavalue(s::SSAValue, src::Union{IRCode,IncrementalCompact}) = types(src)[s] """ - finish(interp::AbstractInterpreter, opt::OptimizationState, - ir::IRCode, caller::InferenceResult) + finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode) -Post-process information derived by Julia-level optimizations for later use. -In particular, this function determines the inlineability of the optimized code. +Called at the end of optimization to store the resulting IR back into the OptimizationState. """ -function finish(interp::AbstractInterpreter, opt::OptimizationState, - ir::IRCode, caller::InferenceResult) - (; src, linfo) = opt - (; def, specTypes) = linfo - - force_noinline = is_declared_noinline(src) - - # compute inlining and other related optimizations - result = caller.result - @assert !(result isa LimitedAccuracy) - result = widenslotwrapper(result) - - opt.result = OptimizationResult(ir, false) - - # determine and cache inlineability - if !force_noinline - sig = unwrap_unionall(specTypes) - if !(isa(sig, DataType) && sig.name === Tuple.name) - force_noinline = true - end - if !is_declared_inline(src) && result === Bottom - force_noinline = true - end - end - if force_noinline - set_inlineable!(src, false) - elseif isa(def, Method) - if is_declared_inline(src) && isdispatchtuple(specTypes) - # obey @inline declaration if a dispatch barrier would not help - set_inlineable!(src, true) - else - # compute the cost (size) of inlining this code - params = OptimizationParams(interp) - cost_threshold = default = params.inline_cost_threshold - if ⊑(optimizer_lattice(interp), result, Tuple) && !isconcretetype(widenconst(result)) - cost_threshold += params.inline_tupleret_bonus - end - # if the method is declared as `@inline`, increase the cost threshold 20x - if is_declared_inline(src) - cost_threshold += 19*default - end - # a few functions get special treatment - if def.module === _topmod(def.module) - name = def.name - if name === :iterate || name === :unsafe_convert || name === :cconvert - cost_threshold += 4*default - end - end - src.inlining_cost = inline_cost(ir, params, cost_threshold) - end - end +function finishopt!(interp::AbstractInterpreter, opt::OptimizationState, ir::IRCode) + opt.optresult = OptimizationResult(ir, ccall(:jl_ir_flag_inlining, UInt8, (Any,), opt.src), false) return nothing end @@ -1015,7 +984,8 @@ end function optimize(interp::AbstractInterpreter, opt::OptimizationState, caller::InferenceResult) @zone "CC: OPTIMIZER" ir = run_passes_ipo_safe(opt.src, opt) ipo_dataflow_analysis!(interp, opt, ir, caller) - return finish(interp, opt, ir, caller) + finishopt!(interp, opt, ir) + return nothing end const ALL_PASS_NAMES = String[] @@ -1466,7 +1436,7 @@ function statement_or_branch_cost(@nospecialize(stmt), line::Int, src::Union{Cod return thiscost end -function inline_cost(ir::IRCode, params::OptimizationParams, cost_threshold::Int) +function inline_cost_model(ir::IRCode, params::OptimizationParams, cost_threshold::Int) bodycost = 0 for i = 1:length(ir.stmts) stmt = ir[SSAValue(i)][:stmt] diff --git a/Compiler/src/ssair/inlining.jl b/Compiler/src/ssair/inlining.jl index 3830197aef458..c7e052ed17218 100644 --- a/Compiler/src/ssair/inlining.jl +++ b/Compiler/src/ssair/inlining.jl @@ -976,7 +976,7 @@ function retrieve_ir_for_inlining(mi::MethodInstance, ir::IRCode, preserve_local return ir, spec_info, DebugInfo(ir.debuginfo, length(ir.stmts)) end function retrieve_ir_for_inlining(mi::MethodInstance, opt::OptimizationState, preserve_local_sources::Bool) - result = opt.result + result = opt.optresult if result !== nothing !result.simplified && simplify_ir!(result) return retrieve_ir_for_inlining(mi, result.ir, preserve_local_sources) diff --git a/Compiler/src/typeinfer.jl b/Compiler/src/typeinfer.jl index 2b4ada7805140..b7024502b069a 100644 --- a/Compiler/src/typeinfer.jl +++ b/Compiler/src/typeinfer.jl @@ -104,7 +104,10 @@ end function finish!(interp::AbstractInterpreter, caller::InferenceState, validation_world::UInt, time_before::UInt64) result = caller.result #@assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges) - if isdefined(result, :ci) + if caller.cache_mode === CACHE_MODE_LOCAL + @assert !isdefined(result, :ci) + result.src = transform_result_for_local_cache(interp, result) + elseif isdefined(result, :ci) edges = result_edges(interp, caller) ci = result.ci # if we aren't cached, we don't need this edge @@ -115,21 +118,31 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation store_backedges(ci, edges) end inferred_result = nothing - uncompressed = inferred_result + uncompressed = result.src const_flag = is_result_constabi_eligible(result) + debuginfo = nothing discard_src = caller.cache_mode === CACHE_MODE_NULL || const_flag if !discard_src inferred_result = transform_result_for_cache(interp, result, edges) + if inferred_result !== nothing + uncompressed = inferred_result + debuginfo = get_debuginfo(inferred_result) + # Inlining may fast-path the global cache via `VolatileInferenceResult`, so store it back here + result.src = inferred_result + else + if isa(result.src, OptimizationState) + debuginfo = get_debuginfo(ir_to_codeinf!(result.src)) + elseif isa(result.src, CodeInfo) + debuginfo = get_debuginfo(result.src) + end + end # TODO: do we want to augment edges here with any :invoke targets that we got from inlining (such that we didn't have a direct edge to it already)? if inferred_result isa CodeInfo - result.src = inferred_result if may_compress(interp) nslots = length(inferred_result.slotflags) resize!(inferred_result.slottypes::Vector{Any}, nslots) resize!(inferred_result.slotnames, nslots) end - di = inferred_result.debuginfo - uncompressed = inferred_result inferred_result = maybe_compress_codeinfo(interp, result.linfo, inferred_result) result.is_src_volatile = false elseif ci.owner === nothing @@ -137,18 +150,21 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation inferred_result = nothing end end - if !@isdefined di - di = DebugInfo(result.linfo) + if debuginfo === nothing + debuginfo = DebugInfo(result.linfo) end time_now = _time_ns() time_self_ns = caller.time_self_ns + (time_now - time_before) time_total = (time_now - caller.time_start - caller.time_paused) * 1e-9 ccall(:jl_update_codeinst, Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any), ci, inferred_result, const_flag, first(result.valid_worlds), last(result.valid_worlds), encode_effects(result.ipo_effects), - result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, di, edges) + result.analysis_results, time_total, caller.time_caches, time_self_ns * 1e-9, debuginfo, edges) engine_reject(interp, ci) codegen = codegen_cache(interp) - if !discard_src && codegen !== nothing && uncompressed isa CodeInfo + if !discard_src && codegen !== nothing && (isa(uncompressed, CodeInfo) || isa(uncompressed, OptimizationState)) + if isa(uncompressed, OptimizationState) + uncompressed = ir_to_codeinf!(uncompressed, edges) + end # record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work codegen[ci] = uncompressed if bootstrapping_compiler && inferred_result == nothing @@ -299,36 +315,123 @@ function adjust_cycle_frame!(sv::InferenceState, cycle_valid_worlds::WorldRange, return nothing end +function get_debuginfo(src) + isa(src, CodeInfo) && return src.debuginfo + isa(src, OptimizationState) && return src.src.debuginfo + return nothing +end + function is_result_constabi_eligible(result::InferenceResult) result_type = result.result return isa(result_type, Const) && is_foldable_nothrow(result.ipo_effects) && is_inlineable_constant(result_type.val) end -function transform_result_for_cache(::AbstractInterpreter, result::InferenceResult, edges::SimpleVector) +function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult) + src = result.src + isa(src, OptimizationState) || return MAX_INLINE_COST + compute_inlining_cost(interp, result, src.optresult) +end + +function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult, optresult#=::OptimizationResult=#) + return inline_cost_model(interp, result, optresult.inline_flag, optresult.ir) +end + +function inline_cost_model(interp::AbstractInterpreter, result::InferenceResult, + inline_flag::UInt8, ir::IRCode) + + inline_flag === SRC_FLAG_DECLARED_NOINLINE && return MAX_INLINE_COST + + mi = result.linfo + (; def, specTypes) = mi + if !isa(def, Method) + return MAX_INLINE_COST + end + + declared_inline = inline_flag === SRC_FLAG_DECLARED_INLINE + + rt = result.result + @assert !(rt isa LimitedAccuracy) + rt = widenslotwrapper(rt) + + sig = unwrap_unionall(specTypes) + if !(isa(sig, DataType) && sig.name === Tuple.name) + return MAX_INLINE_COST + end + if !declared_inline && rt === Bottom + return MAX_INLINE_COST + end + + if declared_inline && isdispatchtuple(specTypes) + # obey @inline declaration if a dispatch barrier would not help + return MIN_INLINE_COST + else + # compute the cost (size) of inlining this code + params = OptimizationParams(interp) + cost_threshold = default = params.inline_cost_threshold + if ⊑(optimizer_lattice(interp), rt, Tuple) && !isconcretetype(widenconst(rt)) + cost_threshold += params.inline_tupleret_bonus + end + # if the method is declared as `@inline`, increase the cost threshold 20x + if declared_inline + cost_threshold += 19*default + end + # a few functions get special treatment + if def.module === _topmod(def.module) + name = def.name + if name === :iterate || name === :unsafe_convert || name === :cconvert + cost_threshold += 4*default + end + end + return inline_cost_model(ir, params, cost_threshold) + end +end + +function transform_result_for_local_cache(interp::AbstractInterpreter, result::InferenceResult) + if is_result_constabi_eligible(result) + return nothing + end + src = result.src + if isa(src, OptimizationState) + # Compute and store any information required to determine the inlineability of the callee. + opt = src + opt.src.inlining_cost = compute_inlining_cost(interp, result) + end + return src +end + +function transform_result_for_cache(interp::AbstractInterpreter, result::InferenceResult, edges::SimpleVector) + inlining_cost = nothing src = result.src if isa(src, OptimizationState) - src = ir_to_codeinf!(src) + opt = src + inlining_cost = compute_inlining_cost(interp, result, opt.optresult) + discard_optimized_result(interp, opt, inlining_cost) && return nothing + src = ir_to_codeinf!(opt) end if isa(src, CodeInfo) src.edges = edges + if inlining_cost !== nothing + src.inlining_cost = inlining_cost + elseif may_optimize(interp) + src.inlining_cost = compute_inlining_cost(interp, result) + end end return src end +function discard_optimized_result(interp::AbstractInterpreter, opt#=::OptimizationState=#, inlining_cost#=::InlineCostType=#) + may_discard_trees(interp) || return false + return inlining_cost == MAX_INLINE_COST +end + function maybe_compress_codeinfo(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInfo) def = mi.def isa(def, Method) || return ci # don't compress toplevel code can_discard_trees = may_discard_trees(interp) cache_the_tree = !can_discard_trees || is_inlineable(ci) - if cache_the_tree - if may_compress(interp) - return ccall(:jl_compress_ir, String, (Any, Any), def, ci) - else - return ci - end - else - return nothing - end + cache_the_tree || return nothing + may_compress(interp) && return ccall(:jl_compress_ir, String, (Any, Any), def, ci) + return ci end function cache_result!(interp::AbstractInterpreter, result::InferenceResult, ci::CodeInstance) @@ -1103,8 +1206,7 @@ function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_opti else opt = OptimizationState(frame, interp) optimize(interp, opt, frame.result) - src = ir_to_codeinf!(opt) - src.edges = Core.svec(opt.inlining.edges...) + src = ir_to_codeinf!(opt, frame, Core.svec(opt.inlining.edges...)) end result.src = frame.src = src end diff --git a/Compiler/test/codegen.jl b/Compiler/test/codegen.jl index fd8bbae70a346..45db9e73d5a3f 100644 --- a/Compiler/test/codegen.jl +++ b/Compiler/test/codegen.jl @@ -4,6 +4,7 @@ using Random using InteractiveUtils +using InteractiveUtils: code_llvm, code_native using Libdl using Test diff --git a/Compiler/test/inline.jl b/Compiler/test/inline.jl index 92e389ff0dc04..0a88907965f5a 100644 --- a/Compiler/test/inline.jl +++ b/Compiler/test/inline.jl @@ -1,5 +1,7 @@ # This file is a part of Julia. License is MIT: https://julialang.org/license +module inline_tests + using Test using Base.Meta using Core: ReturnNode @@ -2311,3 +2313,5 @@ g_noinline_invoke(x) = f_noinline_invoke(x) let src = code_typed1(g_noinline_invoke, (Union{Symbol,Nothing},)) @test !any(@nospecialize(x)->isa(x,GlobalRef), src.code) end + +end # module inline_tests