Skip to content

Commit

Permalink
optimizer: supports callsite annotations of inlining, fixes #18773
Browse files Browse the repository at this point in the history
Enable `@inline`/`@noinline` annotations on function callsites.
From #40754.

Now `@inline` and `@noinline` can be applied to a code block and then
the compiler will try to (not) inline calls within the block:
```julia
@inline f(...) # The compiler will try to inline `f`

@inline f(...) + g(...) # The compiler will try to inline `f`, `g` and `+`

@inline f(args...) = ... # Of course annotations on a definition is still allowed
```

Here are couple of notes on how those callsite annotations will work:
- callsite annotation always has the precedence over the annotation
  applied to the definition of the called function, whichever we use
  `@inline`/`@noinline`:
  ```julia
  @inline function explicit_inline(args...)
      # body
  end

  let
      @noinline explicit_inline(args...) # this call will not be inlined
  end
  ```
- when callsite annotations are nested, the innermost annotations has
  the precedence
  ```julia
  @noinline let a0, b0 = ...
      a = @inline f(a0)  # the compiler will try to inline this call
      b = notinlined(b0) # the compiler will NOT try to inline this call
      return a, b
  end
  ```
They're both tested and included in documentations.
  • Loading branch information
aviatesk committed Aug 25, 2021
1 parent a11de31 commit 00e956b
Show file tree
Hide file tree
Showing 16 changed files with 358 additions and 87 deletions.
11 changes: 7 additions & 4 deletions base/compiler/abstractinterpretation.jl
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ function maybe_get_const_prop_profitable(interp::AbstractInterpreter, result::Me
return nothing
end
mi = mi::MethodInstance
if !force && !const_prop_methodinstance_heuristic(interp, method, mi)
if !force && !const_prop_methodinstance_heuristic(interp, match, mi)
add_remark!(interp, sv, "[constprop] Disabled by method instance heuristic")
return nothing
end
Expand Down Expand Up @@ -699,7 +699,8 @@ end
# This is a heuristic to avoid trying to const prop through complicated functions
# where we would spend a lot of time, but are probably unlikely to get an improved
# result anyway.
function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method::Method, mi::MethodInstance)
function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, match::MethodMatch, mi::MethodInstance)
method = match.method
if method.is_for_opaque_closure
# Not inlining an opaque closure can be very expensive, so be generous
# with the const-prop-ability. It is quite possible that we can't infer
Expand All @@ -717,7 +718,8 @@ function const_prop_methodinstance_heuristic(interp::AbstractInterpreter, method
if isdefined(code, :inferred) && !cache_inlineable
cache_inf = code.inferred
if !(cache_inf === nothing)
cache_inlineable = inlining_policy(interp)(cache_inf) !== nothing
# TODO maybe we want to respect callsite `@inline`/`@noinline` annotations here ?
cache_inlineable = inlining_policy(interp, cache_inf, 0x00, match) !== nothing
end
end
if !cache_inlineable
Expand Down Expand Up @@ -1896,7 +1898,8 @@ function typeinf_local(interp::AbstractInterpreter, frame::InferenceState)
if isa(fname, SlotNumber)
changes = StateUpdate(fname, VarState(Any, false), changes, false)
end
elseif hd === :inbounds || hd === :meta || hd === :loopinfo || hd === :code_coverage_effect
elseif hd === :code_coverage_effect ||
(hd !== :boundscheck && hd !== nothing && is_meta_expr_head(hd)) # :boundscheck can be narrowed to Bool
# these do not generate code
else
t = abstract_eval_statement(interp, stmt, changes, frame)
Expand Down
59 changes: 49 additions & 10 deletions base/compiler/optimize.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,21 +21,30 @@ function push!(et::EdgeTracker, ci::CodeInstance)
push!(et, ci.def)
end

struct InliningState{S <: Union{EdgeTracker, Nothing}, T, P}
struct InliningState{S <: Union{EdgeTracker, Nothing}, T, I<:AbstractInterpreter}
params::OptimizationParams
et::S
mi_cache::T
policy::P
interp::I
end

function default_inlining_policy(@nospecialize(src))
function inlining_policy(interp::AbstractInterpreter, @nospecialize(src), stmt_flag::UInt8, match::Union{MethodMatch,InferenceResult})
if isa(src, CodeInfo) || isa(src, Vector{UInt8})
src_inferred = ccall(:jl_ir_flag_inferred, Bool, (Any,), src)
src_inlineable = ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
src_inlineable = is_stmt_inline(stmt_flag) || ccall(:jl_ir_flag_inlineable, Bool, (Any,), src)
return src_inferred && src_inlineable ? src : nothing
end
if isa(src, OptimizationState) && isdefined(src, :ir)
return src.src.inlineable ? src.ir : nothing
elseif isa(src, OptimizationState) && isdefined(src, :ir)
return (is_stmt_inline(stmt_flag) || src.src.inlineable) ? src.ir : nothing
elseif src === nothing && is_stmt_inline(stmt_flag) && isa(match, MethodMatch)
# when the source isn't available at this moment, try to re-infer and inline it
# NOTE we can make inference try to keep the source if the call is going to be inlined,
# but then inlining will depend on local state of inference and so the first entry
# and the succeeding ones may generate different code; rather we always re-infer
# the source to avoid the problem while it's obviously not most efficient
# HACK disable inlining for the re-inference to avoid cycles by making sure the following inference never comes here again
interp = NativeInterpreter(get_world_counter(interp); opt_params = OptimizationParams(; inlining = false))
src, rt = typeinf_code(interp, match.method, match.spec_types, match.sparams, true)
return src
end
return nothing
end
Expand All @@ -57,7 +66,7 @@ mutable struct OptimizationState
inlining = InliningState(params,
EdgeTracker(s_edges, frame.valid_worlds),
WorldView(code_cache(interp), frame.world),
inlining_policy(interp))
interp)
return new(frame.linfo,
frame.src, nothing, frame.stmt_info, frame.mod,
frame.sptypes, frame.slottypes, false,
Expand Down Expand Up @@ -86,7 +95,7 @@ mutable struct OptimizationState
inlining = InliningState(params,
nothing,
WorldView(code_cache(interp), get_world_counter()),
inlining_policy(interp))
interp)
return new(linfo,
src, nothing, stmt_info, mod,
sptypes_from_meth_instance(linfo), slottypes, false,
Expand Down Expand Up @@ -128,6 +137,10 @@ const SLOT_USEDUNDEF = 32 # slot has uses that might raise UndefVarError
# This statement was marked as @inbounds by the user. If replaced by inlining,
# any contained boundschecks may be removed
const IR_FLAG_INBOUNDS = 0x01
# This statement was marked as @inline by the user
const IR_FLAG_INLINE = 0x01 << 1
# This statement was marked as @noinline by the user
const IR_FLAG_NOINLINE = 0x01 << 2
# This statement may be removed if its result is unused. In particular it must
# thus be both pure and effect free.
const IR_FLAG_EFFECT_FREE = 0x01 << 4
Expand Down Expand Up @@ -173,6 +186,9 @@ function isinlineable(m::Method, me::OptimizationState, params::OptimizationPara
return inlineable
end

is_stmt_inline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_INLINE != 0
is_stmt_noinline(stmt_flag::UInt8) = stmt_flag & IR_FLAG_NOINLINE != 0

# These affect control flow within the function (so may not be removed
# if there is no usage within the function), but don't affect the purity
# of the function as a whole.
Expand Down Expand Up @@ -359,6 +375,7 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv::
renumber_ir_elements!(code, changemap, labelmap)

inbounds_depth = 0 # Number of stacked inbounds
inline_flags = BitVector()
meta = Any[]
flags = fill(0x00, length(code))
for i = 1:length(code)
Expand All @@ -373,16 +390,38 @@ function convert_to_ircode(ci::CodeInfo, code::Vector{Any}, coverage::Bool, sv::
inbounds_depth -= 1
end
stmt = nothing
elseif isexpr(stmt, :inline)
if stmt.args[1]::Bool
push!(inline_flags, true)
else
pop!(inline_flags)
end
stmt = nothing
elseif isexpr(stmt, :noinline)
if stmt.args[1]::Bool
push!(inline_flags, false)
else
pop!(inline_flags)
end
stmt = nothing
else
stmt = normalize(stmt, meta)
end
code[i] = stmt
if !(stmt === nothing)
if stmt !== nothing
if inbounds_depth > 0
flags[i] |= IR_FLAG_INBOUNDS
end
if !isempty(inline_flags)
if last(inline_flags)
flags[i] |= IR_FLAG_INLINE
else
flags[i] |= IR_FLAG_NOINLINE
end
end
end
end
@assert isempty(inline_flags) "malformed meta flags"
strip_trailing_junk!(ci, code, stmtinfo, flags)
cfg = compute_basic_blocks(code)
types = Any[]
Expand Down
Loading

0 comments on commit 00e956b

Please sign in to comment.