104104function finish! (interp:: AbstractInterpreter , caller:: InferenceState , validation_world:: UInt , time_before:: UInt64 )
105105 result = caller. result
106106 # @assert last(result.valid_worlds) <= get_world_counter() || isempty(caller.edges)
107- if isdefined (result, :ci )
107+ if caller. cache_mode === CACHE_MODE_LOCAL
108+ @assert ! isdefined (result, :ci )
109+ result. src = transform_result_for_local_cache (interp, result)
110+ elseif isdefined (result, :ci )
108111 edges = result_edges (interp, caller)
109112 ci = result. ci
110113 # if we aren't cached, we don't need this edge
@@ -115,40 +118,53 @@ function finish!(interp::AbstractInterpreter, caller::InferenceState, validation
115118 store_backedges (ci, edges)
116119 end
117120 inferred_result = nothing
118- uncompressed = inferred_result
121+ uncompressed = result . src
119122 const_flag = is_result_constabi_eligible (result)
123+ debuginfo = nothing
120124 discard_src = caller. cache_mode === CACHE_MODE_NULL || const_flag
121125 if ! discard_src
122126 inferred_result = transform_result_for_cache (interp, result, edges)
127+ if inferred_result != = nothing
128+ uncompressed = inferred_result
129+ debuginfo = get_debuginfo (inferred_result)
130+ # Inlining may fast-path the global cache via `VolatileInferenceResult`, so store it back here
131+ result. src = inferred_result
132+ else
133+ if isa (result. src, OptimizationState)
134+ debuginfo = get_debuginfo (ir_to_codeinf! (result. src))
135+ elseif isa (result. src, CodeInfo)
136+ debuginfo = get_debuginfo (result. src)
137+ end
138+ end
123139 # TODO : do we want to augment edges here with any :invoke targets that we got from inlining (such that we didn't have a direct edge to it already)?
124140 if inferred_result isa CodeInfo
125- result. src = inferred_result
126141 if may_compress (interp)
127142 nslots = length (inferred_result. slotflags)
128143 resize! (inferred_result. slottypes:: Vector{Any} , nslots)
129144 resize! (inferred_result. slotnames, nslots)
130145 end
131- di = inferred_result. debuginfo
132- uncompressed = inferred_result
133146 inferred_result = maybe_compress_codeinfo (interp, result. linfo, inferred_result)
134147 result. is_src_volatile = false
135148 elseif ci. owner === nothing
136149 # The global cache can only handle objects that codegen understands
137150 inferred_result = nothing
138151 end
139152 end
140- if ! @isdefined di
141- di = DebugInfo (result. linfo)
153+ if debuginfo === nothing
154+ debuginfo = DebugInfo (result. linfo)
142155 end
143156 time_now = _time_ns ()
144157 time_self_ns = caller. time_self_ns + (time_now - time_before)
145158 time_total = (time_now - caller. time_start - caller. time_paused) * 1e-9
146159 ccall (:jl_update_codeinst , Cvoid, (Any, Any, Int32, UInt, UInt, UInt32, Any, Float64, Float64, Float64, Any, Any),
147160 ci, inferred_result, const_flag, first (result. valid_worlds), last (result. valid_worlds), encode_effects (result. ipo_effects),
148- result. analysis_results, time_total, caller. time_caches, time_self_ns * 1e-9 , di , edges)
161+ result. analysis_results, time_total, caller. time_caches, time_self_ns * 1e-9 , debuginfo , edges)
149162 engine_reject (interp, ci)
150163 codegen = codegen_cache (interp)
151- if ! discard_src && codegen != = nothing && uncompressed isa CodeInfo
164+ if ! discard_src && codegen != = nothing && (isa (uncompressed, CodeInfo) || isa (uncompressed, OptimizationState))
165+ if isa (uncompressed, OptimizationState)
166+ uncompressed = ir_to_codeinf! (uncompressed, edges)
167+ end
152168 # record that the caller could use this result to generate code when required, if desired, to avoid repeating n^2 work
153169 codegen[ci] = uncompressed
154170 if bootstrapping_compiler && inferred_result == nothing
@@ -299,36 +315,123 @@ function adjust_cycle_frame!(sv::InferenceState, cycle_valid_worlds::WorldRange,
299315 return nothing
300316end
301317
# Fetch the `debuginfo` associated with an inference source object.
# - `CodeInfo`: its own `debuginfo` field.
# - `OptimizationState`: the `debuginfo` of the object stored in its `src` field.
# - anything else: `nothing`.
function get_debuginfo(src)
    if isa(src, CodeInfo)
        return src.debuginfo
    elseif isa(src, OptimizationState)
        return src.src.debuginfo
    else
        return nothing
    end
end
323+
# Report whether `result` passes all three eligibility checks, evaluated in order:
#   1. the inferred result is a `Const`,
#   2. `is_foldable_nothrow` holds for its IPO effects,
#   3. `is_inlineable_constant` holds for the constant's value.
function is_result_constabi_eligible(result::InferenceResult)
    rt = result.result
    isa(rt, Const) || return false
    is_foldable_nothrow(result.ipo_effects) || return false
    return is_inlineable_constant(rt.val)
end
306328
307- function transform_result_for_cache (:: AbstractInterpreter , result:: InferenceResult , edges:: SimpleVector )
# Inlining cost for `result`, based on the source currently attached to it.
# When `result.src` is an `OptimizationState`, the cost is computed from its `optresult`;
# for any other kind of source this returns `MAX_INLINE_COST`.
function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult)
    opt = result.src
    if isa(opt, OptimizationState)
        return compute_inlining_cost(interp, result, opt.optresult)
    end
    return MAX_INLINE_COST
end
334+
# Inlining cost for `result` given an explicit optimizer result: forwards the
# `inline_flag` and `ir` fields of `optresult` to `inline_cost_model`.
function compute_inlining_cost(interp::AbstractInterpreter, result::InferenceResult, optresult#=::OptimizationResult=#)
    flag = optresult.inline_flag
    ir = optresult.ir
    return inline_cost_model(interp, result, flag, ir)
end
338+
# Determine the inlining cost of the optimized `ir` belonging to `result`.
#
# Returns `MAX_INLINE_COST` when any of the following holds:
#   - `inline_flag` is `SRC_FLAG_DECLARED_NOINLINE`,
#   - the method instance's `def` is not a `Method`,
#   - the unwrapped `specTypes` is not a `Tuple` datatype,
#   - the result type is `Bottom` and inlining was not explicitly declared.
# Returns `MIN_INLINE_COST` when inlining was declared and `specTypes` is a dispatch tuple.
# Otherwise assembles a cost threshold from `OptimizationParams(interp)` and defers to
# the `inline_cost_model(ir, params, cost_threshold)` method.
function inline_cost_model(interp::AbstractInterpreter, result::InferenceResult,
                           inline_flag::UInt8, ir::IRCode)
    if inline_flag === SRC_FLAG_DECLARED_NOINLINE
        return MAX_INLINE_COST
    end

    mi = result.linfo
    def = mi.def
    specTypes = mi.specTypes
    isa(def, Method) || return MAX_INLINE_COST

    has_inline_decl = inline_flag === SRC_FLAG_DECLARED_INLINE

    rettype = result.result
    @assert !(rettype isa LimitedAccuracy)
    rettype = widenslotwrapper(rettype)

    sig = unwrap_unionall(specTypes)
    if !isa(sig, DataType) || sig.name !== Tuple.name
        return MAX_INLINE_COST
    end
    if rettype === Bottom && !has_inline_decl
        return MAX_INLINE_COST
    end

    if has_inline_decl && isdispatchtuple(specTypes)
        # obey the inline declaration if a dispatch barrier would not help
        return MIN_INLINE_COST
    end

    # compute the cost (size) of inlining this code
    params = OptimizationParams(interp)
    base_threshold = params.inline_cost_threshold
    threshold = base_threshold
    # non-concrete tuple return types earn an extra bonus
    if ⊑(optimizer_lattice(interp), rettype, Tuple) && !isconcretetype(widenconst(rettype))
        threshold += params.inline_tupleret_bonus
    end
    # an explicit inline declaration raises the threshold to 20x the default
    if has_inline_decl
        threshold += 19 * base_threshold
    end
    # a few functions defined in the top module get special treatment
    if def.module === _topmod(def.module)
        fname = def.name
        if fname === :iterate || fname === :unsafe_convert || fname === :cconvert
            threshold += 4 * base_threshold
        end
    end
    return inline_cost_model(ir, params, threshold)
end
388+
# Produce the source object that `result` should keep when it is cached locally.
# Returns `nothing` when `is_result_constabi_eligible(result)` holds. Otherwise returns
# `result.src` unchanged in identity; if that source is an `OptimizationState`, its inner
# `src` is first annotated with the computed inlining cost.
function transform_result_for_local_cache(interp::AbstractInterpreter, result::InferenceResult)
    is_result_constabi_eligible(result) && return nothing
    src = result.src
    if isa(src, OptimizationState)
        # Compute and store any information required to determine the inlineability of the callee.
        src.src.inlining_cost = compute_inlining_cost(interp, result)
    end
    return src
end
401+
# Produce the source object to be stored for `result` in the cache.
#
# When `result.src` is an `OptimizationState`, its inlining cost is computed first; if
# `discard_optimized_result` says the optimized result should be dropped, `nothing` is
# returned, otherwise the state is converted with `ir_to_codeinf!`.
# A resulting `CodeInfo` gets `edges` attached and an `inlining_cost` recorded: the cost
# computed above when available, or else a freshly computed one if `may_optimize(interp)`.
# Any other kind of source is returned as is.
function transform_result_for_cache(interp::AbstractInterpreter, result::InferenceResult, edges::SimpleVector)
    src = result.src
    cost = nothing
    if isa(src, OptimizationState)
        optstate = src
        cost = compute_inlining_cost(interp, result, optstate.optresult)
        if discard_optimized_result(interp, optstate, cost)
            return nothing
        end
        src = ir_to_codeinf!(optstate)
    end
    isa(src, CodeInfo) || return src
    src.edges = edges
    if cost === nothing
        if may_optimize(interp)
            src.inlining_cost = compute_inlining_cost(interp, result)
        end
    else
        src.inlining_cost = cost
    end
    return src
end
317421
# Decide whether an optimized result should be dropped instead of cached: true only when
# `may_discard_trees(interp)` holds and `inlining_cost` equals `MAX_INLINE_COST`.
# `opt` is accepted but not inspected by this method.
function discard_optimized_result(interp::AbstractInterpreter, opt#=::OptimizationState=#, inlining_cost#=::InlineCostType=#)
    if may_discard_trees(interp)
        return inlining_cost == MAX_INLINE_COST
    end
    return false
end
426+
# Choose the form in which `ci` is stored for method instance `mi`:
#   - `ci` itself when `mi.def` is not a `Method` (toplevel code is never compressed),
#   - `nothing` when the interpreter may discard trees and `ci` is not inlineable,
#   - the `jl_compress_ir` output when `may_compress(interp)` holds,
#   - otherwise `ci` unchanged.
function maybe_compress_codeinfo(interp::AbstractInterpreter, mi::MethodInstance, ci::CodeInfo)
    def = mi.def
    if !isa(def, Method)
        return ci # don't compress toplevel code
    end
    # the tree is only dropped when discarding is permitted and it is not worth inlining
    if may_discard_trees(interp) && !is_inlineable(ci)
        return nothing
    end
    return may_compress(interp) ? ccall(:jl_compress_ir, String, (Any, Any), def, ci) : ci
end
333436
334437function cache_result! (interp:: AbstractInterpreter , result:: InferenceResult , ci:: CodeInstance )
@@ -1116,8 +1219,7 @@ function typeinf_frame(interp::AbstractInterpreter, mi::MethodInstance, run_opti
11161219 else
11171220 opt = OptimizationState (frame, interp)
11181221 optimize (interp, opt, frame. result)
1119- src = ir_to_codeinf! (opt)
1120- src. edges = Core. svec (opt. inlining. edges... )
1222+ src = ir_to_codeinf! (opt, frame, Core. svec (opt. inlining. edges... ))
11211223 end
11221224 result. src = frame. src = src
11231225 end
0 commit comments