From 532125d51d23f22c3fd117fe8a37c158fe16ac62 Mon Sep 17 00:00:00 2001
From: Keno Fischer <keno@juliacomputing.com>
Date: Sat, 15 Oct 2022 10:53:43 -0400
Subject: [PATCH] Add ability to not round-trip uncached inference results
 through IRCode (#47137)

There are generally three reasons inference results end up uncached:
1. They come from typeinf_ext
2. We discover some validity limitation (generally due to recursion)
3. They are used for constant propagation

Currently, we convert all such inference results back to CodeInfo,
in case they come from case 1. However, for inference results of kind 3,
the only thing we ever do with them is turn them back into IRCode
for inlining. This IRCode -> CodeInfo -> IRCode round trip is quite wasteful.
Stop doing that. This PR is the minimal change to accomplish that
by marking those inference results that actually need to be converted
back (for case 1). This probably needs some tweaking for external
AbstractInterpreters, but let's make sure this works and has the
right performance first.

This commit just adds the capability, but doesn't turn it on
by default, since the performance for base didn't quite look
favorable yet.
---
 base/compiler/optimize.jl  |  2 ++
 base/compiler/typeinfer.jl | 23 +++++++++++++++++++++--
 base/compiler/types.jl     |  5 +++--
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/base/compiler/optimize.jl b/base/compiler/optimize.jl
index 558a9e94f05d0..dc532438b46a2 100644
--- a/base/compiler/optimize.jl
+++ b/base/compiler/optimize.jl
@@ -104,6 +104,8 @@ function inlining_policy(interp::AbstractInterpreter,
         else
             return nothing
         end
+    elseif isa(src, IRCode)
+        return src
     end
     return nothing
 end
diff --git a/base/compiler/typeinfer.jl b/base/compiler/typeinfer.jl
index 6db3c42a6ca54..82183cb594444 100644
--- a/base/compiler/typeinfer.jl
+++ b/base/compiler/typeinfer.jl
@@ -222,7 +222,19 @@ function finish!(interp::AbstractInterpreter, caller::InferenceResult)
     opt = caller.src
     if opt isa OptimizationState # implies `may_optimize(interp) === true`
         if opt.ir !== nothing
-            caller.src = ir_to_codeinf!(opt)
+            if caller.must_be_codeinf
+                caller.src = ir_to_codeinf!(opt)
+            elseif is_inlineable(opt.src)
+                # TODO: If the CFG is too big, inlining becomes more expensive and if we're going to
+                # use this IR over and over, it's worth simplifying it. Round trips through
+                # CodeInstance do this implicitly, since they recompute the CFG, so try to
+                # match that behavior here.
+                # ir = cfg_simplify!(opt.ir)
+                caller.src = opt.ir
+            else
+                # Not cached and not inlineable - drop the ir
+                caller.src = nothing
+            end
         end
     end
     return caller.src
@@ -925,6 +937,9 @@ function typeinf_edge(interp::AbstractInterpreter, method::Method, @nospecialize
         # completely new
         lock_mi_inference(interp, mi)
         result = InferenceResult(mi)
+        if cache === :local
+            result.must_be_codeinf = true # TODO directly keep `opt.ir` for this case
+        end
         frame = InferenceState(result, cache, interp) # always use the cache for edge targets
         if frame === nothing
             # can't get the source for this, so we know nothing
@@ -998,6 +1013,7 @@ function typeinf_frame(interp::AbstractInterpreter, method::Method, @nospecializ
     mi = specialize_method(method, atype, sparams)::MethodInstance
     ccall(:jl_typeinf_timing_begin, Cvoid, ())
     result = InferenceResult(mi)
+    result.must_be_codeinf = true
     frame = InferenceState(result, run_optimizer ? :global : :no, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
@@ -1056,7 +1072,9 @@ function typeinf_ext(interp::AbstractInterpreter, mi::MethodInstance)
         return retrieve_code_info(mi)
     end
     lock_mi_inference(interp, mi)
-    frame = InferenceState(InferenceResult(mi), #=cache=#:global, interp)
+    result = InferenceResult(mi)
+    result.must_be_codeinf = true
+    frame = InferenceState(result, #=cache=#:global, interp)
     frame === nothing && return nothing
     typeinf(interp, frame)
     ccall(:jl_typeinf_timing_end, Cvoid, ())
@@ -1099,6 +1117,7 @@ function typeinf_ext_toplevel(interp::AbstractInterpreter, linfo::MethodInstance
             ccall(:jl_typeinf_timing_begin, Cvoid, ())
             if !src.inferred
                 result = InferenceResult(linfo)
+                result.must_be_codeinf = true
                 frame = InferenceState(result, src, #=cache=#:global, interp)
                 typeinf(interp, frame)
                 @assert frame.inferred # TODO: deal with this better
diff --git a/base/compiler/types.jl b/base/compiler/types.jl
index 5b5d9c4b57c8d..fc7714523b2f9 100644
--- a/base/compiler/types.jl
+++ b/base/compiler/types.jl
@@ -41,18 +41,19 @@ mutable struct InferenceResult
     argtypes::Vector{Any}
     overridden_by_const::BitVector
     result                   # ::Type, or InferenceState if WIP
-    src                      # ::Union{CodeInfo, OptimizationState} if inferred copy is available, nothing otherwise
+    src                      # ::Union{CodeInfo, IRCode, OptimizationState} if inferred copy is available, nothing otherwise
     valid_worlds::WorldRange # if inference and optimization is finished
     ipo_effects::Effects     # if inference is finished
     effects::Effects         # if optimization is finished
     argescapes               # ::ArgEscapeCache if optimized, nothing otherwise
+    must_be_codeinf::Bool    # if this must come out as CodeInfo or leaving it as IRCode is ok
     # NOTE the main constructor is defined within inferencestate.jl
     global function _InferenceResult(
         linfo::MethodInstance,
         arginfo#=::Union{Nothing,Tuple{ArgInfo,InferenceState}}=#)
         argtypes, overridden_by_const = matching_cache_argtypes(linfo, arginfo)
         return new(linfo, argtypes, overridden_by_const, Any, nothing,
-            WorldRange(), Effects(), Effects(), nothing)
+            WorldRange(), Effects(), Effects(), nothing, true)
     end
 end