From ce688fdd76f683baedfd3334650c847730c1133c Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Mon, 22 Nov 2021 09:11:31 -0800
Subject: [PATCH] JIT: refactor to allow OSR to switch to optimized (#61851)

When OSR is enabled, the jit may still need to switch to optimized codegen
if there is something in the method that OSR cannot handle. Examples include:

* localloc
* loops in handlers
* reverse pinvoke (currently bypasses Tiering so somewhat moot)
* tail. prefixes (currently tolerated as Tiering should suffice)

When OSR is enabled, rework the "switch to optimized" logic in the jit to
check for these cases up front before we start importing code.

When both QuickJitForLoops and OnStackReplacement are enabled, this should
ensure that if we can't transition out of long-running Tier0 code (via OSR)
then we will instead optimize the method. Very few methods overall should
opt out of Tier0.

Note some of these unhandled constructs can eventually be handled by OSR,
with additional work.

For explicit tail calls: this should replicate the current logic where we
always optimize methods with explicit tail calls unless we're instrumenting
them. To make this work with OSR we simply won't put patchpoints in methods
with explicit tail calls, instead trusting that tiering can get us to
optimized versions.
---
 src/coreclr/jit/compiler.cpp             | 51 ++++++++++++++++++--
 src/coreclr/jit/compiler.h               | 36 +++++++-------
 src/coreclr/jit/compiler.hpp             |  4 ++
 src/coreclr/jit/fgbasic.cpp              | 61 ++++++++++++++++++++----
 src/coreclr/jit/fgopt.cpp                |  1 +
 src/coreclr/jit/flowgraph.cpp            |  8 +++-
 src/coreclr/jit/importer.cpp             | 44 +++++++++++------
 src/tests/JIT/opt/OSR/handlerloop.cs     | 31 ++++++++++++
 src/tests/JIT/opt/OSR/handlerloop.csproj | 24 ++++++++++
 9 files changed, 215 insertions(+), 45 deletions(-)
 create mode 100644 src/tests/JIT/opt/OSR/handlerloop.cs
 create mode 100644 src/tests/JIT/opt/OSR/handlerloop.csproj

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index a2b0265a579dd..3ec9e969bc59c 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -1878,6 +1878,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc,
     compJmpOpUsed         = false;
     compLongUsed          = false;
     compTailCallUsed      = false;
+    compTailPrefixSeen    = false;
     compLocallocSeen      = false;
     compLocallocUsed      = false;
     compLocallocOptimized = false;
@@ -6277,7 +6278,8 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
     info.compTotalColdCodeSize = 0;
     info.compClassProbeCount   = 0;
 
-    compHasBackwardJump = false;
+    compHasBackwardJump          = false;
+    compHasBackwardJumpInHandler = false;
 
 #ifdef DEBUG
     compCurBB = nullptr;
@@ -6431,10 +6433,51 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
         goto _Next;
     }
 
-    if (compHasBackwardJump && (info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0 && fgCanSwitchToOptimized())
+    // We may decide to optimize this method,
+    // to avoid spending a long time stuck in Tier0 code.
+    //
+    if (fgCanSwitchToOptimized())
     {
-        // Method likely has a loop, switch to the OptimizedTier to avoid spending too much time running slower code
-        fgSwitchToOptimized();
+        // We only expect to be able to do this at Tier0.
+        //
+        assert(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0));
+
+        // Normal tiering should bail us out of Tier0 tail call induced loops.
+        // So keep these methods in Tier0 if we're gathering PGO data.
+        // If we're not gathering PGO, then switch these to optimized to
+        // minimize the number of tail call helper stubs we might need.
+        // Reconsider this if/when we're able to share those stubs.
+        //
+        // Honor the config setting that tells the jit to
+        // always optimize methods with loops.
+        //
+        // If that's not set, and OSR is enabled, the jit may still
+        // decide to optimize, if there's something in the method that
+        // OSR currently cannot handle.
+        //
+        const char* reason = nullptr;
+
+        if (compTailPrefixSeen && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
+        {
+            reason = "tail.call and not BBINSTR";
+        }
+        else if ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0)
+        {
+            if (compHasBackwardJump)
+            {
+                reason = "loop";
+            }
+        }
+        else if (JitConfig.TC_OnStackReplacement() > 0)
+        {
+            const bool patchpointsOK = compCanHavePatchpoints(&reason);
+            assert(patchpointsOK || (reason != nullptr));
+        }
+
+        if (reason != nullptr)
+        {
+            fgSwitchToOptimized(reason);
+        }
     }
 
     compSetOptimizationLevel();
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 97b8024b860e6..62447cce58a23 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -2983,6 +2983,8 @@ class Compiler
     bool fgNormalizeEHCase2();
     bool fgNormalizeEHCase3();
 
+    void fgCheckForLoopsInHandlers();
+
 #ifdef DEBUG
     void dispIncomingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
     void dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
@@ -6068,7 +6070,7 @@ class Compiler
     BasicBlock* fgLookupBB(unsigned addr);
 
     bool fgCanSwitchToOptimized();
-    void fgSwitchToOptimized();
+    void fgSwitchToOptimized(const char* reason);
 
     bool fgMayExplicitTailCall();
 
@@ -9385,21 +9387,23 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 
     InlineResult* compInlineResult; // The result of importing the inlinee method.
 
-    bool compDoAggressiveInlining; // If true, mark every method as CORINFO_FLG_FORCEINLINE
-    bool compJmpOpUsed;            // Does the method do a JMP
-    bool compLongUsed;             // Does the method use TYP_LONG
-    bool compFloatingPointUsed;    // Does the method use TYP_FLOAT or TYP_DOUBLE
-    bool compTailCallUsed;         // Does the method do a tailcall
-    bool compLocallocSeen;         // Does the method IL have localloc opcode
-    bool compLocallocUsed;         // Does the method use localloc.
-    bool compLocallocOptimized;    // Does the method have an optimized localloc
-    bool compQmarkUsed;            // Does the method use GT_QMARK/GT_COLON
-    bool compQmarkRationalized;    // Is it allowed to use a GT_QMARK/GT_COLON node.
-    bool compUnsafeCastUsed;       // Does the method use LDIND/STIND to cast between scalar/refernce types
-    bool compHasBackwardJump;      // Does the method (or some inlinee) have a lexically backwards jump?
-    bool compSwitchedToOptimized;  // Codegen initially was Tier0 but jit switched to FullOpts
-    bool compSwitchedToMinOpts;    // Codegen initially was Tier1/FullOpts but jit switched to MinOpts
-    bool compSuppressedZeroInit;   // There are vars with lvSuppressedZeroInit set
+    bool compDoAggressiveInlining;     // If true, mark every method as CORINFO_FLG_FORCEINLINE
+    bool compJmpOpUsed;                // Does the method do a JMP
+    bool compLongUsed;                 // Does the method use TYP_LONG
+    bool compFloatingPointUsed;        // Does the method use TYP_FLOAT or TYP_DOUBLE
+    bool compTailCallUsed;             // Does the method do a tailcall
+    bool compTailPrefixSeen;           // Does the method IL have tail. prefix
+    bool compLocallocSeen;             // Does the method IL have localloc opcode
+    bool compLocallocUsed;             // Does the method use localloc.
+    bool compLocallocOptimized;        // Does the method have an optimized localloc
+    bool compQmarkUsed;                // Does the method use GT_QMARK/GT_COLON
+    bool compQmarkRationalized;        // Is it allowed to use a GT_QMARK/GT_COLON node.
+    bool compUnsafeCastUsed;           // Does the method use LDIND/STIND to cast between scalar/refernce types
+    bool compHasBackwardJump;          // Does the method (or some inlinee) have a lexically backwards jump?
+    bool compHasBackwardJumpInHandler; // Does the method have a lexically backwards jump in a handler?
+    bool compSwitchedToOptimized;      // Codegen initially was Tier0 but jit switched to FullOpts
+    bool compSwitchedToMinOpts;        // Codegen initially was Tier1/FullOpts but jit switched to MinOpts
+    bool compSuppressedZeroInit;       // There are vars with lvSuppressedZeroInit set
 
     // NOTE: These values are only reliable after
     //       the importing is completely finished.
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index 616080be59c12..5a4ca2d63db74 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -4716,6 +4716,10 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason)
     {
         whyNot = "localloc";
     }
+    else if (compHasBackwardJumpInHandler)
+    {
+        whyNot = "loop in handler";
+    }
     else if (opts.IsReversePInvoke())
     {
         whyNot = "reverse pinvoke";
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index 76a093a7c6290..a16a43efe7ca8 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -2738,15 +2738,9 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
                        BADCODE3("tail call not followed by ret", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
                    }
 
-                   if (fgCanSwitchToOptimized() && fgMayExplicitTailCall())
+                   if (fgMayExplicitTailCall())
                    {
-                       // Method has an explicit tail call that may run like a loop or may not be generated as a tail
-                       // call in tier 0, switch to optimized to avoid spending too much time running slower code
-                       if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) ||
-                           ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0))
-                       {
-                           fgSwitchToOptimized();
-                       }
+                       compTailPrefixSeen = true;
                    }
                }
                else
@@ -3534,6 +3528,57 @@ void Compiler::fgFindBasicBlocks()
 #endif
 
     fgNormalizeEH();
+
+    fgCheckForLoopsInHandlers();
+}
+
+//------------------------------------------------------------------------
+// fgCheckForLoopsInHandlers: scan blocks seeing if any handler block
+//   is a backedge target.
+//
+// Notes:
+//    Sets compHasBackwardJumpInHandler if so. This will disable
+//    setting patchpoints in this method and prompt the jit to
+//    optimize the method instead.
+//
+//    We assume any late-added handler (say for synchronized methods)
+//    will not introduce any loops.
+//
+void Compiler::fgCheckForLoopsInHandlers()
+{
+    // We only care about this if we are going to set OSR patchpoints
+    // and the method has exception handling.
+    //
+    if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
+    {
+        return;
+    }
+
+    if (JitConfig.TC_OnStackReplacement() == 0)
+    {
+        return;
+    }
+
+    if (info.compXcptnsCount == 0)
+    {
+        return;
+    }
+
+    // Walk blocks in handlers and filters, looking for a backedge target.
+    //
+    assert(!compHasBackwardJumpInHandler);
+    for (BasicBlock* const blk : Blocks())
+    {
+        if (blk->hasHndIndex())
+        {
+            if (blk->bbFlags & BBF_BACKWARD_JUMP_TARGET)
+            {
+                JITDUMP("\nHandler block " FMT_BB " is backward jump target; can't have patchpoints in this method\n", blk->bbNum);
+                compHasBackwardJumpInHandler = true;
+                break;
+            }
+        }
+    }
 }
 
 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index 428291b91c2f2..a41724f8ef42a 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -1523,6 +1523,7 @@ void Compiler::fgPostImportationCleanup()
                 BasicBlock* const newBlock = fgSplitBlockAtBeginning(fromBlock);
 
                 fromBlock->bbFlags |= BBF_INTERNAL;
+                newBlock->bbFlags &= ~BBF_DONT_REMOVE;
 
                 GenTree* const entryStateLcl = gtNewLclvNode(entryStateVar, TYP_INT);
                 GenTree* const compareEntryStateToZero =
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index d99bc4d0be846..3c0aaeb80395a 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -524,6 +524,9 @@ bool Compiler::fgCanSwitchToOptimized()
 //------------------------------------------------------------------------
 // fgSwitchToOptimized: Switch the opt level from tier 0 to optimized
 //
+// Arguments:
+//    reason - reason why opt level was switched
+//
 // Assumptions:
 //    - fgCanSwitchToOptimized() is true
 //    - compSetOptimizationLevel() has not been called
@@ -532,15 +535,16 @@ bool Compiler::fgCanSwitchToOptimized()
 //    This method is to be called at some point before compSetOptimizationLevel() to switch the opt level to optimized
 //    based on information gathered in early phases.
-void Compiler::fgSwitchToOptimized()
+void Compiler::fgSwitchToOptimized(const char* reason)
 {
     assert(fgCanSwitchToOptimized());
 
     // Switch to optimized and re-init options
-    JITDUMP("****\n**** JIT Tier0 jit request switching to Tier1 because of loop\n****\n");
+    JITDUMP("****\n**** JIT Tier0 jit request switching to Tier1 because: %s\n****\n", reason);
     assert(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0));
     opts.jitFlags->Clear(JitFlags::JIT_FLAG_TIER0);
     opts.jitFlags->Clear(JitFlags::JIT_FLAG_BBINSTR);
+    opts.jitFlags->Clear(JitFlags::JIT_FLAG_OSR);
 
     // Leave a note for jit diagnostics
     compSwitchedToOptimized = true;
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 2f2189391c385..a2a09789e4d1d 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -11591,26 +11591,38 @@ void Compiler::impImportBlockCode(BasicBlock* block)
 
 #ifdef FEATURE_ON_STACK_REPLACEMENT
 
     // Are there any places in the method where we might add a patchpoint?
+    //
     if (compHasBackwardJump)
     {
-        // Are patchpoints enabled and supported?
-        if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement() > 0) &&
-            compCanHavePatchpoints())
+        // Is OSR enabled?
+        //
+        if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement() > 0))
         {
-            // We don't inline at Tier0, if we do, we may need rethink our approach.
-            // Could probably support inlines that don't introduce flow.
-            assert(!compIsForInlining());
-
-            // Is the start of this block a suitable patchpoint?
-            // Current strategy is blocks that are stack-empty and backwards branch targets and not in a handler
+            // OSR is not yet supported for methods with explicit tail calls.
             //
-            // Todo (perhaps): bail out of OSR and jit this method with optimization.
+            // But we also may not switch methods to be optimized as we should be
+            // able to avoid getting trapped in Tier0 code by normal call counting.
+            // So instead, just suppress adding patchpoints.
             //
-            if (!block->hasHndIndex() && ((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) &&
-                (verCurrentState.esStackDepth == 0))
+            if (!compTailPrefixSeen)
             {
-                block->bbFlags |= BBF_PATCHPOINT;
-                setMethodHasPatchpoint();
+                assert(compCanHavePatchpoints());
+
+                // We don't inline at Tier0, if we do, we may need rethink our approach.
+                // Could probably support inlines that don't introduce flow.
+                assert(!compIsForInlining());
+
+                // Is the start of this block a suitable patchpoint?
+                //
+                if (((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) && (verCurrentState.esStackDepth == 0))
+                {
+                    // We should have noted this earlier and bailed out of OSR.
+                    //
+                    assert(!block->hasHndIndex());
+
+                    block->bbFlags |= BBF_PATCHPOINT;
+                    setMethodHasPatchpoint();
+                }
             }
         }
     }
@@ -11630,10 +11642,12 @@ void Compiler::impImportBlockCode(BasicBlock* block)
     // propagate rareness back through flow and place the partial compilation patchpoints "earlier"
     // so there are fewer overall.
     //
+    // Note unlike OSR, it's ok to forgo these.
+    //
     // Todo: stress mode...
     //
     if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_PartialCompilation() > 0) &&
-        compCanHavePatchpoints())
+        compCanHavePatchpoints() && !compTailPrefixSeen)
     {
         // Is this block a good place for partial compilation?
         //
diff --git a/src/tests/JIT/opt/OSR/handlerloop.cs b/src/tests/JIT/opt/OSR/handlerloop.cs
new file mode 100644
index 0000000000000..01429b553c139
--- /dev/null
+++ b/src/tests/JIT/opt/OSR/handlerloop.cs
@@ -0,0 +1,31 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+
+// OSR can't bail us out of a loop in a handler
+//
+class OSRHandlerLoop
+{
+    public static int Main()
+    {
+        int result = 0;
+        int expected = 0;
+        try
+        {
+            result++;
+            expected = 704982705;
+        }
+        finally
+        {
+            for (int i = 0; i < 100_000; i++)
+            {
+                result += i;
+            }
+        }
+
+        Console.WriteLine($"{result} expected {expected}");
+
+        return (result == expected) ? 100 : -1;
+    }
+}
diff --git a/src/tests/JIT/opt/OSR/handlerloop.csproj b/src/tests/JIT/opt/OSR/handlerloop.csproj
new file mode 100644
index 0000000000000..9620f75474a93
--- /dev/null
+++ b/src/tests/JIT/opt/OSR/handlerloop.csproj
@@ -0,0 +1,24 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <!-- remaining project settings not recoverable from this copy of the patch -->
+  <ItemGroup>
+    <Compile Include="$(MSBuildProjectName).cs" />
+  </ItemGroup>
+</Project>
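Reviewer note, not part of the patch above: handlerloop.cs covers the loop-in-a-handler case, and the other constructs listed in the commit message follow the same pattern. As a rough illustration only, a companion test for the localloc case might look like the sketch below. The file name, class name, and expected value here are hypothetical and the csproj wiring is omitted; the point is just that stackalloc compiles to the localloc IL opcode, so compCanHavePatchpoints() reports "localloc" as the reason, and with OSR enabled compCompileHelper() is then expected to call fgSwitchToOptimized("localloc") for such a method instead of leaving it stuck at Tier0.

// Hypothetical companion test (sketch only, not added by this change).
// stackalloc produces the localloc IL opcode, so OSR cannot place
// patchpoints in this method; with TC_OnStackReplacement enabled the
// jit should switch it to optimized up front.
using System;

class OSRLocallocLoop
{
    public static int Main()
    {
        long result = 0;

        // localloc (stackalloc) blocks patchpoints for the whole method.
        Span<int> buffer = stackalloc int[16];

        for (int i = 0; i < 100_000; i++)
        {
            buffer[i % 16] = i;
            result += buffer[i % 16];
        }

        Console.WriteLine($"{result}");

        // 0 + 1 + ... + 99_999 == 4_999_950_000
        return (result == 4_999_950_000L) ? 100 : -1;
    }
}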