From c02db7475a307795dfe379673d8d6612213648be Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Thu, 18 Nov 2021 20:04:27 -0800
Subject: [PATCH 1/5] JIT: refactor to allow OSR to switch to optimized

When OSR is enabled, the jit may still need to switch to optimized
codegen if there is something in the method that OSR cannot handle.
Examples include:

* localloc
* tail. prefixes
* loops in handlers
* reverse pinvoke (currently bypasses Tiering so somewhat moot)

When OSR is enabled, rework the "switch to optimized" logic in the jit
to check for these cases up front, before we start importing code.

When both QuickJitForLoops and OnStackReplacement are enabled, this
should ensure that if we can't transition out of long-running Tier0
code (via OSR) then we will instead optimize the method. Very few
methods overall should opt out of Tier0.

Note some of these unhandled constructs can eventually be handled by
OSR, with additional work.
---
 src/coreclr/jit/compiler.cpp             | 45 +++++++++++++++--
 src/coreclr/jit/compiler.h               | 36 +++++++-------
 src/coreclr/jit/compiler.hpp             |  8 ++++
 src/coreclr/jit/fgbasic.cpp              | 61 ++++++++++++++++++++----
 src/coreclr/jit/flowgraph.cpp            |  8 +++-
 src/coreclr/jit/importer.cpp             | 21 ++++----
 src/tests/JIT/opt/OSR/handlerloop.cs     | 31 ++++++++++++
 src/tests/JIT/opt/OSR/handlerloop.csproj | 24 ++++++++++
 8 files changed, 196 insertions(+), 38 deletions(-)
 create mode 100644 src/tests/JIT/opt/OSR/handlerloop.cs
 create mode 100644 src/tests/JIT/opt/OSR/handlerloop.csproj

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index f38e2f9eefb92..3eef552fe092f 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -1873,6 +1873,7 @@ void Compiler::compInit(ArenaAllocator* pAlloc,
     compJmpOpUsed         = false;
     compLongUsed          = false;
     compTailCallUsed      = false;
+    compTailPrefixSeen    = false;
     compLocallocSeen      = false;
     compLocallocUsed      = false;
     compLocallocOptimized = false;
@@ -6272,7 +6273,8 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
     info.compTotalColdCodeSize = 0;
     info.compClassProbeCount   = 0;

-    compHasBackwardJump = false;
+    compHasBackwardJump          = false;
+    compHasBackwardJumpInHandler = false;

 #ifdef DEBUG
     compCurBB = nullptr;
@@ -6426,10 +6428,45 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
             goto _Next;
         }

-        if (compHasBackwardJump && (info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0 && fgCanSwitchToOptimized())
+        // We may decide to optimize this method,
+        // to avoid spending a long time stuck in Tier0 code.
+        //
+        if (fgCanSwitchToOptimized())
         {
-            // Method likely has a loop, switch to the OptimizedTier to avoid spending too much time running slower code
-            fgSwitchToOptimized();
+            // We only expect to be able to do this at Tier0.
+            //
+            assert(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0));
+
+            // Honor the config setting that tells the jit to
+            // always optimize methods with loops or explicit tail calls.
+            //
+            // If that's not set, and OSR is enabled, the jit may still
+            // decide to optimize, if there's something in the method that
+            // OSR currently cannot handle.
+            //
+            const char* reason = nullptr;
+
+            if ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) == 0)
+            {
+                if (compHasBackwardJump)
+                {
+                    reason = "loop";
+                }
+                else if (compTailPrefixSeen)
+                {
+                    reason = "tail.call";
+                }
+            }
+            else if (JitConfig.TC_OnStackReplacement() > 0)
+            {
+                const bool patchpointsOK = compCanHavePatchpoints(&reason);
+                assert(patchpointsOK || (reason != nullptr));
+            }
+
+            if (reason != nullptr)
+            {
+                fgSwitchToOptimized(reason);
+            }
         }

         compSetOptimizationLevel();
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index b849e12e45296..b409f6374750f 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -2985,6 +2985,8 @@ class Compiler
     bool fgNormalizeEHCase2();
     bool fgNormalizeEHCase3();

+    void fgCheckForLoopsInHandlers();
+
 #ifdef DEBUG
     void dispIncomingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
     void dispOutgoingEHClause(unsigned num, const CORINFO_EH_CLAUSE& clause);
@@ -6066,7 +6068,7 @@ class Compiler
     BasicBlock* fgLookupBB(unsigned addr);

     bool fgCanSwitchToOptimized();
-    void fgSwitchToOptimized();
+    void fgSwitchToOptimized(const char* reason);

     bool fgMayExplicitTailCall();

@@ -9383,21 +9385,23 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX

     InlineResult* compInlineResult; // The result of importing the inlinee method.

-    bool compDoAggressiveInlining; // If true, mark every method as CORINFO_FLG_FORCEINLINE
-    bool compJmpOpUsed;            // Does the method do a JMP
-    bool compLongUsed;             // Does the method use TYP_LONG
-    bool compFloatingPointUsed;    // Does the method use TYP_FLOAT or TYP_DOUBLE
-    bool compTailCallUsed;         // Does the method do a tailcall
-    bool compLocallocSeen;         // Does the method IL have localloc opcode
-    bool compLocallocUsed;         // Does the method use localloc.
-    bool compLocallocOptimized;    // Does the method have an optimized localloc
-    bool compQmarkUsed;            // Does the method use GT_QMARK/GT_COLON
-    bool compQmarkRationalized;    // Is it allowed to use a GT_QMARK/GT_COLON node.
-    bool compUnsafeCastUsed;       // Does the method use LDIND/STIND to cast between scalar/refernce types
-    bool compHasBackwardJump;      // Does the method (or some inlinee) have a lexically backwards jump?
-    bool compSwitchedToOptimized;  // Codegen initially was Tier0 but jit switched to FullOpts
-    bool compSwitchedToMinOpts;    // Codegen initially was Tier1/FullOpts but jit switched to MinOpts
-    bool compSuppressedZeroInit;   // There are vars with lvSuppressedZeroInit set
+    bool compDoAggressiveInlining;     // If true, mark every method as CORINFO_FLG_FORCEINLINE
+    bool compJmpOpUsed;                // Does the method do a JMP
+    bool compLongUsed;                 // Does the method use TYP_LONG
+    bool compFloatingPointUsed;        // Does the method use TYP_FLOAT or TYP_DOUBLE
+    bool compTailCallUsed;             // Does the method do a tailcall
+    bool compTailPrefixSeen;           // Does the method IL have tail. prefix
+    bool compLocallocSeen;             // Does the method IL have localloc opcode
+    bool compLocallocUsed;             // Does the method use localloc.
+    bool compLocallocOptimized;        // Does the method have an optimized localloc
+    bool compQmarkUsed;                // Does the method use GT_QMARK/GT_COLON
+    bool compQmarkRationalized;        // Is it allowed to use a GT_QMARK/GT_COLON node.
+    bool compUnsafeCastUsed;           // Does the method use LDIND/STIND to cast between scalar/refernce types
+    bool compHasBackwardJump;          // Does the method (or some inlinee) have a lexically backwards jump?
+    bool compHasBackwardJumpInHandler; // Does the method have a lexically backwards jump in a handler?
+    bool compSwitchedToOptimized;      // Codegen initially was Tier0 but jit switched to FullOpts
+    bool compSwitchedToMinOpts;        // Codegen initially was Tier1/FullOpts but jit switched to MinOpts
+    bool compSuppressedZeroInit;       // There are vars with lvSuppressedZeroInit set

     // NOTE: These values are only reliable after
     //       the importing is completely finished.

diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index 0fdcc26810a20..ba99ea4d001cd 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -4736,6 +4736,14 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason)
     {
         whyNot = "localloc";
     }
+    else if (compHasBackwardJumpInHandler)
+    {
+        whyNot = "loop in handler";
+    }
+    else if (compTailPrefixSeen)
+    {
+        whyNot = "tail.call";
+    }
     else if (opts.IsReversePInvoke())
     {
         whyNot = "reverse pinvoke";
diff --git a/src/coreclr/jit/fgbasic.cpp b/src/coreclr/jit/fgbasic.cpp
index 76a093a7c6290..a16a43efe7ca8 100644
--- a/src/coreclr/jit/fgbasic.cpp
+++ b/src/coreclr/jit/fgbasic.cpp
@@ -2738,15 +2738,9 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
                     BADCODE3("tail call not followed by ret", " at offset %04X", (IL_OFFSET)(codeAddr - codeBegp));
                 }

-                if (fgCanSwitchToOptimized() && fgMayExplicitTailCall())
+                if (fgMayExplicitTailCall())
                 {
-                    // Method has an explicit tail call that may run like a loop or may not be generated as a tail
-                    // call in tier 0, switch to optimized to avoid spending too much time running slower code
-                    if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR) ||
-                        ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0))
-                    {
-                        fgSwitchToOptimized();
-                    }
+                    compTailPrefixSeen = true;
                 }
             }
             else
@@ -3534,6 +3528,57 @@ void Compiler::fgFindBasicBlocks()
 #endif

     fgNormalizeEH();
+
+    fgCheckForLoopsInHandlers();
+}
+
+//------------------------------------------------------------------------
+// fgCheckForLoopsInHandlers: scan blocks to see if any handler block
+//   is a backedge target.
+//
+// Notes:
+//    Sets compHasBackwardJumpInHandler if so. This will disable
+//    setting patchpoints in this method and prompt the jit to
+//    optimize the method instead.
+//
+//    We assume any late-added handler (say for synchronized methods)
+//    will not introduce any loops.
+//
+void Compiler::fgCheckForLoopsInHandlers()
+{
+    // We only care about this if we are going to set OSR patchpoints
+    // and the method has exception handling.
+    //
+    if (!opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0))
+    {
+        return;
+    }
+
+    if (JitConfig.TC_OnStackReplacement() == 0)
+    {
+        return;
+    }
+
+    if (info.compXcptnsCount == 0)
+    {
+        return;
+    }
+
+    // Walk blocks in handlers and filters, looking for a backedge target.
+    //
+    assert(!compHasBackwardJumpInHandler);
+    for (BasicBlock* const blk : Blocks())
+    {
+        if (blk->hasHndIndex())
+        {
+            if (blk->bbFlags & BBF_BACKWARD_JUMP_TARGET)
+            {
+                JITDUMP("\nHandler block " FMT_BB " is backward jump target; can't have patchpoints in this method\n",
+                        blk->bbNum);
+                compHasBackwardJumpInHandler = true;
+                break;
+            }
+        }
+    }
 }

 //------------------------------------------------------------------------
diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp
index 654683fbba335..f0e5ee972bef2 100644
--- a/src/coreclr/jit/flowgraph.cpp
+++ b/src/coreclr/jit/flowgraph.cpp
@@ -524,6 +524,9 @@ bool Compiler::fgCanSwitchToOptimized()
 //------------------------------------------------------------------------
 // fgSwitchToOptimized: Switch the opt level from tier 0 to optimized
 //
+// Arguments:
+//    reason - reason why opt level was switched
+//
 // Assumptions:
 //    - fgCanSwitchToOptimized() is true
 //    - compSetOptimizationLevel() has not been called
@@ -532,15 +535,16 @@ bool Compiler::fgCanSwitchToOptimized()
 //    This method is to be called at some point before compSetOptimizationLevel() to switch the opt level to
 //    optimized based on information gathered in early phases.

-void Compiler::fgSwitchToOptimized()
+void Compiler::fgSwitchToOptimized(const char* reason)
 {
     assert(fgCanSwitchToOptimized());

     // Switch to optimized and re-init options
-    JITDUMP("****\n**** JIT Tier0 jit request switching to Tier1 because of loop\n****\n");
+    JITDUMP("****\n**** JIT Tier0 jit request switching to Tier1 because: %s\n****\n", reason);
     assert(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0));
     opts.jitFlags->Clear(JitFlags::JIT_FLAG_TIER0);
     opts.jitFlags->Clear(JitFlags::JIT_FLAG_BBINSTR);
+    opts.jitFlags->Clear(JitFlags::JIT_FLAG_OSR);

     // Leave a note for jit diagnostics
     compSwitchedToOptimized = true;
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index 8f6e4670b532e..d73c70ffe4381 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -11591,24 +11591,27 @@ void Compiler::impImportBlockCode(BasicBlock* block)

 #ifdef FEATURE_ON_STACK_REPLACEMENT

     // Are there any places in the method where we might add a patchpoint?
+    //
     if (compHasBackwardJump)
     {
-        // Are patchpoints enabled and supported?
-        if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement() > 0) &&
-            compCanHavePatchpoints())
+        // Is OSR enabled?
+        //
+        if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement() > 0))
         {
+            assert(compCanHavePatchpoints());
+
             // We don't inline at Tier0, if we do, we may need rethink our approach.
             // Could probably support inlines that don't introduce flow.
             assert(!compIsForInlining());

             // Is the start of this block a suitable patchpoint?
-            // Current strategy is blocks that are stack-empty and backwards branch targets and not in a handler
-            //
-            // Todo (perhaps): bail out of OSR and jit this method with optimization.
             //
-            if (!block->hasHndIndex() && ((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) &&
-                (verCurrentState.esStackDepth == 0))
+            if (((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) && (verCurrentState.esStackDepth == 0))
             {
+                // We should have noted this earlier and bailed out of OSR.
+                //
+                assert(!block->hasHndIndex());
+
                 block->bbFlags |= BBF_PATCHPOINT;
                 setMethodHasPatchpoint();
             }
@@ -11630,6 +11633,8 @@ void Compiler::impImportBlockCode(BasicBlock* block)
     //    propagate rareness back through flow and place the partial compilation patchpoints "earlier"
     //    so there are fewer overall.
     //
+    // Note unlike OSR, it's ok to forgo these.
+    //
     //    Todo: stress mode...
     //
     if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_PartialCompilation() > 0) &&
diff --git a/src/tests/JIT/opt/OSR/handlerloop.cs b/src/tests/JIT/opt/OSR/handlerloop.cs
new file mode 100644
index 0000000000000..2009d3fb612f1
--- /dev/null
+++ b/src/tests/JIT/opt/OSR/handlerloop.cs
@@ -0,0 +1,31 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+
+// OSR can't bail us out of a loop in a handler
+//
+class OSRHandlerLoop
+{
+    public static int Main()
+    {
+        int result = 0;
+        int expected = 704982705;
+        try
+        {
+            result++;
+            expected = 100000;
+        }
+        finally
+        {
+            for (int i = 0; i < 100_000; i++)
+            {
+                result += i;
+            }
+        }
+
+        Console.WriteLine($"{result} expected {expected}");
+
+        return (result == expected) ? 100 : -1;
+    }
+}
diff --git a/src/tests/JIT/opt/OSR/handlerloop.csproj b/src/tests/JIT/opt/OSR/handlerloop.csproj
new file mode 100644
index 0000000000000..9620f75474a93
--- /dev/null
+++ b/src/tests/JIT/opt/OSR/handlerloop.csproj
@@ -0,0 +1,24 @@
+
+  
+    Exe
+    
+    True
+  
+  
+    
+  
+

From 38a4b63e3859bec4dcc67a679737043c02b6e695 Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Fri, 19 Nov 2021 14:04:09 -0800
Subject: [PATCH 2/5] fix qjfl check

---
 src/coreclr/jit/compiler.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 3eef552fe092f..26436b8b3b136 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -6446,7 +6446,7 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
             //
             const char* reason = nullptr;

-            if ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) == 0)
+            if ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0)
             {
                 if (compHasBackwardJump)
                 {

From 6af6ef2b7b0874f154039f4f917b870aa342615a Mon Sep 17 00:00:00 2001
From: Andy Ayers
Date: Fri, 19 Nov 2021 15:32:34 -0800
Subject: [PATCH 3/5] Revise interaction of QJFL, OSR, BBINSTR, and explicit
 tail calls.

This should replicate the current logic where we always optimize
methods with explicit tail calls unless we're instrumenting them.

To make this work with OSR we simply won't put patchpoints in methods
with explicit tail calls, instead trusting that Tiering can get us to
optimized versions.
---
 src/coreclr/jit/compiler.cpp | 18 ++++++++++++------
 src/coreclr/jit/compiler.hpp |  4 ----
 src/coreclr/jit/importer.cpp | 35 ++++++++++++++++++++-------------
 3 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 26436b8b3b136..279193df047b1 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -6437,8 +6437,14 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
             //
             assert(opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0));

+            // Normal tiering should bail us out of Tier0 tail call induced loops.
+            // So keep these methods in Tier0 if we're gathering PGO data.
+            // If we're not gathering PGO, then switch these to optimized to
+            // minimize the number of tail call helper stubs we might need.
+            // Reconsider this if/when we're able to share those stubs.
+            //
             // Honor the config setting that tells the jit to
-            // always optimize methods with loops or explicit tail calls.
+            // always optimize methods with loops.
             //
             // If that's not set, and OSR is enabled, the jit may still
             // decide to optimize, if there's something in the method that
@@ -6446,16 +6452,16 @@ int Compiler::compCompileHelper(CORINFO_MODULE_HANDLE classPtr,
             //
             const char* reason = nullptr;

-            if ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0)
+            if (compTailPrefixSeen && !opts.jitFlags->IsSet(JitFlags::JIT_FLAG_BBINSTR))
+            {
+                reason = "tail.call and not BBINSTR";
+            }
+            else if ((info.compFlags & CORINFO_FLG_DISABLE_TIER0_FOR_LOOPS) != 0)
             {
                 if (compHasBackwardJump)
                 {
                     reason = "loop";
                 }
-                else if (compTailPrefixSeen)
-                {
-                    reason = "tail.call";
-                }
             }
             else if (JitConfig.TC_OnStackReplacement() > 0)
             {
diff --git a/src/coreclr/jit/compiler.hpp b/src/coreclr/jit/compiler.hpp
index ba99ea4d001cd..ed5f4475e5b7c 100644
--- a/src/coreclr/jit/compiler.hpp
+++ b/src/coreclr/jit/compiler.hpp
@@ -4740,10 +4740,6 @@ inline bool Compiler::compCanHavePatchpoints(const char** reason)
     {
         whyNot = "loop in handler";
     }
-    else if (compTailPrefixSeen)
-    {
-        whyNot = "tail.call";
-    }
     else if (opts.IsReversePInvoke())
     {
         whyNot = "reverse pinvoke";
diff --git a/src/coreclr/jit/importer.cpp b/src/coreclr/jit/importer.cpp
index d73c70ffe4381..3e37f7b7d5436 100644
--- a/src/coreclr/jit/importer.cpp
+++ b/src/coreclr/jit/importer.cpp
@@ -11598,22 +11598,31 @@ void Compiler::impImportBlockCode(BasicBlock* block)
         //
         if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_OnStackReplacement() > 0))
         {
-            assert(compCanHavePatchpoints());
-
-            // We don't inline at Tier0, if we do, we may need rethink our approach.
-            // Could probably support inlines that don't introduce flow.
-            assert(!compIsForInlining());
-
-            // Is the start of this block a suitable patchpoint?
+            // OSR is not yet supported for methods with explicit tail calls.
             //
-            if (((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) && (verCurrentState.esStackDepth == 0))
+            // But we also may not switch methods to be optimized as we should be
+            // able to avoid getting trapped in Tier0 code by normal call counting.
+            // So instead, just suppress adding patchpoints.
+            //
+            if (!compTailPrefixSeen)
             {
-                // We should have noted this earlier and bailed out of OSR.
+                assert(compCanHavePatchpoints());
+
+                // We don't inline at Tier0, if we do, we may need rethink our approach.
+                // Could probably support inlines that don't introduce flow.
+                assert(!compIsForInlining());
+
+                // Is the start of this block a suitable patchpoint?
                 //
-                assert(!block->hasHndIndex());
+                if (((block->bbFlags & BBF_BACKWARD_JUMP_TARGET) != 0) && (verCurrentState.esStackDepth == 0))
+                {
+                    // We should have noted this earlier and bailed out of OSR.
+                    //
+                    assert(!block->hasHndIndex());

-                block->bbFlags |= BBF_PATCHPOINT;
-                setMethodHasPatchpoint();
+                    block->bbFlags |= BBF_PATCHPOINT;
+                    setMethodHasPatchpoint();
+                }
             }
         }
     }
@@ -11638,7 +11647,7 @@ void Compiler::impImportBlockCode(BasicBlock* block)
     //    Todo: stress mode...
     //
     if (opts.jitFlags->IsSet(JitFlags::JIT_FLAG_TIER0) && (JitConfig.TC_PartialCompilation() > 0) &&
-        compCanHavePatchpoints())
+        compCanHavePatchpoints() && !compTailPrefixSeen)
     {
         // Is this block a good place for partial compilation?
// From 78d5ad9aa19ad168f7e11da1f4ee1b47823415a0 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 19 Nov 2021 17:14:59 -0800 Subject: [PATCH 4/5] fix test --- src/tests/JIT/opt/OSR/handlerloop.cs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/tests/JIT/opt/OSR/handlerloop.cs b/src/tests/JIT/opt/OSR/handlerloop.cs index 2009d3fb612f1..01429b553c139 100644 --- a/src/tests/JIT/opt/OSR/handlerloop.cs +++ b/src/tests/JIT/opt/OSR/handlerloop.cs @@ -10,11 +10,11 @@ class OSRHandlerLoop public static int Main() { int result = 0; - int expected = 704982705; + int expected = 0; try { result++; - expected = 100000; + expected = 704982705; } finally { From 958df7819edb507b1617885fd0c685b0e6f1869c Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 20 Nov 2021 09:12:48 -0800 Subject: [PATCH 5/5] clear don't remove on OSR step block --- src/coreclr/jit/fgopt.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index 428291b91c2f2..a41724f8ef42a 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -1523,6 +1523,7 @@ void Compiler::fgPostImportationCleanup() BasicBlock* const newBlock = fgSplitBlockAtBeginning(fromBlock); fromBlock->bbFlags |= BBF_INTERNAL; + newBlock->bbFlags &= ~BBF_DONT_REMOVE; GenTree* const entryStateLcl = gtNewLclvNode(entryStateVar, TYP_INT); GenTree* const compareEntryStateToZero =