From 1f303a13e74ed05a1e024a54e64af62a92533e0c Mon Sep 17 00:00:00 2001 From: SingleAccretion <62474226+SingleAccretion@users.noreply.github.com> Date: Fri, 27 May 2022 21:33:44 +0300 Subject: [PATCH] Do not recost and rethread trees inside `optRemoveRangeCheck` (#69895) * Do not set order in "optRemoveRangeCheck" All but one caller of the method (RangeCheck) already do so in their "for (GenTree* node : stmt->TreeList())" loops, so it is not necessary. Additionally, re-threading the statement, when combined with "gtSetEvalOrder", can have the consequence of redirecting said loops, possibly causing them to miss some trees, which was observed in early propagation when working on removing "GT_INDEX". A few small diffs because we no longer recost when removing range checks in loop cloning, which is generally not necessary because cloning runs before the "global" costing is performed, except there is one quirk in "gtSetEvalOrder" which was looking as "compCurStmt", and that is not set during "fgSetBlockOrder". * Work around a TP regression Gets us back 0.13% (!!!) instructions according to PIN. --- src/coreclr/jit/compiler.h | 6 ++++-- src/coreclr/jit/optimizer.cpp | 19 ++++++------------- src/coreclr/jit/rangecheck.cpp | 15 +++++++++++++-- src/coreclr/jit/rangecheck.h | 3 +++ 4 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index aee837012579c..b4cfe60a692d9 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5494,7 +5494,6 @@ class Compiler GenTree* fgInsertCommaFormTemp(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr); GenTree* fgMakeMultiUse(GenTree** ppTree, CORINFO_CLASS_HANDLE structType = nullptr); -private: // Recognize a bitwise rotation pattern and convert into a GT_ROL or a GT_ROR node. GenTree* fgRecognizeAndMorphBitwiseRotation(GenTree* tree); bool fgOperIsBitwiseRotationRoot(genTreeOps oper); @@ -5506,8 +5505,11 @@ class Compiler #endif //-------- Determine the order in which the trees will be evaluated ------- - GenTree* fgSetTreeSeq(GenTree* tree, bool isLIR = false); +public: void fgSetStmtSeq(Statement* stmt); + +private: + GenTree* fgSetTreeSeq(GenTree* tree, bool isLIR = false); void fgSetBlockOrder(BasicBlock* block); //------------------------- Morphing -------------------------------------- diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 423c72d08c893..6006907e6933e 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -8107,10 +8107,12 @@ void Compiler::AddModifiedElemTypeAllContainingLoops(unsigned lnum, CORINFO_CLAS // Return Value: // Rewritten "check" - no-op if it has no side effects or the tree that contains them. // -// Assumptions: -// This method is capable of removing checks of two kinds: COMMA-based and standalone top-level ones. -// In case of a COMMA-based check, "check" must be a non-null first operand of a non-null COMMA. -// In case of a standalone check, "comma" must be null and "check" - "stmt"'s root. +// Notes: +// This method is capable of removing checks of two kinds: COMMA-based and standalone top-level +// ones. In case of a COMMA-based check, "check" must be a non-null first operand of a non-null +// COMMA. In case of a standalone check, "comma" must be null and "check" - "stmt"'s root. +// +// Does not keep costs or node threading up to date, but does update side effect flags. // GenTree* Compiler::optRemoveRangeCheck(GenTreeBoundsChk* check, GenTree* comma, Statement* stmt) { @@ -8165,15 +8167,6 @@ GenTree* Compiler::optRemoveRangeCheck(GenTreeBoundsChk* check, GenTree* comma, gtUpdateSideEffects(stmt, tree); - // Recalculate the GetCostSz(), etc... - gtSetStmtInfo(stmt); - - // Re-thread the nodes if necessary - if (fgStmtListThreaded) - { - fgSetStmtSeq(stmt); - } - #ifdef DEBUG if (verbose) { diff --git a/src/coreclr/jit/rangecheck.cpp b/src/coreclr/jit/rangecheck.cpp index c4e7b8adc559f..c5afd07a3ffeb 100644 --- a/src/coreclr/jit/rangecheck.cpp +++ b/src/coreclr/jit/rangecheck.cpp @@ -24,6 +24,7 @@ RangeCheck::RangeCheck(Compiler* pCompiler) , m_pCompiler(pCompiler) , m_alloc(pCompiler->getAllocator(CMK_RangeCheck)) , m_nVisitBudget(MAX_VISIT_BUDGET) + , m_updateStmt(false) { } @@ -255,6 +256,7 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* { JITDUMP("Removing range check\n"); m_pCompiler->optRemoveRangeCheck(bndsChk, comma, stmt); + m_updateStmt = true; return; } } @@ -296,8 +298,8 @@ void RangeCheck::OptimizeRangeCheck(BasicBlock* block, Statement* stmt, GenTree* { JITDUMP("[RangeCheck::OptimizeRangeCheck] Between bounds\n"); m_pCompiler->optRemoveRangeCheck(bndsChk, comma, stmt); + m_updateStmt = true; } - return; } void RangeCheck::Widen(BasicBlock* block, GenTree* tree, Range* pRange) @@ -1545,14 +1547,23 @@ void RangeCheck::OptimizeRangeChecks() { for (Statement* const stmt : block->Statements()) { + m_updateStmt = false; + for (GenTree* const tree : stmt->TreeList()) { - if (IsOverBudget()) + if (IsOverBudget() && !m_updateStmt) { return; } + OptimizeRangeCheck(block, stmt, tree); } + + if (m_updateStmt) + { + m_pCompiler->gtSetStmtInfo(stmt); + m_pCompiler->fgSetStmtSeq(stmt); + } } } } diff --git a/src/coreclr/jit/rangecheck.h b/src/coreclr/jit/rangecheck.h index 7161aa2d162c4..b6309b1dc1e74 100644 --- a/src/coreclr/jit/rangecheck.h +++ b/src/coreclr/jit/rangecheck.h @@ -650,4 +650,7 @@ class RangeCheck // The number of nodes for which range is computed throughout the current method. // When this limit is zero, we have exhausted all the budget to walk the ud-chain. int m_nVisitBudget; + + // Set to "true" whenever we remove a check and need to re-thread the statement. + bool m_updateStmt; };