diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 13a7791b18d6e..569fd27c076e4 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5935,13 +5935,11 @@ class Compiler VNSet* m_pHoistedInCurLoop; public: - // Value numbers of expressions that have been hoisted in parent loops in the loop nest. - VNSet m_hoistedInParentLoops; - // Value numbers of expressions that have been hoisted in the current (or most recent) loop in the nest. // Previous decisions on loop-invariance of value numbers in the current loop. VNSet m_curLoopVnInvariantCache; + // Get the VN cache for current loop VNSet* GetHoistedInCurLoop(Compiler* comp) { if (m_pHoistedInCurLoop == nullptr) @@ -5951,35 +5949,35 @@ class Compiler return m_pHoistedInCurLoop; } - VNSet* ExtractHoistedInCurLoop() + // Return the so far collected VNs in cache for current loop and reset it. + void ResetHoistedInCurLoop() { - VNSet* res = m_pHoistedInCurLoop; m_pHoistedInCurLoop = nullptr; - return res; + JITDUMP("Resetting m_pHoistedInCurLoop\n"); } LoopHoistContext(Compiler* comp) - : m_pHoistedInCurLoop(nullptr) - , m_hoistedInParentLoops(comp->getAllocatorLoopHoist()) - , m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist()) + : m_pHoistedInCurLoop(nullptr), m_curLoopVnInvariantCache(comp->getAllocatorLoopHoist()) { } }; - // Do hoisting for loop "lnum" (an index into the optLoopTable), and all loops nested within it. - // Tracks the expressions that have been hoisted by containing loops by temporarily recording their - // value numbers in "m_hoistedInParentLoops". This set is not modified by the call. + // Do hoisting of all loops nested within loop "lnum" (an index into the optLoopTable), followed + // by the loop "lnum" itself. + // + // "m_pHoistedInCurLoop" helps a lot in eliminating duplicate expressions getting hoisted + // and reducing the count of total expressions hoisted out of loop. When calculating the + // profitability, we compare this with number of registers and hence, lower the number of expressions + // getting hoisted, better chances that they will get enregistered and CSE considering them. + // void optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt); // Do hoisting for a particular loop ("lnum" is an index into the optLoopTable.) - // Assumes that expressions have been hoisted in containing loops if their value numbers are in - // "m_hoistedInParentLoops". - // - void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt); + // Returns the new preheaders created. + void optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders); // Hoist all expressions in "blocks" that are invariant in loop "loopNum" (an index into the optLoopTable) - // outside of that loop. Exempt expressions whose value number is in "m_hoistedInParentLoops"; add VN's of hoisted - // expressions to "hoistInLoop". + // outside of that loop. void optHoistLoopBlocks(unsigned loopNum, ArrayStack* blocks, LoopHoistContext* hoistContext); // Return true if the tree looks profitable to hoist out of loop 'lnum'. @@ -6362,6 +6360,9 @@ class Compiler // A loop contains itself. bool optLoopContains(unsigned l1, unsigned l2) const; + // Returns the lpEntry for given preheader block of a loop + BasicBlock* optLoopEntry(BasicBlock* preHeader); + // Updates the loop table by changing loop "loopInd", whose head is required // to be "from", to be "to". Also performs this transformation for any // loop nested in "loopInd" that shares the same head as "loopInd". diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index b848d6400bca8..4cf5db0a250d0 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -3151,6 +3151,22 @@ bool Compiler::optLoopContains(unsigned l1, unsigned l2) const } } +//----------------------------------------------------------------------------- +// optLoopEntry: For a given preheader of a loop, returns the lpEntry. +// +// Arguments: +// preHeader -- preheader of a loop +// +// Returns: +// Corresponding loop entry block. +// +BasicBlock* Compiler::optLoopEntry(BasicBlock* preHeader) +{ + assert((preHeader->bbFlags & BBF_LOOP_PREHEADER) != 0); + + return (preHeader->bbJumpDest == nullptr) ? preHeader->bbNext : preHeader->bbJumpDest; +} + //----------------------------------------------------------------------------- // optUpdateLoopHead: Replace the `head` block of a loop in the loop table. // Considers all child loops that might share the same head (recursively). @@ -5992,6 +6008,7 @@ void Compiler::optPerformHoistExpr(GenTree* origExpr, BasicBlock* exprBb, unsign { printf("\nHoisting a copy of "); printTreeID(origExpr); + printf(" " FMT_VN, origExpr->gtVNPair.GetLiberal()); printf(" from " FMT_BB " into PreHeader " FMT_BB " for loop " FMT_LP " <" FMT_BB ".." FMT_BB ">:\n", exprBb->bbNum, optLoopTable[lnum].lpHead->bbNum, lnum, optLoopTable[lnum].lpTop->bbNum, optLoopTable[lnum].lpBottom->bbNum); @@ -6262,46 +6279,51 @@ void Compiler::optHoistLoopNest(unsigned lnum, LoopHoistContext* hoistCtxt) m_loopsConsidered++; #endif // LOOP_HOIST_STATS - optHoistThisLoop(lnum, hoistCtxt); - - VNSet* hoistedInCurLoop = hoistCtxt->ExtractHoistedInCurLoop(); + BasicBlockList* preHeadersOfChildLoops = nullptr; + BasicBlockList* firstPreHeader = nullptr; if (optLoopTable[lnum].lpChild != BasicBlock::NOT_IN_LOOP) { - // Add the ones hoisted in "lnum" to "hoistedInParents" for any nested loops. - // TODO-Cleanup: we should have a set abstraction for loops. - if (hoistedInCurLoop != nullptr) - { - for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) - { -#ifdef DEBUG - bool b; - assert(!hoistCtxt->m_hoistedInParentLoops.Lookup(keys.Get(), &b)); -#endif - hoistCtxt->m_hoistedInParentLoops.Set(keys.Get(), true); - } - } + BitVecTraits m_visitedTraits(fgBBNumMax * 2, this); + BitVec m_visited(BitVecOps::MakeEmpty(&m_visitedTraits)); for (unsigned child = optLoopTable[lnum].lpChild; child != BasicBlock::NOT_IN_LOOP; child = optLoopTable[child].lpSibling) { optHoistLoopNest(child, hoistCtxt); - } - // Now remove them. - // TODO-Cleanup: we should have a set abstraction for loops. - if (hoistedInCurLoop != nullptr) - { - for (VNSet::KeyIterator keys = hoistedInCurLoop->Begin(); !keys.Equal(hoistedInCurLoop->End()); ++keys) + if (optLoopTable[child].lpFlags & LPFLG_HAS_PREHEAD) { - // Note that we asserted when we added these that they hadn't been members, so removing is appropriate. - hoistCtxt->m_hoistedInParentLoops.Remove(keys.Get()); + // If any preheaders were found, add them to the tracking list + + BasicBlock* preHeaderBlock = optLoopTable[child].lpHead; + if (!BitVecOps::IsMember(&m_visitedTraits, m_visited, preHeaderBlock->bbNum)) + { + BitVecOps::AddElemD(&m_visitedTraits, m_visited, preHeaderBlock->bbNum); + JITDUMP(" PREHEADER: " FMT_BB "\n", preHeaderBlock->bbNum); + + // Here, we are arranging the blocks in reverse execution order, so when they are pushed + // on the stack that hoist these blocks further sees them in execution order. + if (firstPreHeader == nullptr) + { + preHeadersOfChildLoops = new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr); + firstPreHeader = preHeadersOfChildLoops; + } + else + { + preHeadersOfChildLoops->next = + new (this, CMK_LoopHoist) BasicBlockList(preHeaderBlock, nullptr); + preHeadersOfChildLoops = preHeadersOfChildLoops->next; + } + } } } } + + optHoistThisLoop(lnum, hoistCtxt, firstPreHeader); } -void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) +void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt, BasicBlockList* existingPreHeaders) { LoopDsc* pLoopDsc = &optLoopTable[lnum]; @@ -6413,24 +6435,61 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) // or side-effect dependent things. // // We really should consider hoisting from conditionally executed blocks, if they are frequently executed - // and it is safe to evaluate the tree early. - // - // In particular if we have a loop nest, when scanning the outer loop we should consider hoisting from blocks - // in enclosed loops. However, this is likely to scale poorly, and we really should instead start - // hoisting inner to outer. + // and it is safe to evaluate the tree early // ArrayStack defExec(getAllocatorLoopHoist()); + + bool pushAllPreheaders = false; + if (pLoopDsc->lpExitCnt == 1) { assert(pLoopDsc->lpExit != nullptr); - JITDUMP(" Only considering hoisting in blocks that dominate exit block " FMT_BB "\n", pLoopDsc->lpExit->bbNum); - BasicBlock* cur = pLoopDsc->lpExit; + JITDUMP(" Considering hoisting in blocks that either dominate exit block " FMT_BB + " or preheaders of nested loops, if any:\n", + pLoopDsc->lpExit->bbNum); + // Push dominators, until we reach "entry" or exit the loop. + // Also push the preheaders that were added for the nested loops, + // if any, along the way such that in the final list, the dominating + // blocks are visited before the dominated blocks. + // + // TODO-CQ: In future, we should create preheaders upfront before building + // dominators so we don't have to do this extra work here. + BasicBlock* cur = pLoopDsc->lpExit; + BasicBlockList* preHeadersList = existingPreHeaders; + while (cur != nullptr && pLoopDsc->lpContains(cur) && cur != pLoopDsc->lpEntry) { + JITDUMP(" -- " FMT_BB " (dominate exit block)\n", cur->bbNum); defExec.Push(cur); cur = cur->bbIDom; + + if (preHeadersList != nullptr) + { + BasicBlock* preHeaderBlock = preHeadersList->block; + BasicBlock* lpEntry = optLoopEntry(preHeaderBlock); + if (cur->bbNum < lpEntry->bbNum) + { + JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum, + lpEntry->bbNatLoopNum); + + defExec.Push(preHeaderBlock); + preHeadersList = preHeadersList->next; + } + } + } + + // Push the remaining preheaders, if any. This usually will happen if entry + // and exit blocks of lnum is same. + while (preHeadersList != nullptr) + { + BasicBlock* preHeaderBlock = preHeadersList->block; + JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum, + optLoopEntry(preHeaderBlock)->bbNatLoopNum); + defExec.Push(preHeaderBlock); + preHeadersList = preHeadersList->next; } + // If we didn't reach the entry block, give up and *just* push the entry block. if (cur != pLoopDsc->lpEntry) { @@ -6438,17 +6497,37 @@ void Compiler::optHoistThisLoop(unsigned lnum, LoopHoistContext* hoistCtxt) "block " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); defExec.Reset(); + pushAllPreheaders = true; } - defExec.Push(pLoopDsc->lpEntry); } else // More than one exit { - JITDUMP(" only considering hoisting in entry block " FMT_BB "\n", pLoopDsc->lpEntry->bbNum); // We'll assume that only the entry block is definitely executed. // We could in the future do better. - defExec.Push(pLoopDsc->lpEntry); + + JITDUMP(" Considering hoisting in entry block " FMT_BB " because " FMT_LP " has more than one exit\n", + pLoopDsc->lpEntry->bbNum, lnum); + pushAllPreheaders = true; + } + + if (pushAllPreheaders) + { + // We will still push all the preheaders found. + BasicBlockList* preHeadersList = existingPreHeaders; + + while (preHeadersList != nullptr) + { + BasicBlock* preHeaderBlock = preHeadersList->block; + JITDUMP(" -- " FMT_BB " (preheader of " FMT_LP ")\n", preHeaderBlock->bbNum, + optLoopEntry(preHeaderBlock)->bbNatLoopNum); + defExec.Push(preHeaderBlock); + preHeadersList = preHeadersList->next; + } } + JITDUMP(" -- " FMT_BB " (entry block)\n", pLoopDsc->lpEntry->bbNum); + defExec.Push(pLoopDsc->lpEntry); + optHoistLoopBlocks(lnum, &defExec, hoistCtxt); } @@ -6765,6 +6844,24 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo return vnIsInvariant; } + bool IsHoistableOverExcepSibling(GenTree* node, bool siblingHasExcep) + { + JITDUMP(" [%06u]", dspTreeID(node)); + + if ((node->gtFlags & GTF_ALL_EFFECT) != 0) + { + // If the hoistable node has any side effects, make sure + // we don't hoist it past a sibling that throws any exception. + if (siblingHasExcep) + { + JITDUMP(" not hoistable: cannot move past node that throws exception.\n"); + return false; + } + } + JITDUMP(" hoistable\n"); + return true; + } + //------------------------------------------------------------------------ // IsTreeLoopMemoryInvariant: determine if the value number of tree // is dependent on the tree being executed within the current loop @@ -6868,6 +6965,7 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo fgWalkResult PostOrderVisit(GenTree** use, GenTree* user) { GenTree* tree = *use; + JITDUMP("----- PostOrderVisit for [%06u]\n", dspTreeID(tree)); if (tree->OperIsLocal()) { @@ -7174,6 +7272,15 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo // cctor dependent node is initially not hoistable and may become hoistable later, // when its parent comma node is visited. // + // TODO-CQ: Ideally, we should be hoisting all the nodes having side-effects in execution + // order as well as the ones that don't have side-effects at all. However, currently, we + // just restrict hoisting a node(s) (that are children of `comma`) if one of the siblings + // (which is executed before the given node) has side-effects (exceptions). "Descendants + // of ancestors might have side-effects and we might hoist nodes past them. This needs + // to be addressed properly. + bool visitedCurr = false; + bool isCommaTree = tree->OperIs(GT_COMMA); + bool hasExcep = false; for (int i = 0; i < m_valueStack.Height(); i++) { Value& value = m_valueStack.BottomRef(i); @@ -7182,17 +7289,32 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo { assert(value.Node() != tree); + if (IsHoistableOverExcepSibling(value.Node(), hasExcep)) + { + m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); + } + // Don't hoist this tree again. value.m_hoistable = false; value.m_invariant = false; - - m_compiler->optHoistCandidate(value.Node(), m_currentBlock, m_loopNum, m_hoistContext); } else if (value.Node() != tree) { + if (visitedCurr && isCommaTree) + { + // If we have visited current tree, now we are visiting children. + // For GT_COMMA nodes, we want to track if any children throws and + // should not hoist further children past it. + hasExcep = (tree->gtFlags & GTF_EXCEPT) != 0; + } JITDUMP(" [%06u] not %s: %s\n", dspTreeID(value.Node()), value.m_invariant ? "invariant" : "hoistable", value.m_failReason); } + else + { + visitedCurr = true; + JITDUMP(" [%06u] not hoistable : current node\n", dspTreeID(value.Node())); + } } } @@ -7236,6 +7358,8 @@ void Compiler::optHoistLoopBlocks(unsigned loopNum, ArrayStack* blo visitor.HoistBlock(block); } + + hoistContext->ResetHoistedInCurLoop(); } void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnum, LoopHoistContext* hoistCtxt) @@ -7249,17 +7373,12 @@ void Compiler::optHoistCandidate(GenTree* tree, BasicBlock* treeBb, unsigned lnu return; } - if (hoistCtxt->m_hoistedInParentLoops.Lookup(tree->gtVNPair.GetLiberal())) - { - JITDUMP(" ... already hoisted same VN in parent\n"); - // already hoisted in a parent loop, so don't hoist this expression. - return; - } - if (hoistCtxt->GetHoistedInCurLoop(this)->Lookup(tree->gtVNPair.GetLiberal())) { - JITDUMP(" ... already hoisted same VN in current\n"); // already hoisted this expression in the current loop, so don't hoist this expression. + + JITDUMP(" [%06u] ... already hoisted " FMT_VN " in " FMT_LP "\n ", dspTreeID(tree), + tree->gtVNPair.GetLiberal(), lnum); return; }