From 46b4b6dd847ea6961dbcfd804209c4e6f3536a72 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Fri, 23 Apr 2021 12:55:52 -0700 Subject: [PATCH] Don't recompute preds lists during loop cloning Instead, add and modify the appropriate preds when the mechanical cloning is performed. This will preserve existing profile data on the edges. Contributes to #49030 No x86/x64 SPMI asm diffs. --- src/coreclr/jit/compiler.h | 7 +- src/coreclr/jit/fgdiagnostic.cpp | 49 ++++---- src/coreclr/jit/fgflow.cpp | 10 +- src/coreclr/jit/fgopt.cpp | 7 +- src/coreclr/jit/flowgraph.cpp | 5 +- src/coreclr/jit/jithashtable.h | 2 +- src/coreclr/jit/optimizer.cpp | 198 +++++++++++++++++++++++++------ 7 files changed, 208 insertions(+), 70 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 29e300d12e9d6..ee2abdb945f60 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -5268,7 +5268,7 @@ class Compiler // When the flow graph changes, we need to update the block numbers, predecessor lists, reachability sets, and // dominators. - void fgUpdateChangedFlowGraph(bool computeDoms = true); + void fgUpdateChangedFlowGraph(const bool computePreds = true, const bool computeDoms = true); public: // Compute the predecessors of the blocks in the control flow graph. @@ -6534,8 +6534,9 @@ class Compiler void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to); // Updates the successors of "blk": if "blk2" is a successor of "blk", and there is a mapping for "blk2->blk3" in - // "redirectMap", change "blk" so that "blk3" is this successor. Note that the predecessor lists are not updated. - void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap); + // "redirectMap", change "blk" so that "blk3" is this successor. If `addPreds` is true, add predecessor edges + // for the block based on its new target, whether redirected or not. + void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool addPreds = false); // Marks the containsCall information to "lnum" and any parent loops. void AddContainsCallAllContainingLoops(unsigned lnum); diff --git a/src/coreclr/jit/fgdiagnostic.cpp b/src/coreclr/jit/fgdiagnostic.cpp index 180d4cfc469e2..057e0d312f171 100644 --- a/src/coreclr/jit/fgdiagnostic.cpp +++ b/src/coreclr/jit/fgdiagnostic.cpp @@ -707,11 +707,24 @@ bool Compiler::fgDumpFlowGraph(Phases phase) fprintf(fgxFile, ">"); } + // In some cases, we want to change the display based on whether an edge is lexically backwards, forwards, + // or lexical successor. Also, for the region tree, using the lexical order is useful for determining where + // to insert in the tree, to determine nesting. We'd like to use the bbNum to do this. However, we don't + // want to renumber the blocks. So, create a mapping of bbNum to ordinal, and compare block order by + // comparing the mapped ordinals instead. 
+ + unsigned blockOrdinal = 0; + unsigned* blkMap = new (this, CMK_DebugOnly) unsigned[fgBBNumMax + 1]; + memset(blkMap, 0, sizeof(unsigned) * (fgBBNumMax + 1)); + for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext) + { + blkMap[block->bbNum] = blockOrdinal++; + } + static const char* kindImage[] = {"EHFINALLYRET", "EHFILTERRET", "EHCATCHRET", "THROW", "RETURN", "NONE", "ALWAYS", "LEAVE", "CALLFINALLY", "COND", "SWITCH"}; BasicBlock* block; - unsigned blockOrdinal; for (block = fgFirstBB, blockOrdinal = 1; block != nullptr; block = block->bbNext, blockOrdinal++) { if (createDotFile) @@ -840,13 +853,13 @@ bool Compiler::fgDumpFlowGraph(Phases phase) const char* sep = ""; - if (bSource->bbNum > bTarget->bbNum) + if (blkMap[bSource->bbNum] > blkMap[bTarget->bbNum]) { // Lexical backedge fprintf(fgxFile, " [color=green"); sep = ", "; } - else if ((bSource->bbNum + 1) == bTarget->bbNum) + else if ((blkMap[bSource->bbNum] + 1) == blkMap[bTarget->bbNum]) { // Lexical successor fprintf(fgxFile, " [color=blue, weight=20"); @@ -953,12 +966,12 @@ bool Compiler::fgDumpFlowGraph(Phases phase) { BasicBlock* const bTarget = bSource->GetSucc(i); fprintf(fgxFile, " " FMT_BB " -> " FMT_BB, bSource->bbNum, bTarget->bbNum); - if (bSource->bbNum > bTarget->bbNum) + if (blkMap[bSource->bbNum] > blkMap[bTarget->bbNum]) { // Lexical backedge fprintf(fgxFile, " [color=green]\n"); } - else if ((bSource->bbNum + 1) == bTarget->bbNum) + else if ((blkMap[bSource->bbNum] + 1) == blkMap[bTarget->bbNum]) { // Lexical successor fprintf(fgxFile, " [color=blue]\n"); @@ -1027,24 +1040,11 @@ bool Compiler::fgDumpFlowGraph(Phases phase) }; public: - RegionGraph(Compiler* comp) : m_comp(comp), m_rgnRoot(nullptr) + RegionGraph(Compiler* comp, unsigned* blkMap) : m_comp(comp), m_rgnRoot(nullptr), m_blkMap(blkMap) { // Create a root region that encompasses the whole function. - // We don't want to renumber the blocks, but it's useful to have a sequential number - // representing the lexical block order so we know where to insert a block range - // in the region tree. To do this, create a mapping of bbNum to ordinal, and compare - // block order by comparing the mapped ordinals. - m_rgnRoot = new (m_comp, CMK_DebugOnly) Region(RegionType::Root, "Root", comp->fgFirstBB, comp->fgLastBB); - - unsigned bbOrdinal = 0; - m_blkMap = new (m_comp, CMK_DebugOnly) unsigned[comp->fgBBNumMax + 1]; - memset(m_blkMap, 0, sizeof(unsigned) * (comp->fgBBNumMax + 1)); - for (BasicBlock* block = comp->fgFirstBB; block != nullptr; block = block->bbNext) - { - m_blkMap[block->bbNum] = bbOrdinal++; - } } //------------------------------------------------------------------------ @@ -1353,7 +1353,7 @@ bool Compiler::fgDumpFlowGraph(Phases phase) // Define the region graph object. We'll add regions to this, then output the graph. - RegionGraph rgnGraph(this); + RegionGraph rgnGraph(this, blkMap); // Add the EH regions to the region graph. 
An EH region consists of a region for the // `try`, a region for the handler, and, for filter/filter-handlers, a region for the @@ -2071,7 +2071,7 @@ class BBPredsChecker bool CheckEhTryDsc(BasicBlock* block, BasicBlock* blockPred, EHblkDsc* ehTryDsc); bool CheckEhHndDsc(BasicBlock* block, BasicBlock* blockPred, EHblkDsc* ehHndlDsc); bool CheckJump(BasicBlock* blockPred, BasicBlock* block); - bool CheckEHFinalyRet(BasicBlock* blockPred, BasicBlock* block); + bool CheckEHFinallyRet(BasicBlock* blockPred, BasicBlock* block); private: Compiler* comp; @@ -2130,7 +2130,7 @@ unsigned BBPredsChecker::CheckBBPreds(BasicBlock* block, unsigned curTraversalSt assert(CheckJump(blockPred, block)); } - // Make sure preds are in increasting BBnum order + // Make sure preds are in increasing BBnum order // assert(block->checkPredListOrder()); @@ -2232,7 +2232,7 @@ bool BBPredsChecker::CheckJump(BasicBlock* blockPred, BasicBlock* block) return true; case BBJ_EHFINALLYRET: - assert(CheckEHFinalyRet(blockPred, block)); + assert(CheckEHFinallyRet(blockPred, block)); return true; case BBJ_THROW: @@ -2265,9 +2265,8 @@ bool BBPredsChecker::CheckJump(BasicBlock* blockPred, BasicBlock* block) return false; } -bool BBPredsChecker::CheckEHFinalyRet(BasicBlock* blockPred, BasicBlock* block) +bool BBPredsChecker::CheckEHFinallyRet(BasicBlock* blockPred, BasicBlock* block) { - // If the current block is a successor to a BBJ_EHFINALLYRET (return from finally), // then the lexically previous block should be a call to the same finally. // Verify all of that. diff --git a/src/coreclr/jit/fgflow.cpp b/src/coreclr/jit/fgflow.cpp index e3b6869f11655..fda7f90145181 100644 --- a/src/coreclr/jit/fgflow.cpp +++ b/src/coreclr/jit/fgflow.cpp @@ -686,7 +686,7 @@ void Compiler::fgRemovePreds() // void Compiler::fgComputePreds() { - noway_assert(fgFirstBB); + noway_assert(fgFirstBB != nullptr); BasicBlock* block; @@ -697,6 +697,14 @@ void Compiler::fgComputePreds() fgDispBasicBlocks(); printf("\n"); } + + // Check that the block numbers are increasing order. + unsigned lastBBnum = fgFirstBB->bbNum; + for (BasicBlock* block = fgFirstBB->bbNext; block != nullptr; block = block->bbNext) + { + assert(lastBBnum < block->bbNum); + lastBBnum = block->bbNum; + } #endif // DEBUG // Reset everything pred related diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp index b73fb634718f6..d0256fb95141e 100644 --- a/src/coreclr/jit/fgopt.cpp +++ b/src/coreclr/jit/fgopt.cpp @@ -174,7 +174,7 @@ bool Compiler::fgReachable(BasicBlock* b1, BasicBlock* b2) // Arguments: // computeDoms -- `true` if we should recompute dominators // -void Compiler::fgUpdateChangedFlowGraph(bool computeDoms) +void Compiler::fgUpdateChangedFlowGraph(const bool computePreds, const bool computeDoms) { // We need to clear this so we don't hit an assert calling fgRenumberBlocks(). fgDomsComputed = false; @@ -182,7 +182,10 @@ void Compiler::fgUpdateChangedFlowGraph(bool computeDoms) JITDUMP("\nRenumbering the basic blocks for fgUpdateChangeFlowGraph\n"); fgRenumberBlocks(); - fgComputePreds(); + if (computePreds) // This condition is only here until all phases don't require it. 
+ { + fgComputePreds(); + } fgComputeEnterBlocksSet(); fgComputeReachabilitySets(); if (computeDoms) diff --git a/src/coreclr/jit/flowgraph.cpp b/src/coreclr/jit/flowgraph.cpp index 046f939772bd7..5c24e7725d3a4 100644 --- a/src/coreclr/jit/flowgraph.cpp +++ b/src/coreclr/jit/flowgraph.cpp @@ -190,8 +190,9 @@ PhaseStatus Compiler::fgInsertGCPolls() { noway_assert(opts.OptimizationEnabled()); fgReorderBlocks(); - constexpr bool computeDoms = false; - fgUpdateChangedFlowGraph(computeDoms); + constexpr bool computePreds = true; + constexpr bool computeDoms = false; + fgUpdateChangedFlowGraph(computePreds, computeDoms); } #ifdef DEBUG if (verbose) diff --git a/src/coreclr/jit/jithashtable.h b/src/coreclr/jit/jithashtable.h index 131f804a1127f..3713c4a19195d 100644 --- a/src/coreclr/jit/jithashtable.h +++ b/src/coreclr/jit/jithashtable.h @@ -471,7 +471,7 @@ class JitHashTable public: //------------------------------------------------------------------------ - // CheckGrowth: Replace the bucket table with a larger one and copy all nodes + // Reallocate: Replace the bucket table with a larger one and copy all nodes // from the existing bucket table. // // Notes: diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 67c26e26c4e5f..daf459faca5be 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2555,7 +2555,8 @@ void Compiler::optFindNaturalLoops() } if (mod) { - fgUpdateChangedFlowGraph(); + constexpr bool computePreds = true; + fgUpdateChangedFlowGraph(computePreds); } #ifdef DEBUG @@ -2606,20 +2607,26 @@ void Compiler::optIdentifyLoopsForAlignment() #endif } -void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) +void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool addPreds) { BasicBlock* newJumpDest = nullptr; switch (blk->bbJumpKind) { case BBJ_THROW: case BBJ_RETURN: - case BBJ_NONE: case BBJ_EHFILTERRET: case BBJ_EHFINALLYRET: case BBJ_EHCATCHRET: // These have no jump destination to update. break; + case BBJ_NONE: + if (addPreds) + { + fgAddRefPred(blk->bbNext, blk); + } + break; + case BBJ_ALWAYS: case BBJ_LEAVE: case BBJ_CALLFINALLY: @@ -2629,6 +2636,15 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) { blk->bbJumpDest = newJumpDest; } + if (addPreds) + { + fgAddRefPred(blk->bbJumpDest, blk); + if (blk->bbJumpKind == BBJ_COND) + { + // Add a pred for the fall-through path. + fgAddRefPred(blk->bbNext, blk); + } + } break; case BBJ_SWITCH: @@ -2636,11 +2652,17 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap) bool redirected = false; for (unsigned i = 0; i < blk->bbJumpSwt->bbsCount; i++) { - if (redirectMap->Lookup(blk->bbJumpSwt->bbsDstTab[i], &newJumpDest)) + BasicBlock* switchDest = blk->bbJumpSwt->bbsDstTab[i]; + if (redirectMap->Lookup(switchDest, &newJumpDest)) { + switchDest = newJumpDest; blk->bbJumpSwt->bbsDstTab[i] = newJumpDest; redirected = true; } + if (addPreds) + { + fgAddRefPred(switchDest, blk); + } } // If any redirections happend, invalidate the switch table map for the switch. 
if (redirected) @@ -3959,7 +3981,8 @@ void Compiler::optUnrollLoops() if (change) { - fgUpdateChangedFlowGraph(); + constexpr bool computePreds = true; + fgUpdateChangedFlowGraph(computePreds); } #ifdef DEBUG @@ -5481,7 +5504,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // We're going to transform this loop: // - // H --> E + // H --> E (or, H conditionally branches around the loop and has fall-through to F == T == E) // F // T // E @@ -5507,60 +5530,96 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) if (h->bbJumpKind != BBJ_NONE && h->bbJumpKind != BBJ_ALWAYS) { // Make a new block to be the unique entry to the loop. - assert(h->bbJumpKind == BBJ_COND && h->bbNext == loop.lpEntry); + JITDUMP("Create new unique single-successor entry to loop\n"); + assert((h->bbJumpKind == BBJ_COND) && (h->bbNext == loop.lpEntry)); BasicBlock* newH = fgNewBBafter(BBJ_NONE, h, /*extendRegion*/ true); - newH->bbWeight = newH->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; + JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", newH->bbNum, h->bbNum); + newH->bbWeight = newH->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; BlockSetOps::Assign(this, newH->bbReach, h->bbReach); // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. newH->bbNatLoopNum = ambientLoop; - h = newH; - optUpdateLoopHead(loopInd, loop.lpHead, h); + optUpdateLoopHead(loopInd, h, newH); + + fgAddRefPred(newH, h); // Add h->newH pred edge + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", h->bbNum, newH->bbNum); + fgReplacePred(newH->bbNext, h, newH); // Replace pred in COND fall-through block. + JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, newH->bbNext->bbNum, + newH->bbNum, newH->bbNext->bbNum); + + h = newH; } + assert(h == loop.lpHead); - // First, make X2 after B, if necessary. (Not necessary if b is a BBJ_ALWAYS.) + // Make X2 after B, if necessary. (Not necessary if B is a BBJ_ALWAYS.) // "newPred" will be the predecessor of the blocks of the cloned loop. BasicBlock* b = loop.lpBottom; BasicBlock* newPred = b; if (b->bbJumpKind != BBJ_ALWAYS) { + assert(b->bbJumpKind == BBJ_COND); + BasicBlock* x = b->bbNext; if (x != nullptr) { + JITDUMP("Create branch around cloned loop\n"); BasicBlock* x2 = fgNewBBafter(BBJ_ALWAYS, b, /*extendRegion*/ true); - x2->bbWeight = x2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; + JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", x2->bbNum, b->bbNum); + x2->bbWeight = x2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. x2->bbNatLoopNum = ambientLoop; x2->bbJumpDest = x; BlockSetOps::Assign(this, x2->bbReach, h->bbReach); + + fgAddRefPred(x2, b); // Add b->x2 pred edge + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", b->bbNum, x2->bbNum); + fgReplacePred(x, b, x2); // The pred of x is now x2, not the fall-through of COND b. + JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", b->bbNum, x->bbNum, x2->bbNum, + x->bbNum); + newPred = x2; } } // Now we'll make "h2", after "h" to go to "e" -- unless the loop is a do-while, // so that "h" already falls through to "e" (e == t == f). + // It might look like this code is unreachable, since "h" must be a BBJ_ALWAYS, but + // later we will change "h" to a BBJ_COND along with a set of loop conditions. 
+ // TODO: it still might be unreachable, since cloning currently is restricted to "do-while" loop forms. BasicBlock* h2 = nullptr; - if (loop.lpHead->bbNext != loop.lpEntry) + if (h->bbNext != loop.lpEntry) { - BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, loop.lpHead, /*extendRegion*/ true); - h2->bbWeight = h2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; + assert(h->bbJumpKind == BBJ_ALWAYS); + JITDUMP("Create branch to entry of optimized loop\n"); + BasicBlock* h2 = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true); + JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", h2->bbNum, h->bbNum); + h2->bbWeight = h2->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; // This is in the scope of a surrounding loop, if one exists -- the parent of the loop we're cloning. h2->bbNatLoopNum = ambientLoop; h2->bbJumpDest = loop.lpEntry; - optUpdateLoopHead(loopInd, loop.lpHead, h2); + + fgAddRefPred(h2, h); // Add h->h2 pred edge + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", h->bbNum, h2->bbNum); + fgReplacePred(loop.lpEntry, h, h2); + JITDUMP("Replace " FMT_BB " -> " FMT_BB " with " FMT_BB " -> " FMT_BB "\n", h->bbNum, loop.lpEntry->bbNum, + h2->bbNum, loop.lpEntry->bbNum); + + optUpdateLoopHead(loopInd, h, h2); + + // NOTE: 'h' is no longer the loop head; 'h2' is! } // Now we'll clone the blocks of the loop body. BasicBlock* newFirst = nullptr; - BasicBlock* newBot = nullptr; BlockToBlockMap* blockMap = new (getAllocator(CMK_LoopClone)) BlockToBlockMap(getAllocator(CMK_LoopClone)); for (BasicBlock* blk = loop.lpFirst; blk != loop.lpBottom->bbNext; blk = blk->bbNext) { BasicBlock* newBlk = fgNewBBafter(blk->bbJumpKind, newPred, /*extendRegion*/ true); + JITDUMP("Adding " FMT_BB " (copy of " FMT_BB ") after " FMT_BB "\n", newBlk->bbNum, blk->bbNum, newPred->bbNum); // Call CloneBlockState to make a copy of the block's statements (and attributes), and assert that it // has a return value indicating success, because optCanOptimizeByLoopCloningVisitor has already @@ -5568,6 +5627,10 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) bool cloneOk = BasicBlock::CloneBlockState(this, newBlk, blk); noway_assert(cloneOk); + // We're going to create the preds below, which will set the bbRefs properly, + // so clear out the cloned bbRefs field. + newBlk->bbRefs = 0; + #if FEATURE_LOOP_ALIGN // If the original loop is aligned, do not align the cloned loop because cloned loop will be executed in // rare scenario. Additionally, having to align cloned loop will force us to disable some VEX prefix encoding @@ -5588,7 +5651,6 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) { newFirst = newBlk; } - newBot = newBlk; // Continually overwrite to make sure we get the last one. newPred = newBlk; blockMap->Set(blk, newBlk); } @@ -5596,7 +5658,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // Perform the static optimizations on the fast path. optPerformStaticOptimizations(loopInd, context DEBUGARG(true)); - // Now go through the new blocks, remapping their jump targets within the loop. + // Now go through the new blocks, remapping their jump targets within the loop + // and updating the preds lists. for (BasicBlock* blk = loop.lpFirst; blk != loop.lpBottom->bbNext; blk = blk->bbNext) { BasicBlock* newblk = nullptr; @@ -5609,17 +5672,28 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) optCopyBlkDest(blk, newblk); // Now redirect the new block according to "blockMap". 
- optRedirectBlock(newblk, blockMap); - } + const bool addPreds = true; + optRedirectBlock(newblk, blockMap, addPreds); - assert((h->bbJumpKind == BBJ_NONE && (h->bbNext == h2 || h->bbNext == loop.lpEntry)) || - (h->bbJumpKind == BBJ_ALWAYS)); - - // If all the conditions are true, go to E2. - BasicBlock* e2 = nullptr; - bool foundIt = blockMap->Lookup(loop.lpEntry, &e2); + blk->ensurePredListOrder(this); + } - h->bbJumpKind = BBJ_COND; +#ifdef DEBUG + // Display the preds for the new blocks, after all the new blocks have been redirected. + JITDUMP("Preds after loop copy:\n"); + for (BasicBlock* blk = loop.lpFirst; blk != loop.lpBottom->bbNext; blk = blk->bbNext) + { + BasicBlock* newblk = nullptr; + bool b = blockMap->Lookup(blk, &newblk); + assert(b && newblk != nullptr); + JITDUMP(FMT_BB ":", newblk->bbNum); + for (flowList* pred = newblk->bbPreds; pred != nullptr; pred = pred->flNext) + { + JITDUMP(" " FMT_BB, pred->getBlock()->bbNum); + } + JITDUMP("\n"); + } +#endif // DEBUG // We will create the following structure // @@ -5634,15 +5708,55 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // We should always have block conditions, at the minimum, the array should be deref-able assert(context->HasBlockConditions(loopInd)); + if (h->bbJumpKind == BBJ_NONE) + { + assert((h->bbNext == h2) || (h->bbNext == loop.lpEntry)); + } + else + { + assert(h->bbJumpKind == BBJ_ALWAYS); + assert(h->bbJumpDest == loop.lpEntry); + } + + // If all the conditions are true, go to E2. + BasicBlock* e2 = nullptr; + bool foundIt = blockMap->Lookup(loop.lpEntry, &e2); + + // We're going to replace the fall-through path from "h". + if (h->bbJumpKind == BBJ_NONE) + { + fgRemoveRefPred(h->bbNext, h); + } + // Create a unique header for the slow path. - BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, true); + JITDUMP("Create unique head block for slow path loop\n"); + BasicBlock* slowHead = fgNewBBafter(BBJ_ALWAYS, h, /*extendRegion*/ true); + JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowHead->bbNum, h->bbNum); slowHead->bbWeight = h->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; slowHead->bbNatLoopNum = ambientLoop; slowHead->bbJumpDest = e2; + fgAddRefPred(slowHead, h); + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", h->bbNum, slowHead->bbNum); + + // This is the only predecessor to the copied loop, and it hasn't been added yet. + fgAddRefPred(slowHead->bbJumpDest, slowHead); + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", slowHead->bbNum, slowHead->bbJumpDest->bbNum); + + // "h" is now going to be a COND block + h->bbJumpKind = BBJ_COND; + BasicBlock* condLast = optInsertLoopChoiceConditions(context, loopInd, h, slowHead); condLast->bbJumpDest = slowHead; + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", condLast->bbNum, condLast->bbJumpDest->bbNum); + fgAddRefPred(condLast->bbJumpDest, condLast); + + // Add the fall-through path pred. + assert(condLast->bbJumpKind == BBJ_COND); + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", condLast->bbNum, condLast->bbNext->bbNum); + fgAddRefPred(condLast->bbNext, condLast); + // If h2 is present it is already the head or replace 'h' by 'condLast'. if (h2 == nullptr) { @@ -5656,7 +5770,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // loop to take. 
loop.lpFlags |= LPFLG_DONT_UNROLL; - fgUpdateChangedFlowGraph(); + constexpr bool computePreds = false; + fgUpdateChangedFlowGraph(computePreds); } //-------------------------------------------------------------------------------------------------- @@ -5668,8 +5783,8 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // head loop head for "loopNum" // slowHead the slow path loop head // -// Return Values: -// None. +// Return Value: +// The last conditional block inserted. // // Operation: // Create the following structure. @@ -5686,6 +5801,7 @@ void Compiler::optCloneLoop(unsigned loopInd, LoopCloneContext* context) // h2/entry (fast) // // Insert condition 0 in 'h' and create other condition blocks and insert conditions in them. +// On entry, block 'h' is a conditional block, but its bbJumpDest hasn't yet been set. // BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, unsigned loopNum, @@ -5702,12 +5818,22 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, bool isHeaderBlock = (curCond == head); // Flip the condition if header block. - context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, isHeaderBlock); + context->CondToStmtInBlock(this, *((*levelCond)[i]), curCond, /*reverse*/ isHeaderBlock); // Create each condition block ensuring wiring between them. - BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, true); + BasicBlock* tmp = fgNewBBafter(BBJ_COND, isHeaderBlock ? slowHead : curCond, /*extendRegion*/ true); curCond->bbJumpDest = isHeaderBlock ? tmp : slowHead; - curCond = tmp; + + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", curCond->bbNum, curCond->bbJumpDest->bbNum); + fgAddRefPred(curCond->bbJumpDest, curCond); + + if (!isHeaderBlock) + { + JITDUMP("Adding " FMT_BB " -> " FMT_BB "\n", curCond->bbNum, tmp->bbNum); + fgAddRefPred(tmp, curCond); + } + + curCond = tmp; curCond->inheritWeight(head); curCond->bbNatLoopNum = head->bbNatLoopNum; @@ -5715,7 +5841,7 @@ BasicBlock* Compiler::optInsertLoopChoiceConditions(LoopCloneContext* context, } // Finally insert cloning conditions after all deref conditions have been inserted. - context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, false); + context->CondToStmtInBlock(this, *(context->GetConditions(loopNum)), curCond, /*reverse*/ false); return curCond; }
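
The central technique in this change is to keep predecessor lists correct incrementally, adding or moving pred edges (fgAddRefPred / fgReplacePred) as blocks are cloned and redirected, instead of discarding the lists and rebuilding them with fgComputePreds afterwards, which would drop per-edge data such as profile weights. The following standalone sketch illustrates that idea outside the JIT; it is not CoreCLR code, and Block, PredEdge, addPred and redirectSuccessor are invented stand-ins (the weight field plays the role of edge profile data that survives because the edge is moved rather than recomputed).

// Standalone illustration (not CoreCLR code): maintain predecessor lists
// incrementally while redirecting an edge, so per-edge data (e.g. a profile
// weight) travels with the edge instead of being recomputed and lost.
#include <cstdio>
#include <map>
#include <string>
#include <vector>

struct PredEdge
{
    std::string pred;   // name of the predecessor block
    unsigned    weight; // edge data we want to preserve (e.g. profile count)
};

struct Block
{
    std::string              name;
    std::vector<std::string> succs; // successor block names
    std::vector<PredEdge>    preds; // predecessor edges with weights
};

using Graph = std::map<std::string, Block>;

// Add a pred edge target <- source; analogous in spirit to fgAddRefPred.
static void addPred(Graph& g, const std::string& target, const std::string& source, unsigned weight)
{
    g[target].preds.push_back({source, weight});
}

// Redirect the edge blk -> oldSucc so it becomes blk -> newSucc, moving the
// existing pred edge (and its weight) rather than rebuilding all pred lists.
static void redirectSuccessor(Graph& g, const std::string& blk, const std::string& oldSucc, const std::string& newSucc)
{
    Block& b = g[blk];
    for (std::string& s : b.succs)
    {
        if (s == oldSucc)
        {
            s = newSucc;
        }
    }

    // Move the pred edge from oldSucc's list to newSucc's list, keeping its weight.
    std::vector<PredEdge>& oldPreds = g[oldSucc].preds;
    for (size_t i = 0; i < oldPreds.size(); i++)
    {
        if (oldPreds[i].pred == blk)
        {
            addPred(g, newSucc, blk, oldPreds[i].weight);
            oldPreds.erase(oldPreds.begin() + i);
            break;
        }
    }
}

int main()
{
    Graph g;
    g["H"].name  = "H";
    g["E"].name  = "E";
    g["E2"].name = "E2";

    // H -> E with a profile weight of 100.
    g["H"].succs.push_back("E");
    addPred(g, "E", "H", 100);

    // Redirect H to the cloned entry E2; the weight stays on the edge.
    redirectSuccessor(g, "H", "E", "E2");

    for (const PredEdge& p : g["E2"].preds)
    {
        printf("E2 pred: %s (weight %u)\n", p.pred.c_str(), p.weight);
    }
    return 0;
}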
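
The fgDumpFlowGraph part of the change uses a smaller trick that is easy to miss: after blocks are inserted, bbNum values may no longer follow lexical order, so instead of renumbering, the dump builds a bbNum-to-ordinal map over the block list and classifies edges (lexical backedge vs. fall-through successor) by comparing ordinals. A minimal standalone sketch of that idea, assuming a toy linked list of blocks with out-of-order numbers (Block and its fields here are invented, not the JIT's BasicBlock):

// Standalone sketch: classify an edge as a lexical backedge or a lexical
// fall-through successor without renumbering blocks, by mapping each block
// number to its position (ordinal) in the list order.
#include <cstdio>
#include <vector>

struct Block
{
    unsigned num;  // analogous to bbNum: may not be in list order
    Block*   next; // lexical successor in the block list
};

int main()
{
    // Three blocks in lexical order, but the middle one was inserted later
    // and received a higher number: 1 -> 7 -> 2.
    Block b3{2, nullptr};
    Block b2{7, &b3};
    Block b1{1, &b2};

    // Map block number -> lexical ordinal.
    const unsigned        maxNum = 7;
    std::vector<unsigned> ordinal(maxNum + 1, 0);
    unsigned              ord = 0;
    for (Block* b = &b1; b != nullptr; b = b->next)
    {
        ordinal[b->num] = ord++;
    }

    // An edge source -> target is a lexical backedge if the source appears
    // after the target, and the fall-through successor if it appears
    // immediately before it. Comparing raw numbers (7 > 2) would misreport
    // the 7 -> 2 edge as a backedge; comparing ordinals classifies it correctly.
    auto classify = [&](const Block& src, const Block& dst) {
        if (ordinal[src.num] > ordinal[dst.num])
        {
            printf("%u -> %u: lexical backedge\n", src.num, dst.num);
        }
        else if (ordinal[src.num] + 1 == ordinal[dst.num])
        {
            printf("%u -> %u: lexical successor\n", src.num, dst.num);
        }
        else
        {
            printf("%u -> %u: forward edge\n", src.num, dst.num);
        }
    };

    classify(b2, b3); // 7 -> 2 is the fall-through successor, not a backedge
    classify(b3, b1); // 2 -> 1 is a genuine backedge
    return 0;
}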