diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 3e762b987076b..8cf029f7d8c7d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -6868,6 +6868,8 @@ class Compiler
     //
     PhaseStatus optRedundantBranches();
     bool optRedundantBranch(BasicBlock* const block);
+    bool optJumpThread(BasicBlock* const block, BasicBlock* const domBlock);
+    bool optReachable(BasicBlock* const fromBlock, BasicBlock* const toBlock);
 
 #if ASSERTION_PROP
 
     /**************************************************************************
diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp
index f7584905cd244..df844db0eba83 100644
--- a/src/coreclr/jit/redundantbranchopts.cpp
+++ b/src/coreclr/jit/redundantbranchopts.cpp
@@ -11,16 +11,19 @@
 //
 PhaseStatus Compiler::optRedundantBranches()
 {
-    // We attempt this "bottom up" so walk the flow graph in postorder.
-    //
-    bool madeChanges = false;
-    for (unsigned i = fgDomBBcount; i > 0; --i)
+#if DEBUG
+    if (verbose)
     {
-        BasicBlock* const block = fgBBInvPostOrder[i];
+        fgDispBasicBlocks(verboseTrees);
+    }
+#endif // DEBUG
 
-        // Upstream phases like optOptimizeBools may remove blocks
-        // that are referenced in bbInvPosOrder.
+    bool madeChanges = false;
+
+    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        // Skip over any removed blocks.
         //
         if ((block->bbFlags & BBF_REMOVED) != 0)
         {
@@ -35,6 +38,20 @@ PhaseStatus Compiler::optRedundantBranches()
         }
     }
 
+    // Reset visited flags, in case we set any.
+    //
+    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        block->bbFlags &= ~BBF_VISITED;
+    }
+
+#if DEBUG
+    if (verbose && madeChanges)
+    {
+        fgDispBasicBlocks(verboseTrees);
+    }
+#endif // DEBUG
+
     return madeChanges ?
         PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
 }
 
@@ -84,15 +101,8 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
 
     while (domBlock != nullptr)
     {
-        if (prevBlock == block)
-        {
-            JITDUMP("\nChecking " FMT_BB " for redundancy\n", block->bbNum);
-        }
-
         // Check the current dominator
         //
-        JITDUMP(" ... checking dom " FMT_BB "\n", domBlock->bbNum);
-
         if (domBlock->bbJumpKind == BBJ_COND)
         {
             Statement* const domJumpStmt = domBlock->lastStmt();
@@ -102,41 +112,45 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
 
             if (domCmpTree->OperKind() & GTK_RELOP)
             {
-                ValueNum domCmpVN = domCmpTree->GetVN(VNK_Conservative);
+                // We can use liberal VNs as bounds checks are not yet
+                // manifest explicitly as relops.
+                //
+                ValueNum domCmpVN = domCmpTree->GetVN(VNK_Liberal);
 
                 // Note we could also infer the tree relop's value from similar relops higher in the dom tree.
                 // For example, (x >= 0) dominating (x > 0), or (x < 0) dominating (x > 0).
                 //
                 // That is left as a future enhancement.
                 //
-                if (domCmpVN == tree->GetVN(VNK_Conservative))
+                if (domCmpVN == tree->GetVN(VNK_Liberal))
                 {
-                    // Thes compare in "tree" is redundant.
+                    // The compare in "tree" is redundant.
                     // Is there a unique path from the dominating compare?
+                    //
+                    JITDUMP("\nDominator " FMT_BB " of " FMT_BB " has relop with same liberal VN:\n", domBlock->bbNum,
+                            block->bbNum);
+                    DISPTREE(domCmpTree);
                     JITDUMP(" Redundant compare; current relop:\n");
                     DISPTREE(tree);
-                    JITDUMP(" dominating relop in " FMT_BB " with same VN:\n", domBlock->bbNum);
-                    DISPTREE(domCmpTree);
-
-                    BasicBlock* trueSuccessor  = domBlock->bbJumpDest;
-                    BasicBlock* falseSuccessor = domBlock->bbNext;
-                    const bool trueReaches  = fgReachable(trueSuccessor, block);
-                    const bool falseReaches = fgReachable(falseSuccessor, block);
+                    BasicBlock* const trueSuccessor  = domBlock->bbJumpDest;
+                    BasicBlock* const falseSuccessor = domBlock->bbNext;
+                    const bool trueReaches           = optReachable(trueSuccessor, block);
+                    const bool falseReaches          = optReachable(falseSuccessor, block);
 
                     if (trueReaches && falseReaches)
                     {
                         // Both dominating compare outcomes reach the current block so we can't infer the
                         // value of the relop.
                         //
-                        // If the dominating compare is close to the current compare, this may be a missed
-                        // opportunity to tail duplicate.
+                        // However we may be able to update the flow from block's predecessors so they
+                        // bypass block and instead transfer control to jump's successors (aka jump threading).
                         //
-                        JITDUMP("Both successors of " FMT_BB " reach, can't optimize\n", domBlock->bbNum);
+                        const bool wasThreaded = optJumpThread(block, domBlock);
 
-                        if ((trueSuccessor->GetUniqueSucc() == block) || (falseSuccessor->GetUniqueSucc() == block))
+                        if (wasThreaded)
                         {
-                            JITDUMP("Perhaps we should have tail duplicated " FMT_BB "\n", block->bbNum);
+                            return true;
                         }
                     }
                     else if (trueReaches)
@@ -161,8 +175,12 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
                     {
                         // No apparent path from the dominating BB.
                         //
-                        // If domBlock or block is in an EH handler we may fail to find a path.
-                        // Just ignore those cases.
+                        // We should rarely see this given that optReachable is returning
+                        // up to date results, but as we optimize we create unreachable blocks,
+                        // and that can lead to cases where we can't find paths. That means we may be
+                        // optimizing code that is now unreachable, but attempts to fix or avoid
+                        // doing that lead to more complications, and it isn't that common.
+                        // So we just tolerate it.
                         //
                         // No point in looking further up the tree.
                         //
@@ -182,7 +200,6 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
     //
     if (relopValue == -1)
     {
-        JITDUMP("Failed to find a suitable dominating compare, so we won't optimize\n");
         return false;
     }
 
@@ -219,3 +236,411 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
     fgMorphBlockStmt(block, stmt DEBUGARG(__FUNCTION__));
     return true;
 }
+
+//------------------------------------------------------------------------
+// optJumpThread: try and bypass the current block by rerouting
+//   flow from predecessors directly to successors.
+//
+// Arguments:
+//   block - block with branch to optimize
+//   domBlock - a dominating block that has an equivalent branch
+//
+// Returns:
+//   True if the branch was optimized.
+//
+// Notes:
+//
+//    A     B           A     B
+//     \   /            |     |
+//      \ /             |     |
+//     block     ==>    |     |
+//      / \             |     |
+//     /   \            |     |
+//    C     D           C     D
+//
+bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock)
+{
+    assert(block->bbJumpKind == BBJ_COND);
+    assert(domBlock->bbJumpKind == BBJ_COND);
+
+    // If the dominating block is not the immediate dominator
+    // we would need to duplicate a lot of code to thread
+    // the jumps. Pass for now.
+    //
+    if (domBlock != block->bbIDom)
+    {
+        JITDUMP(" -- not idom, so no threading\n");
+        return false;
+    }
+
+    JITDUMP("Both successors of IDom " FMT_BB " reach " FMT_BB " -- attempting jump threading\n", domBlock->bbNum,
+            block->bbNum);
+
+    // Since flow is going to bypass block, make sure there
+    // is nothing in block that can cause a side effect.
+    //
+    // Note we neglect PHI assignments. This reflects a general lack of
+    // SSA update abilities in the jit. We really should update any uses
+    // of PHIs defined here with the corresponding PHI input operand.
+    //
+    // TODO: if block has side effects, for those predecessors that are
+    // favorable (ones that don't reach block via a critical edge), consider
+    // duplicating block's IR into the predecessor. This is the jump threading
+    // analog of the optimization we encourage via fgOptimizeUncondBranchToSimpleCond.
+    //
+    Statement* const lastStmt = block->lastStmt();
+
+    for (Statement* stmt = block->FirstNonPhiDef(); stmt != nullptr; stmt = stmt->GetNextStmt())
+    {
+        GenTree* const tree = stmt->GetRootNode();
+
+        // We can ignore exception side effects in the jump tree.
+        //
+        // They are covered by the exception effects in the dominating compare.
+        // We know this because the VNs match and they encode exception states.
+        //
+        if ((tree->gtFlags & GTF_SIDE_EFFECT) != 0)
+        {
+            if (stmt == lastStmt)
+            {
+                assert(tree->OperIs(GT_JTRUE));
+                if ((tree->gtFlags & GTF_SIDE_EFFECT) == GTF_EXCEPT)
+                {
+                    // However, be conservative if block is in a try as we might not
+                    // have a full picture of EH flow.
+                    //
+                    if (!block->hasTryIndex())
+                    {
+                        // We will ignore the side effect on this tree.
+                        //
+                        continue;
+                    }
+                }
+            }
+
+            JITDUMP(FMT_BB " has side effects; no threading\n", block->bbNum);
+            return false;
+        }
+    }
+
+    // In order to optimize we have to be able to determine which predecessors
+    // are correlated exclusively with a true value for block's relop, and which
+    // are correlated exclusively with a false value (aka true preds and false preds).
+    //
+    // To do this we try and follow the flow from domBlock to block; any block pred
+    // reachable from domBlock's true edge is a true pred, and vice versa.
+    //
+    // However, there are some exceptions:
+    //
+    // * It's possible for a pred to be reachable from both paths out of domBlock;
+    //   if so, we can't jump thread that pred.
+    //
+    // * It's also possible that a pred can't branch directly to a successor as
+    //   it might violate EH region constraints. Since this causes the same issues
+    //   as an ambiguous pred we'll just classify these as ambiguous too.
+    //
+    // * It's also possible to have preds with implied eh flow to the current
+    //   block, eg a catch return, and so we won't see either path reachable.
+    //   We'll handle those as ambiguous as well.
+    //
+    // * It's also possible that the pred is a switch; we will treat switch
+    //   preds as ambiguous as well.
+    //
+    // For true preds and false preds we can reroute flow. It may turn out that
+    // one of the preds falls through to block. We would prefer not to introduce
+    // a new block to allow changing that fall through to a jump, so if we have
+    // both a pred that is not a true pred, and a fall through, we defer optimizing
+    // the fall through pred as well.
+    //
+    // This creates an ordering issue, and to resolve it we have to walk the pred
+    // list twice. Classification of preds should be cheap so we just rerun the
+    // reachability checks twice as well.
+    //
+    int numPreds                     = 0;
+    int numAmbiguousPreds            = 0;
+    int numTruePreds                 = 0;
+    int numFalsePreds                = 0;
+    BasicBlock* uniqueTruePred       = nullptr;
+    BasicBlock* uniqueFalsePred      = nullptr;
+    BasicBlock* fallThroughPred      = nullptr;
+    BasicBlock* const trueSuccessor  = domBlock->bbJumpDest;
+    BasicBlock* const falseSuccessor = domBlock->bbNext;
+    BasicBlock* const trueTarget     = block->bbJumpDest;
+    BasicBlock* const falseTarget    = block->bbNext;
+
+    for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+    {
+        BasicBlock* const predBlock = pred->getBlock();
+        numPreds++;
+
+        // Treat switch preds as ambiguous for now.
+        //
+        if (predBlock->bbJumpKind == BBJ_SWITCH)
+        {
+            JITDUMP(FMT_BB " is a switch pred\n", predBlock->bbNum);
+            numAmbiguousPreds++;
+            continue;
+        }
+
+        const bool isTruePred =
+            ((predBlock == domBlock) && (trueSuccessor == block)) || optReachable(trueSuccessor, predBlock);
+        const bool isFalsePred =
+            ((predBlock == domBlock) && (falseSuccessor == block)) || optReachable(falseSuccessor, predBlock);
+
+        if (isTruePred == isFalsePred)
+        {
+            // Either both reach, or neither reaches.
+            //
+            // We should rarely see (false,false) given that optReachable is returning
+            // up to date results, but as we optimize we create unreachable blocks,
+            // and that can lead to cases where we can't find paths. That means we may be
+            // optimizing code that is now unreachable, but attempts to fix or avoid doing that
+            // lead to more complications, and it isn't that common. So we tolerate it.
+            //
+            JITDUMP(FMT_BB " is an ambiguous pred\n", predBlock->bbNum);
+            numAmbiguousPreds++;
+            continue;
+        }
+
+        if (isTruePred)
+        {
+            if (!BasicBlock::sameEHRegion(predBlock, trueTarget))
+            {
+                JITDUMP(FMT_BB " is an eh constrained pred\n", predBlock->bbNum);
+                numAmbiguousPreds++;
+                continue;
+            }
+
+            if (numTruePreds == 0)
+            {
+                uniqueTruePred = predBlock;
+            }
+            else
+            {
+                uniqueTruePred = nullptr;
+            }
+
+            numTruePreds++;
+            JITDUMP(FMT_BB " is a true pred\n", predBlock->bbNum);
+        }
+        else
+        {
+            assert(isFalsePred);
+
+            if (!BasicBlock::sameEHRegion(predBlock, falseTarget))
+            {
+                JITDUMP(FMT_BB " is an eh constrained pred\n", predBlock->bbNum);
+                numAmbiguousPreds++;
+                continue;
+            }
+
+            if (numFalsePreds == 0)
+            {
+                uniqueFalsePred = predBlock;
+            }
+            else
+            {
+                uniqueFalsePred = nullptr;
+            }
+
+            numFalsePreds++;
+            JITDUMP(FMT_BB " is a false pred\n", predBlock->bbNum);
+        }
+
+        // Note if the true or false pred is the fall through pred.
+        //
+        if (predBlock->bbNext == block)
+        {
+            JITDUMP(FMT_BB " is the fall-through pred\n", predBlock->bbNum);
+            assert(fallThroughPred == nullptr);
+            fallThroughPred = predBlock;
+        }
+    }
+
+    // All preds should have been classified.
+    //
+    assert(numPreds == numTruePreds + numFalsePreds + numAmbiguousPreds);
+
+    if ((numTruePreds == 0) && (numFalsePreds == 0))
+    {
+        // This is possible, but should be rare.
+        //
+        JITDUMP(FMT_BB " only has ambiguous preds, not optimizing\n", block->bbNum);
+        return false;
+    }
+
+    if ((numAmbiguousPreds > 0) && (fallThroughPred != nullptr))
+    {
+        JITDUMP(FMT_BB " has both ambiguous preds and a fall through pred, not optimizing\n", block->bbNum);
+        return false;
+    }
+
+    // We should be good to go
+    //
+    JITDUMP("Optimizing via jump threading\n");
+
+    // Now reroute the flow from the predecessors.
+    //
+    // If there is a fall through pred, modify block by deleting the terminal
+    // jump statement, and update it to jump or fall through to the appropriate successor.
+    // Note this is just a refinement of pre-existing flow so no EH check is needed.
+    //
+    // All other predecessors must reach block via a jump. So we can update their
+    // flow directly by changing their jump targets to the appropriate successor,
+    // provided it's a permissible flow in our EH model.
+    //
+    for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
+    {
+        BasicBlock* const predBlock = pred->getBlock();
+
+        if (predBlock->bbJumpKind == BBJ_SWITCH)
+        {
+            // Skip over switch preds, they will continue to flow to block.
+            //
+            continue;
+        }
+
+        const bool isTruePred =
+            ((predBlock == domBlock) && (trueSuccessor == block)) || optReachable(trueSuccessor, predBlock);
+        const bool isFalsePred =
+            ((predBlock == domBlock) && (falseSuccessor == block)) || optReachable(falseSuccessor, predBlock);
+
+        if (isTruePred == isFalsePred)
+        {
+            // Skip over ambiguous preds, they will continue to flow to block.
+            //
+            continue;
+        }
+
+        if (!BasicBlock::sameEHRegion(predBlock, isTruePred ? trueTarget : falseTarget))
+        {
+            // Skip over eh constrained preds, they will continue to flow to block.
+            continue;
+        }
+
+        // Is this the one and only unambiguous fall through pred?
+        //
+        if (predBlock->bbNext == block)
+        {
+            assert(predBlock == fallThroughPred);
+
+            // No other pred can safely pass control through block.
+            //
+            assert(numAmbiguousPreds == 0);
+
+            // Clean out the terminal branch statement; we are going to repurpose this block
+            //
+            Statement* lastStmt = block->lastStmt();
+            fgRemoveStmt(block, lastStmt);
+
+            if (isTruePred)
+            {
+                JITDUMP("Fall through flow from pred " FMT_BB " -> " FMT_BB " implies predicate true\n",
+                        predBlock->bbNum, block->bbNum);
+                JITDUMP("  repurposing " FMT_BB " to always jump to " FMT_BB "\n", block->bbNum, trueTarget->bbNum);
+                fgRemoveRefPred(block->bbNext, block);
+                block->bbJumpKind = BBJ_ALWAYS;
+            }
+            else
+            {
+                assert(isFalsePred);
+                JITDUMP("Fall through flow from pred " FMT_BB " -> " FMT_BB " implies predicate false\n",
+                        predBlock->bbNum, block->bbNum);
+                JITDUMP("  repurposing " FMT_BB " to always fall through to " FMT_BB "\n", block->bbNum,
+                        falseTarget->bbNum);
+                fgRemoveRefPred(block->bbJumpDest, block);
+                block->bbJumpKind = BBJ_NONE;
+            }
+        }
+        else
+        {
+            assert(predBlock->bbNext != block);
+            if (isTruePred)
+            {
+                assert(!optReachable(falseSuccessor, predBlock));
+                JITDUMP("Jump flow from pred " FMT_BB " -> " FMT_BB
+                        " implies predicate true; we can safely redirect flow to be " FMT_BB " -> " FMT_BB "\n",
+                        predBlock->bbNum, block->bbNum, predBlock->bbNum, trueTarget->bbNum);
+
+                fgRemoveRefPred(block, predBlock);
+                fgReplaceJumpTarget(predBlock, trueTarget, block);
+                fgAddRefPred(trueTarget, predBlock);
+            }
+            else
+            {
+                assert(isFalsePred);
+                JITDUMP("Jump flow from pred " FMT_BB " -> " FMT_BB
+                        " implies predicate false; we can safely redirect flow to be " FMT_BB " -> " FMT_BB "\n",
+                        predBlock->bbNum, block->bbNum,
+                        predBlock->bbNum, falseTarget->bbNum);
+
+                fgRemoveRefPred(block, predBlock);
+                fgReplaceJumpTarget(predBlock, falseTarget, block);
+                fgAddRefPred(falseTarget, predBlock);
+                falseTarget->bbFlags |= BBF_JMP_TARGET;
+            }
+        }
+    }
+
+    // We optimized.
+    //
+    fgModified = true;
+    return true;
+}
+
+//------------------------------------------------------------------------
+// optReachable: see if there's a path from one block to another,
+//   including paths involving EH flow.
+//
+// Arguments:
+//   fromBlock - starting block
+//   toBlock - ending block
+//
+// Returns:
+//   true if there is a path, false if there is no path
+//
+// Notes:
+//   Like fgReachable, but computed on demand (and so accurate given
+//   the current flow graph), and also considers paths involving EH.
+//
+//   This may overstate "true" reachability in methods where there are
+//   finallies with multiple continuations.
+//
+bool Compiler::optReachable(BasicBlock* const fromBlock, BasicBlock* const toBlock)
+{
+    if (fromBlock == toBlock)
+    {
+        return true;
+    }
+
+    for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        block->bbFlags &= ~BBF_VISITED;
+    }
+
+    ArrayStack<BasicBlock*> stack(getAllocator(CMK_Reachability));
+    stack.Push(fromBlock);
+
+    while (!stack.Empty())
+    {
+        BasicBlock* const nextBlock = stack.Pop();
+        nextBlock->bbFlags |= BBF_VISITED;
+        assert(nextBlock != toBlock);
+
+        for (BasicBlock* succ : nextBlock->GetAllSuccs(this))
+        {
+            if (succ == toBlock)
+            {
+                return true;
+            }
+
+            if ((succ->bbFlags & BBF_VISITED) != 0)
+            {
+                continue;
+            }
+
+            stack.Push(succ);
+        }
+    }
+
+    return false;
+}