Skip to content

Commit 8fad985

Browse files
JIT: Run switch peeling only once (#113326)
Part of #107749. fgReorderBlocks runs switch peeling, which means we have both an early and a late pass of it; the former pass seems unnecessary. I've refactored our switch peeling pass to make it easier to incorporate into the switch recognition loop so that we only loop the block list once. I expected the refactor part of this PR to be no-diff, but it turns out we don't propagate the fgHasSwitch member of inlinee compilers to the root compiler, which pessimized the old switch peeling pass. These changes unlock switch peeling in a few more cases.
1 parent b9adf8d commit 8fad985

File tree

4 files changed

+122
-143
lines changed

4 files changed

+122
-143
lines changed

src/coreclr/jit/compiler.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -6184,7 +6184,7 @@ class Compiler
61846184

61856185
bool fgOptimizeSwitchBranches(BasicBlock* block);
61866186

6187-
bool fgOptimizeSwitchJumps();
6187+
void fgPeelSwitch(BasicBlock* block);
61886188
#ifdef DEBUG
61896189
void fgPrintEdgeWeights();
61906190
#endif

src/coreclr/jit/fginline.cpp

+11-6
Original file line numberDiff line numberDiff line change
@@ -1650,13 +1650,18 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
16501650
info.compNeedsConsecutiveRegisters |= InlineeCompiler->info.compNeedsConsecutiveRegisters;
16511651
#endif
16521652

1653-
// If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler.
1654-
// TODO-CQ: Implement hot/cold splitting of methods with switch tables.
1655-
if (InlineeCompiler->fgHasSwitch && opts.compProcedureSplitting)
1653+
if (InlineeCompiler->fgHasSwitch)
16561654
{
1657-
opts.compProcedureSplitting = false;
1658-
JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; "
1659-
"implementation limitation.\n");
1655+
fgHasSwitch = true;
1656+
1657+
// If the inlinee compiler encounters switch tables, disable hot/cold splitting in the root compiler.
1658+
// TODO-CQ: Implement hot/cold splitting of methods with switch tables.
1659+
if (opts.compProcedureSplitting)
1660+
{
1661+
opts.compProcedureSplitting = false;
1662+
JITDUMP("Turning off procedure splitting for this method, as inlinee compiler encountered switch tables; "
1663+
"implementation limitation.\n");
1664+
}
16601665
}
16611666

16621667
#ifdef FEATURE_SIMD

src/coreclr/jit/fgopt.cpp

+87-127
Original file line numberDiff line numberDiff line change
@@ -2825,140 +2825,113 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
28252825
}
28262826

28272827
//-----------------------------------------------------------------------------
2828-
// fgOptimizeSwitchJump: see if a switch has a dominant case, and modify to
2829-
// check for that case up front (aka switch peeling).
2828+
// fgPeelSwitch: Modify a switch to check for its dominant case up front.
28302829
//
2831-
// Returns:
2832-
// True if the switch now has an upstream check for the dominant case.
2830+
// Parameters:
2831+
// block - The switch block with a dominant case
28332832
//
2834-
bool Compiler::fgOptimizeSwitchJumps()
2833+
void Compiler::fgPeelSwitch(BasicBlock* block)
28352834
{
2836-
if (!fgHasSwitch)
2837-
{
2838-
return false;
2839-
}
2840-
2841-
bool modified = false;
2842-
2843-
for (BasicBlock* const block : Blocks())
2844-
{
2845-
// Lowering expands switches, so calling this method on lowered IR
2846-
// does not make sense.
2847-
//
2848-
assert(!block->IsLIR());
2849-
2850-
if (!block->KindIs(BBJ_SWITCH))
2851-
{
2852-
continue;
2853-
}
2854-
2855-
if (block->isRunRarely())
2856-
{
2857-
continue;
2858-
}
2859-
2860-
if (!block->GetSwitchTargets()->bbsHasDominantCase)
2861-
{
2862-
continue;
2863-
}
2835+
assert(block->KindIs(BBJ_SWITCH));
2836+
assert(block->GetSwitchTargets()->bbsHasDominantCase);
2837+
assert(!block->isRunRarely());
28642838

2865-
// We currently will only see dominant cases with PGO.
2866-
//
2867-
assert(block->hasProfileWeight());
2839+
// Lowering expands switches, so calling this method on lowered IR
2840+
// does not make sense.
2841+
//
2842+
assert(!block->IsLIR());
28682843

2869-
const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase;
2844+
// We currently will only see dominant cases with PGO.
2845+
//
2846+
assert(block->hasProfileWeight());
28702847

2871-
JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase);
2848+
const unsigned dominantCase = block->GetSwitchTargets()->bbsDominantCase;
2849+
JITDUMP(FMT_BB " has switch with dominant case %u, considering peeling\n", block->bbNum, dominantCase);
28722850

2873-
// The dominant case should not be the default case, as we already peel that one.
2874-
//
2875-
assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1));
2876-
BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock();
2877-
Statement* const switchStmt = block->lastStmt();
2878-
GenTree* const switchTree = switchStmt->GetRootNode();
2879-
assert(switchTree->OperIs(GT_SWITCH));
2880-
GenTree* const switchValue = switchTree->AsOp()->gtGetOp1();
2881-
2882-
// Split the switch block just before at the switch.
2883-
//
2884-
// After this, newBlock is the switch block, and
2885-
// block is the upstream block.
2886-
//
2887-
BasicBlock* newBlock = nullptr;
2888-
2889-
if (block->firstStmt() == switchStmt)
2890-
{
2891-
newBlock = fgSplitBlockAtBeginning(block);
2892-
}
2893-
else
2894-
{
2895-
newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt());
2896-
}
2851+
// The dominant case should not be the default case, as we already peel that one.
2852+
//
2853+
assert(dominantCase < (block->GetSwitchTargets()->bbsCount - 1));
2854+
BasicBlock* const dominantTarget = block->GetSwitchTargets()->bbsDstTab[dominantCase]->getDestinationBlock();
2855+
Statement* const switchStmt = block->lastStmt();
2856+
GenTree* const switchTree = switchStmt->GetRootNode();
2857+
assert(switchTree->OperIs(GT_SWITCH));
2858+
GenTree* const switchValue = switchTree->gtGetOp1();
2859+
2860+
// Split the switch block just before at the switch.
2861+
//
2862+
// After this, newBlock is the switch block, and
2863+
// block is the upstream block.
2864+
//
2865+
BasicBlock* newBlock = nullptr;
28972866

2898-
// Set up a compare in the upstream block, "stealing" the switch value tree.
2899-
//
2900-
GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase));
2901-
GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare);
2902-
Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo());
2903-
fgInsertStmtAtEnd(block, jmpStmt);
2867+
if (block->firstStmt() == switchStmt)
2868+
{
2869+
newBlock = fgSplitBlockAtBeginning(block);
2870+
}
2871+
else
2872+
{
2873+
newBlock = fgSplitBlockAfterStatement(block, switchStmt->GetPrevStmt());
2874+
}
29042875

2905-
// Reattach switch value to the switch. This may introduce a comma
2906-
// in the upstream compare tree, if the switch value expression is complex.
2907-
//
2908-
switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1);
2876+
// Set up a compare in the upstream block, "stealing" the switch value tree.
2877+
//
2878+
GenTree* const dominantCaseCompare = gtNewOperNode(GT_EQ, TYP_INT, switchValue, gtNewIconNode(dominantCase));
2879+
GenTree* const jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, dominantCaseCompare);
2880+
Statement* const jmpStmt = fgNewStmtFromTree(jmpTree, switchStmt->GetDebugInfo());
2881+
fgInsertStmtAtEnd(block, jmpStmt);
29092882

2910-
// Update flags
2911-
//
2912-
switchTree->gtFlags = switchTree->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT;
2913-
dominantCaseCompare->gtFlags |= dominantCaseCompare->AsOp()->gtOp1->gtFlags & GTF_ALL_EFFECT;
2914-
jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT;
2915-
dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE;
2883+
// Reattach switch value to the switch. This may introduce a comma
2884+
// in the upstream compare tree, if the switch value expression is complex.
2885+
//
2886+
switchTree->AsOp()->gtOp1 = fgMakeMultiUse(&dominantCaseCompare->AsOp()->gtOp1);
29162887

2917-
// Wire up the new control flow.
2918-
//
2919-
FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block);
2920-
FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds;
2921-
block->SetCond(blockToTargetEdge, blockToNewBlockEdge);
2888+
// Update flags
2889+
//
2890+
switchTree->gtFlags = switchTree->gtGetOp1()->gtFlags & GTF_ALL_EFFECT;
2891+
dominantCaseCompare->gtFlags |= dominantCaseCompare->gtGetOp1()->gtFlags & GTF_ALL_EFFECT;
2892+
jmpTree->gtFlags |= dominantCaseCompare->gtFlags & GTF_ALL_EFFECT;
2893+
dominantCaseCompare->gtFlags |= GTF_RELOP_JMP_USED | GTF_DONT_CSE;
29222894

2923-
// Update profile data
2924-
//
2925-
const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction;
2926-
const weight_t blockToTargetWeight = block->bbWeight * fraction;
2895+
// Wire up the new control flow.
2896+
//
2897+
FlowEdge* const blockToTargetEdge = fgAddRefPred(dominantTarget, block);
2898+
FlowEdge* const blockToNewBlockEdge = newBlock->bbPreds;
2899+
block->SetCond(blockToTargetEdge, blockToNewBlockEdge);
29272900

2928-
newBlock->decreaseBBProfileWeight(blockToTargetWeight);
2901+
// Update profile data
2902+
//
2903+
const weight_t fraction = newBlock->GetSwitchTargets()->bbsDominantFraction;
2904+
const weight_t blockToTargetWeight = block->bbWeight * fraction;
29292905

2930-
blockToTargetEdge->setLikelihood(fraction);
2931-
blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction));
2906+
newBlock->decreaseBBProfileWeight(blockToTargetWeight);
29322907

2933-
JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
2934-
newBlock->bbNum, fgPgoConsistent ? "is now" : "was already");
2935-
fgPgoConsistent = false;
2908+
blockToTargetEdge->setLikelihood(fraction);
2909+
blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction));
29362910

2937-
// For now we leave the switch as is, since there's no way
2938-
// to indicate that one of the cases is now unreachable.
2939-
//
2940-
// But it no longer has a dominant case.
2941-
//
2942-
newBlock->GetSwitchTargets()->bbsHasDominantCase = false;
2911+
JITDUMP("fgPeelSwitch: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n",
2912+
newBlock->bbNum, fgPgoConsistent ? "is now" : "was already");
2913+
fgPgoConsistent = false;
29432914

2944-
if (fgNodeThreading == NodeThreading::AllTrees)
2945-
{
2946-
// The switch tree has been modified.
2947-
JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID());
2948-
gtSetStmtInfo(switchStmt);
2949-
fgSetStmtSeq(switchStmt);
2915+
// For now we leave the switch as is, since there's no way
2916+
// to indicate that one of the cases is now unreachable.
2917+
//
2918+
// But it no longer has a dominant case.
2919+
//
2920+
newBlock->GetSwitchTargets()->bbsHasDominantCase = false;
29502921

2951-
// fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have
2952-
// added new nodes if a COMMA was introduced.
2953-
JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID());
2954-
gtSetStmtInfo(jmpStmt);
2955-
fgSetStmtSeq(jmpStmt);
2956-
}
2922+
if (fgNodeThreading == NodeThreading::AllTrees)
2923+
{
2924+
// The switch tree has been modified.
2925+
JITDUMP("Rethreading " FMT_STMT "\n", switchStmt->GetID());
2926+
gtSetStmtInfo(switchStmt);
2927+
fgSetStmtSeq(switchStmt);
29572928

2958-
modified = true;
2929+
// fgNewStmtFromTree() already threaded the tree, but calling fgMakeMultiUse() might have
2930+
// added new nodes if a COMMA was introduced.
2931+
JITDUMP("Rethreading " FMT_STMT "\n", jmpStmt->GetID());
2932+
gtSetStmtInfo(jmpStmt);
2933+
fgSetStmtSeq(jmpStmt);
29592934
}
2960-
2961-
return modified;
29622935
}
29632936

29642937
//-----------------------------------------------------------------------------
@@ -3317,19 +3290,6 @@ bool Compiler::fgReorderBlocks(bool useProfile)
33173290
}
33183291
#endif // FEATURE_EH_WINDOWS_X86
33193292

3320-
//
3321-
// If we are using profile weights we can change some
3322-
// switch jumps into conditional test and jump
3323-
//
3324-
if (fgIsUsingProfileWeights())
3325-
{
3326-
optimizedSwitches = fgOptimizeSwitchJumps();
3327-
if (optimizedSwitches)
3328-
{
3329-
fgUpdateFlowGraph();
3330-
}
3331-
}
3332-
33333293
if (useProfile)
33343294
{
33353295
// Don't run the new layout until we get to the backend,

src/coreclr/jit/switchrecognition.cpp

+23-9
Original file line numberDiff line numberDiff line change
@@ -25,25 +25,39 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
2525
{
2626
bool modified = false;
2727

28+
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
29+
{
30+
if (block->isRunRarely())
31+
{
32+
continue;
33+
}
34+
2835
// Limit to XARCH, ARM is already doing a great job with such comparisons using
2936
// a series of ccmp instruction (see ifConvert phase).
3037
#ifdef TARGET_XARCH
31-
for (BasicBlock* block = fgFirstBB; block != nullptr; block = block->Next())
32-
{
3338
// block->KindIs(BBJ_COND) check is for better throughput.
34-
if (block->KindIs(BBJ_COND) && !block->isRunRarely() && optSwitchDetectAndConvert(block))
39+
if (block->KindIs(BBJ_COND) && optSwitchDetectAndConvert(block))
3540
{
3641
JITDUMP("Converted block " FMT_BB " to switch\n", block->bbNum)
3742
modified = true;
43+
44+
// Converted switches won't have dominant cases, so we can skip the switch peeling check.
45+
assert(!block->GetSwitchTargets()->bbsHasDominantCase);
3846
}
39-
}
47+
else
4048
#endif
4149

42-
// When we have profile data, we can identify a switch's dominant case.
43-
// Try peeling the dominant case by checking for it up front.
44-
if (fgIsUsingProfileWeights())
45-
{
46-
modified |= fgOptimizeSwitchJumps();
50+
if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->bbsHasDominantCase)
51+
{
52+
fgPeelSwitch(block);
53+
modified = true;
54+
55+
// Switch peeling will convert this block into a check for the dominant case,
56+
// and insert the updated switch block after, which doesn't have a dominant case.
57+
// Skip over the switch block in the loop iteration.
58+
assert(block->Next()->KindIs(BBJ_SWITCH));
59+
block = block->Next();
60+
}
4761
}
4862

4963
return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;

0 commit comments

Comments
 (0)