src/coreclr/jit/compiler.cpp (4 additions & 4 deletions)
@@ -4912,6 +4912,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl

if (opts.OptimizationEnabled())
{
// Conditional to switch conversion, and switch peeling
//
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps);

// Optimize boolean conditions
//
DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools);
@@ -4920,10 +4924,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
//
DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion);

// Conditional to switch conversion, and switch peeling
//
DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps);

// Run flow optimizations before reordering blocks
//
DoPhase(this, PHASE_OPTIMIZE_PRE_LAYOUT, &Compiler::optOptimizePreLayout);
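For context on the reordering: switch recognition and optOptimizeBools both look at chains of constant equality tests, so running PHASE_SWITCH_RECOGNITION first lets it see the raw chain before boolean optimization and if-conversion reshape it. A minimal illustration of the kind of source pattern involved (a hypothetical example, not code from this PR):

```cpp
// Hypothetical source shape: each comparison against the same value becomes a
// BBJ_COND block of the form JTRUE(EQ(c, cns)); switch recognition can fold the
// whole chain into a single switch / bit test before later phases rewrite it.
bool IsVowel(int c)
{
    return (c == 'a') || (c == 'e') || (c == 'i') || (c == 'o') || (c == 'u');
}
```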
src/coreclr/jit/switchrecognition.cpp (63 additions & 16 deletions)
@@ -9,7 +9,13 @@
// We mainly rely on TryLowerSwitchToBitTest in these heuristics, but jump tables can be useful
// even without conversion to a bitmap test.
#define SWITCH_MAX_DISTANCE ((TARGET_POINTER_SIZE * BITS_PER_BYTE) - 1)
#define SWITCH_MIN_TESTS 3

#if TARGET_ARM64
// ARM64 has conditional instructions which can be more efficient than a switch->bittest (optOptimizeBools)
#define SWITCH_MIN_TESTS 5
#else
#define SWITCH_MIN_TESTS 3
#endif

// This is a heuristics based value tuned for optimal performance
#define CONVERT_SWITCH_TO_CCMP_MIN_TEST 5
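Roughly, the bit-test form that TryLowerSwitchToBitTest produces only applies when all tested constants lie within one machine word of each other, which is where SWITCH_MAX_DISTANCE comes from; on ARM64, short chains are often better served by conditional-compare sequences (as the comment above notes), so SWITCH_MIN_TESTS is raised there. A hedged sketch of the idea behind the bit-test form, assuming a 64-bit target word (hypothetical helper, not the JIT's code):

```cpp
#include <cstdint>

// The tested constants are biased by their minimum value and checked against a
// precomputed bit mask. This only works when (maxCns - minCns) fits in the word
// width, hence SWITCH_MAX_DISTANCE == (TARGET_POINTER_SIZE * BITS_PER_BYTE) - 1.
bool MatchesAnyConstant(uint64_t value, uint64_t minCns, uint64_t mask)
{
    uint64_t biased = value - minCns;
    return (biased < 64) && (((1ULL << biased) & mask) != 0);
}
```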
@@ -35,9 +41,6 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
continue;
}

// Limit to XARCH, ARM is already doing a great job with such comparisons using
// a series of ccmp instruction (see ifConvert phase).
#ifdef TARGET_XARCH
// block->KindIs(BBJ_COND) check is for better throughput.
if (block->KindIs(BBJ_COND) && optSwitchDetectAndConvert(block))
{
@@ -47,10 +50,7 @@
// Converted switches won't have dominant cases, so we can skip the switch peeling check.
assert(!block->GetSwitchTargets()->HasDominantCase());
}
else
#endif

if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->HasDominantCase())
else if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->HasDominantCase())
{
fgPeelSwitch(block);
modified = true;
@@ -66,11 +66,49 @@
return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}
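For readers unfamiliar with fgPeelSwitch: peeling hoists the dominant (typically profile-hot) case out of the jump table so that the hot value is handled by a single up-front compare. A rough before/after sketch in plain C++ (illustrative only, assuming case 3 is dominant per profile data):

```cpp
int BeforePeeling(int x)
{
    switch (x)
    {
        case 0: return 10;
        case 1: return 11;
        case 3: return 13; // dominant case per profile
        default: return -1;
    }
}

int AfterPeeling(int x)
{
    if (x == 3) // peeled dominant case: no jump table dispatch on the hot path
    {
        return 13;
    }
    switch (x)
    {
        case 0: return 10;
        case 1: return 11;
        default: return -1;
    }
}
```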

//------------------------------------------------------------------------------
// SkipFallthroughBlocks : Skip all fallthrough blocks (empty BBJ_ALWAYS blocks)
//
// Arguments:
// compiler - The compiler instance.
// block - the block to process
//
// Return Value:
// block that is not an empty fallthrough block
//
static BasicBlock* SkipFallthroughBlocks(Compiler* comp, BasicBlock* block)
{
BasicBlock* origBlock = block;
if (!block->KindIs(BBJ_ALWAYS) || (block->firstStmt() != nullptr))
{
// Fast path
return block;
}

// We might consider ignoring statements with only NOPs if that is profitable.

BitVecTraits traits(comp->compBasicBlockID, comp);
BitVec visited = BitVecOps::MakeEmpty(&traits);
BitVecOps::AddElemD(&traits, visited, block->bbID);
while (block->KindIs(BBJ_ALWAYS) && (block->firstStmt() == nullptr) &&
BasicBlock::sameEHRegion(block, block->GetTarget()))
{
block = block->GetTarget();
if (!BitVecOps::TryAddElemD(&traits, visited, block->bbID))
{
// Cycle detected, bail out.
return origBlock;
}
}
return block;
}
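Stripped of JIT types, the same skip-forwarders-with-cycle-guard idea might look like the following standalone sketch (the EH-region check from the real code is omitted here):

```cpp
#include <unordered_set>

struct Node
{
    Node* forwardTo = nullptr; // non-null only for an "empty BBJ_ALWAYS"-like forwarder
};

// Follow trivial forwarder nodes until a "real" node is reached; if the chain
// loops back on itself, return the original node, mirroring SkipFallthroughBlocks.
Node* SkipForwarders(Node* node)
{
    Node* orig = node;
    std::unordered_set<Node*> visited;
    visited.insert(node);
    while (node->forwardTo != nullptr)
    {
        node = node->forwardTo;
        if (!visited.insert(node).second)
        {
            return orig; // cycle detected, bail out
        }
    }
    return node;
}
```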

//------------------------------------------------------------------------------
// IsConstantTestCondBlock : Does the given block represent a simple BBJ_COND
// constant test? e.g. JTRUE(EQ/NE(X, CNS)).
//
// Arguments:
// compiler - The compiler instance.
// block - The block to check
// allowSideEffects - is variableNode allowed to have side-effects (COMMA)?
// trueTarget - [out] The successor visited if X == CNS
@@ -82,7 +120,8 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps()
// Return Value:
// True if the block represents a constant test, false otherwise
//
bool IsConstantTestCondBlock(const BasicBlock* block,
bool IsConstantTestCondBlock(Compiler* comp,
const BasicBlock* block,
bool allowSideEffects,
BasicBlock** trueTarget,
BasicBlock** falseTarget,
@@ -123,9 +162,11 @@ bool IsConstantTestCondBlock(const BasicBlock* block,
return false;
}

*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
*trueTarget = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget();
*falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget();
*isReversed = rootNode->gtGetOp1()->OperIs(GT_NE);
*trueTarget =
SkipFallthroughBlocks(comp, *isReversed ? block->GetFalseTarget() : block->GetTrueTarget());
*falseTarget =
SkipFallthroughBlocks(comp, *isReversed ? block->GetTrueTarget() : block->GetFalseTarget());

if (block->FalseTargetIs(block) || block->TrueTargetIs(block))
{
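The isReversed handling can be read as follows: for JTRUE(NE(X, CNS)) the condition-true successor is reached when X differs from the constant, so the reported "equal" and "not equal" targets swap. A tiny model of that normalization (hypothetical types, not JIT code):

```cpp
// How an EQ vs. NE constant test maps onto the (trueTarget, falseTarget) pair
// reported by IsConstantTestCondBlock.
struct NormalizedTargets
{
    int takenWhenEqual;    // corresponds to *trueTarget
    int takenWhenNotEqual; // corresponds to *falseTarget
};

NormalizedTargets Normalize(bool condIsNE, int condTrueTarget, int condFalseTarget)
{
    // JTRUE(EQ(x, cns)) reaches condTrueTarget when x == cns; JTRUE(NE(x, cns))
    // reaches it when x != cns, so the two successors swap roles ("isReversed").
    return condIsNE ? NormalizedTargets{condFalseTarget, condTrueTarget}
                    : NormalizedTargets{condTrueTarget, condFalseTarget};
}
```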
@@ -172,6 +213,12 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingFor
{
assert(firstBlock->KindIs(BBJ_COND));

#ifdef TARGET_ARM
// Bitmap test (TryLowerSwitchToBitTest) is quite verbose on ARM32 (~6 instructions), so it results in massive
// size regressions.
return false;
#endif

GenTree* variableNode = nullptr;
ssize_t cns = 0;
BasicBlock* trueTarget = nullptr;
@@ -184,7 +231,7 @@
// and then try to accumulate as many constant test blocks as possible. Once we hit
// a block that doesn't match the pattern, we start processing the accumulated blocks.
bool isReversed = false;
if (IsConstantTestCondBlock(firstBlock, true, &trueTarget, &falseTarget, &isReversed, &variableNode, &cns))
if (IsConstantTestCondBlock(this, firstBlock, true, &trueTarget, &falseTarget, &isReversed, &variableNode, &cns))
{
if (isReversed)
{
@@ -234,7 +281,7 @@
}

// Inspect secondary blocks
if (IsConstantTestCondBlock(currBb, false, &currTrueTarget, &currFalseTarget, &isReversed,
if (IsConstantTestCondBlock(this, currBb, false, &currTrueTarget, &currFalseTarget, &isReversed,
&currVariableNode, &currCns))
{
if (currTrueTarget != trueTarget)
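As the comment above describes, detection starts at the root BBJ_COND and keeps absorbing follow-up blocks that test the same variable against a constant and branch to the same target. A simplified standalone model of that accumulation (hypothetical types, not the JIT's implementation):

```cpp
#include <cstdint>
#include <vector>

// Collect the constants from a chain of "var == cns ? goto target : fall through"
// tests, stopping at the first test that uses a different variable or target.
struct ConstTest
{
    int     varId;
    int64_t cns;
    int     trueTargetId;
};

std::vector<int64_t> CollectCandidateConstants(const std::vector<ConstTest>& chain)
{
    std::vector<int64_t> values;
    if (chain.empty())
    {
        return values;
    }

    const int varId  = chain.front().varId;
    const int target = chain.front().trueTargetId;
    for (const ConstTest& test : chain)
    {
        if ((test.varId != varId) || (test.trueTargetId != target))
        {
            break; // pattern broken: convert only the values collected so far
        }
        values.push_back(test.cns);
    }
    return values;
}
```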
@@ -399,10 +446,10 @@ bool Compiler::optSwitchConvert(BasicBlock* firstBlock,
BasicBlock* blockIfTrue = nullptr;
BasicBlock* blockIfFalse = nullptr;
bool isReversed = false;
const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed);
const bool isTest = IsConstantTestCondBlock(this, lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed);
assert(isTest);

assert(firstBlock->TrueTargetIs(blockIfTrue));
assert(SkipFallthroughBlocks(this, firstBlock->GetTrueTarget()) == SkipFallthroughBlocks(this, blockIfTrue));
FlowEdge* const trueEdge = firstBlock->GetTrueEdge();
FlowEdge* const falseEdge = firstBlock->GetFalseEdge();
