diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 56d0e18da286c2..713dbb190a071e 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -4912,6 +4912,10 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl if (opts.OptimizationEnabled()) { + // Conditional to switch conversion, and switch peeling + // + DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps); + // Optimize boolean conditions // DoPhase(this, PHASE_OPTIMIZE_BOOLS, &Compiler::optOptimizeBools); @@ -4920,10 +4924,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl // DoPhase(this, PHASE_IF_CONVERSION, &Compiler::optIfConversion); - // Conditional to switch conversion, and switch peeling - // - DoPhase(this, PHASE_SWITCH_RECOGNITION, &Compiler::optRecognizeAndOptimizeSwitchJumps); - // Run flow optimizations before reordering blocks // DoPhase(this, PHASE_OPTIMIZE_PRE_LAYOUT, &Compiler::optOptimizePreLayout); diff --git a/src/coreclr/jit/switchrecognition.cpp b/src/coreclr/jit/switchrecognition.cpp index d7ed297859032c..f187de4ab56bf0 100644 --- a/src/coreclr/jit/switchrecognition.cpp +++ b/src/coreclr/jit/switchrecognition.cpp @@ -9,7 +9,13 @@ // We mainly rely on TryLowerSwitchToBitTest in these heuristics, but jump tables can be useful // even without conversion to a bitmap test. #define SWITCH_MAX_DISTANCE ((TARGET_POINTER_SIZE * BITS_PER_BYTE) - 1) -#define SWITCH_MIN_TESTS 3 + +#if TARGET_ARM64 +// ARM64 has conditional instructions which can be more efficient than a switch->bittest (optOptimizeBools) +#define SWITCH_MIN_TESTS 5 +#else +#define SWITCH_MIN_TESTS 3 +#endif // This is a heuristics based value tuned for optimal performance #define CONVERT_SWITCH_TO_CCMP_MIN_TEST 5 @@ -35,9 +41,6 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps() continue; } -// Limit to XARCH, ARM is already doing a great job with such comparisons using -// a series of ccmp instruction (see ifConvert phase). -#ifdef TARGET_XARCH // block->KindIs(BBJ_COND) check is for better throughput. if (block->KindIs(BBJ_COND) && optSwitchDetectAndConvert(block)) { @@ -47,10 +50,7 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps() // Converted switches won't have dominant cases, so we can skip the switch peeling check. assert(!block->GetSwitchTargets()->HasDominantCase()); } - else -#endif - - if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->HasDominantCase()) + else if (block->KindIs(BBJ_SWITCH) && block->GetSwitchTargets()->HasDominantCase()) { fgPeelSwitch(block); modified = true; @@ -66,11 +66,49 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps() return modified ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING; } +//------------------------------------------------------------------------------ +// SkipFallthroughBlocks : Skip all fallthrough blocks (empty BBJ_ALWAYS blocks) +// +// Arguments: +// compiler - The compiler instance. +// block - the block to process +// +// Return Value: +// block that is not an empty fallthrough block +// +static BasicBlock* SkipFallthroughBlocks(Compiler* comp, BasicBlock* block) +{ + BasicBlock* origBlock = block; + if (!block->KindIs(BBJ_ALWAYS) || (block->firstStmt() != nullptr)) + { + // Fast path + return block; + } + + // We might consider ignoring statements with only NOPs if that is profitable. + + BitVecTraits traits(comp->compBasicBlockID, comp); + BitVec visited = BitVecOps::MakeEmpty(&traits); + BitVecOps::AddElemD(&traits, visited, block->bbID); + while (block->KindIs(BBJ_ALWAYS) && (block->firstStmt() == nullptr) && + BasicBlock::sameEHRegion(block, block->GetTarget())) + { + block = block->GetTarget(); + if (!BitVecOps::TryAddElemD(&traits, visited, block->bbID)) + { + // A cycle detected, bail out. + return origBlock; + } + } + return block; +} + //------------------------------------------------------------------------------ // IsConstantTestCondBlock : Does the given block represent a simple BBJ_COND // constant test? e.g. JTRUE(EQ/NE(X, CNS)). // // Arguments: +// compiler - The compiler instance. // block - The block to check // allowSideEffects - is variableNode allowed to have side-effects (COMMA)? // trueTarget - [out] The successor visited if X == CNS @@ -82,7 +120,8 @@ PhaseStatus Compiler::optRecognizeAndOptimizeSwitchJumps() // Return Value: // True if the block represents a constant test, false otherwise // -bool IsConstantTestCondBlock(const BasicBlock* block, +bool IsConstantTestCondBlock(Compiler* comp, + const BasicBlock* block, bool allowSideEffects, BasicBlock** trueTarget, BasicBlock** falseTarget, @@ -123,9 +162,11 @@ bool IsConstantTestCondBlock(const BasicBlock* block, return false; } - *isReversed = rootNode->gtGetOp1()->OperIs(GT_NE); - *trueTarget = *isReversed ? block->GetFalseTarget() : block->GetTrueTarget(); - *falseTarget = *isReversed ? block->GetTrueTarget() : block->GetFalseTarget(); + *isReversed = rootNode->gtGetOp1()->OperIs(GT_NE); + *trueTarget = + SkipFallthroughBlocks(comp, *isReversed ? block->GetFalseTarget() : block->GetTrueTarget()); + *falseTarget = + SkipFallthroughBlocks(comp, *isReversed ? block->GetTrueTarget() : block->GetFalseTarget()); if (block->FalseTargetIs(block) || block->TrueTargetIs(block)) { @@ -172,6 +213,12 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingFor { assert(firstBlock->KindIs(BBJ_COND)); +#ifdef TARGET_ARM + // Bitmap test (TryLowerSwitchToBitTest) is quite verbose on ARM32 (~6 instructions), it results in massive size + // regressions. + return false; +#endif + GenTree* variableNode = nullptr; ssize_t cns = 0; BasicBlock* trueTarget = nullptr; @@ -184,7 +231,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingFor // and then try to accumulate as many constant test blocks as possible. Once we hit // a block that doesn't match the pattern, we start processing the accumulated blocks. bool isReversed = false; - if (IsConstantTestCondBlock(firstBlock, true, &trueTarget, &falseTarget, &isReversed, &variableNode, &cns)) + if (IsConstantTestCondBlock(this, firstBlock, true, &trueTarget, &falseTarget, &isReversed, &variableNode, &cns)) { if (isReversed) { @@ -234,7 +281,7 @@ bool Compiler::optSwitchDetectAndConvert(BasicBlock* firstBlock, bool testingFor } // Inspect secondary blocks - if (IsConstantTestCondBlock(currBb, false, &currTrueTarget, &currFalseTarget, &isReversed, + if (IsConstantTestCondBlock(this, currBb, false, &currTrueTarget, &currFalseTarget, &isReversed, &currVariableNode, &currCns)) { if (currTrueTarget != trueTarget) @@ -399,10 +446,10 @@ bool Compiler::optSwitchConvert(BasicBlock* firstBlock, BasicBlock* blockIfTrue = nullptr; BasicBlock* blockIfFalse = nullptr; bool isReversed = false; - const bool isTest = IsConstantTestCondBlock(lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed); + const bool isTest = IsConstantTestCondBlock(this, lastBlock, false, &blockIfTrue, &blockIfFalse, &isReversed); assert(isTest); - assert(firstBlock->TrueTargetIs(blockIfTrue)); + assert(SkipFallthroughBlocks(this, firstBlock->GetTrueTarget()) == SkipFallthroughBlocks(this, blockIfTrue)); FlowEdge* const trueEdge = firstBlock->GetTrueEdge(); FlowEdge* const falseEdge = firstBlock->GetFalseEdge();