-
Notifications
You must be signed in to change notification settings - Fork 5.2k
JIT: some small improvements to tail duplication #37038
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -14526,19 +14526,40 @@ bool Compiler::fgOptimizeSwitchBranches(BasicBlock* block) | |
// fgBlockEndFavorsTailDuplication: | ||
// Heuristic function that returns true if this block ends in a statement that looks favorable | ||
// for tail-duplicating its successor (such as assigning a constant to a local). | ||
// Args: | ||
// | ||
// Arguments: | ||
// block: BasicBlock we are considering duplicating the successor of | ||
// lclNum: local that is used by the successor block, provided by | ||
// prior call to fgBlockIsGoodTailDuplicationCandidate | ||
// | ||
// Returns: | ||
// true if it seems like a good idea | ||
// true if block end is favorable for tail duplication | ||
// | ||
bool Compiler::fgBlockEndFavorsTailDuplication(BasicBlock* block) | ||
// Notes: | ||
// This is the second half of the evaluation for tail duplication, where we try | ||
// to determine if this predecessor block assigns a constant or provides useful | ||
// information about a local that is tested in an unconditionally executed successor. | ||
// If so then duplicating the successor will likely allow the test to be | ||
// optimized away. | ||
// | ||
bool Compiler::fgBlockEndFavorsTailDuplication(BasicBlock* block, unsigned lclNum) | ||
{ | ||
if (block->isRunRarely()) | ||
{ | ||
return false; | ||
} | ||
|
||
Statement* lastStmt = block->lastStmt(); | ||
// If the local is address exposed, we currently can't optimize. | ||
// | ||
LclVarDsc* const lclDsc = lvaGetDesc(lclNum); | ||
|
||
if (lclDsc->lvAddrExposed) | ||
{ | ||
return false; | ||
} | ||
|
||
Statement* const lastStmt = block->lastStmt(); | ||
Statement* const firstStmt = block->FirstNonPhiDef(); | ||
|
||
if (lastStmt == nullptr) | ||
{ | ||
|
@@ -14549,37 +14570,76 @@ bool Compiler::fgBlockEndFavorsTailDuplication(BasicBlock* block) | |
// is an assignment of a constant, arraylength, or a relop. | ||
// This is because these statements produce information about values | ||
// that would otherwise be lost at the upcoming merge point. | ||
GenTree* tree = lastStmt->GetRootNode(); | ||
if (tree->gtOper != GT_ASG) | ||
{ | ||
return false; | ||
} | ||
// | ||
// Check up to N statements... | ||
// | ||
const int limit = 2; | ||
int count = 0; | ||
Statement* stmt = lastStmt; | ||
|
||
if (tree->OperIsBlkOp()) | ||
while (count < limit) | ||
{ | ||
return false; | ||
} | ||
count++; | ||
GenTree* const tree = stmt->GetRootNode(); | ||
if (tree->OperIs(GT_ASG) && !tree->OperIsBlkOp()) | ||
{ | ||
GenTree* const op1 = tree->AsOp()->gtOp1; | ||
|
||
GenTree* op2 = tree->AsOp()->gtOp2; | ||
if (op2->gtOper != GT_ARR_LENGTH && !op2->OperIsConst() && ((op2->OperKind() & GTK_RELOP) == 0)) | ||
{ | ||
return false; | ||
if (op1->IsLocal()) | ||
{ | ||
const unsigned op1LclNum = op1->AsLclVarCommon()->GetLclNum(); | ||
|
||
if (op1LclNum == lclNum) | ||
{ | ||
GenTree* const op2 = tree->AsOp()->gtOp2; | ||
|
||
if (op2->OperIs(GT_ARR_LENGTH) || op2->OperIsConst() || op2->OperIsCompare()) | ||
{ | ||
return true; | ||
} | ||
} | ||
} | ||
} | ||
|
||
Statement* const prevStmt = stmt->GetPrevStmt(); | ||
|
||
// The statement list prev links wrap from first->last, so exit | ||
// when we see lastStmt again, as we've now seen all statements. | ||
// | ||
if (prevStmt == lastStmt) | ||
{ | ||
break; | ||
} | ||
|
||
stmt = prevStmt; | ||
} | ||
|
||
return true; | ||
return false; | ||
} | ||
|
||
//------------------------------------------------------------- | ||
// fgBlockIsGoodTailDuplicationCandidate: | ||
// Heuristic function that examines a block (presumably one that is a merge point) to determine | ||
// if it should be duplicated. | ||
// args: | ||
// if it is a good candidate to be duplicated. | ||
// | ||
// Arguments: | ||
// target - the tail block (candidate for duplication) | ||
// returns: | ||
// true if this block seems like a good candidate for duplication | ||
// | ||
bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target) | ||
// Returns: | ||
// true if this is a good candidate, false otherwise | ||
// if true, lclNum is set to lcl to scan for in predecessor block | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This could use a little elaboration, e.g. explaining that good tail call duplication is predicted when we have an assignment to 'lcl' in the predecessor block that can provide information enabling optimization of a compare involving 'lcl' in the successor block, or something to that effect. There's a partial explanation in the above function, but it only tells half the story. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added some comments, see if this is better. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Excellent thanks! |
||
// | ||
// Notes: | ||
// The current heuristic is that tail duplication is deemed favorable if this | ||
// block simply tests the value of a local against a constant or some other local. | ||
// | ||
// This is the first half of the evaluation for tail duplication. We subsequently | ||
// need to check if predecessors of this block assigns a constant to the local. | ||
// | ||
bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target, unsigned* lclNum) | ||
{ | ||
*lclNum = BAD_VAR_NUM; | ||
|
||
// Here we are looking for blocks with a single statement feeding a conditional branch. | ||
// These blocks are small, and when duplicated onto the tail of blocks that end in | ||
// assignments, there is a high probability of the branch completely going away. | ||
|
@@ -14612,8 +14672,8 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target) | |
} | ||
|
||
// must be some kind of relational operator | ||
GenTree* cond = tree->AsOp()->gtOp1; | ||
if (!(cond->OperKind() & GTK_RELOP)) | ||
GenTree* const cond = tree->AsOp()->gtOp1; | ||
if (!cond->OperIsCompare()) | ||
{ | ||
return false; | ||
} | ||
|
@@ -14624,6 +14684,7 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target) | |
{ | ||
op1 = op1->AsOp()->gtOp1; | ||
} | ||
|
||
if (!op1->IsLocal() && !op1->OperIsConst()) | ||
{ | ||
return false; | ||
|
@@ -14635,11 +14696,44 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target) | |
{ | ||
op2 = op2->AsOp()->gtOp1; | ||
} | ||
|
||
if (!op2->IsLocal() && !op2->OperIsConst()) | ||
{ | ||
return false; | ||
} | ||
|
||
// Tree must have one constant and one local, or be comparing | ||
// the same local to itself. | ||
unsigned lcl1 = BAD_VAR_NUM; | ||
unsigned lcl2 = BAD_VAR_NUM; | ||
|
||
if (op1->IsLocal()) | ||
{ | ||
lcl1 = op1->AsLclVarCommon()->GetLclNum(); | ||
} | ||
|
||
if (op2->IsLocal()) | ||
{ | ||
lcl2 = op2->AsLclVarCommon()->GetLclNum(); | ||
} | ||
|
||
if ((lcl1 != BAD_VAR_NUM) && op2->OperIsConst()) | ||
{ | ||
*lclNum = lcl1; | ||
} | ||
else if ((lcl2 != BAD_VAR_NUM) && op1->OperIsConst()) | ||
{ | ||
*lclNum = lcl2; | ||
} | ||
else if ((lcl1 != BAD_VAR_NUM) && (lcl1 == lcl2)) | ||
{ | ||
*lclNum = lcl1; | ||
} | ||
else | ||
{ | ||
return false; | ||
} | ||
|
||
return true; | ||
} | ||
|
||
|
@@ -14648,28 +14742,35 @@ bool Compiler::fgBlockIsGoodTailDuplicationCandidate(BasicBlock* target) | |
// For a block which has an unconditional branch, look to see if its target block | ||
// is a good candidate for tail duplication, and if so do that duplication. | ||
// | ||
// Args: | ||
// Arguments: | ||
// block - block with uncond branch | ||
// target - block which is target of first block | ||
// | ||
// returns: true if changes were made | ||
|
||
// Returns: true if changes were made | ||
// | ||
// Notes: | ||
// This optimization generally reduces code size and path length. | ||
// | ||
bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* target) | ||
{ | ||
assert(block->bbJumpKind == BBJ_ALWAYS); | ||
assert(block->bbJumpDest == target); | ||
|
||
if (!BasicBlock::sameEHRegion(block, target)) | ||
{ | ||
return false; | ||
} | ||
|
||
if (!fgBlockIsGoodTailDuplicationCandidate(target)) | ||
unsigned lclNum = BAD_VAR_NUM; | ||
|
||
// First check if the successor tests a local and then branches on the result | ||
// of a test, and obtain the local if so. | ||
// | ||
if (!fgBlockIsGoodTailDuplicationCandidate(target, &lclNum)) | ||
{ | ||
return false; | ||
} | ||
|
||
if (!fgBlockEndFavorsTailDuplication(block)) | ||
// See if this block assigns constant or other interesting tree to that same local. | ||
// | ||
if (!fgBlockEndFavorsTailDuplication(block, lclNum)) | ||
{ | ||
return false; | ||
} | ||
|
@@ -14704,6 +14805,8 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* | |
|
||
JITDUMP("fgOptimizeUncondBranchToSimpleCond(from " FMT_BB " to cond " FMT_BB "), created new uncond " FMT_BB "\n", | ||
block->bbNum, target->bbNum, next->bbNum); | ||
JITDUMP(" expecting opts to key off V%02u, added cloned compare [%06u] to " FMT_BB "\n", lclNum, | ||
dspTreeID(cloned), block->bbNum); | ||
|
||
if (fgStmtListThreaded) | ||
{ | ||
|
@@ -14715,8 +14818,10 @@ bool Compiler::fgOptimizeUncondBranchToSimpleCond(BasicBlock* block, BasicBlock* | |
return true; | ||
} | ||
|
||
//------------------------------------------------------------- | ||
// fgOptimizeBranchToNext: | ||
// Optimize a block which has a branch to the following block | ||
// | ||
// Args: | ||
// block - block with a branch | ||
// bNext - block which is both next and the target of the first block | ||
|
@@ -16743,6 +16848,18 @@ bool Compiler::fgUpdateFlowGraph(bool doTailDuplication) | |
} | ||
} | ||
|
||
if (block->bbJumpKind == BBJ_NONE) | ||
{ | ||
bDest = nullptr; | ||
if (doTailDuplication && fgOptimizeUncondBranchToSimpleCond(block, block->bbNext)) | ||
{ | ||
change = true; | ||
modified = true; | ||
bDest = block->bbJumpDest; | ||
bNext = block->bbNext; | ||
} | ||
} | ||
|
||
// Remove JUMPS to the following block | ||
// and optimize any JUMPS to JUMPS | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It would be worth a comment here that
GetPrevStmt()
of the fist stmt returns the last stmt. I had forgotten and was initially confused.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, this confuses me at times too. I suppose it saves a pointer in
BasicBlock
but it makes reverse iteration through statements a bit clunky.Seems like we could have
GetPrevStmt()
return null for the first statement, and then haveBasicBlock::LastStatement()
call some other API or have friend access to the underlying field.