Skip to content

Commit a7b5c22

Browse files
emit branchless form of (i >= 0 && j >= 0)/(i!=0&& j!= 0) for signed integers (#62689)
* emit branchless form of (i >= 0 && j >= 0)/(i!=0&& j!= 0) for signed integers * drop unsigned ops * apply the proposed condition * check if we are having reverse bool operation * delete obsolete comment Co-authored-by: Andy Ayers <andya@microsoft.com>
1 parent a640b08 commit a7b5c22

File tree

2 files changed

+356
-20
lines changed

2 files changed

+356
-20
lines changed

src/coreclr/jit/optimizer.cpp

+89-20
Original file line numberDiff line numberDiff line change
@@ -9049,7 +9049,7 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock()
90499049
foldType = TYP_I_IMPL;
90509050
}
90519051

9052-
assert(m_testInfo1.compTree->gtOper == GT_EQ || m_testInfo1.compTree->gtOper == GT_NE);
9052+
assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE));
90539053

90549054
if (m_sameTarget)
90559055
{
@@ -9068,6 +9068,18 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock()
90689068
foldOp = GT_AND;
90699069
cmpOp = GT_EQ;
90709070
}
9071+
else if (m_testInfo1.compTree->gtOper == GT_LT)
9072+
{
9073+
// t1:c1<0 t2:c2<0 ==> Branch to BX if either value < 0
9074+
// So we will branch to BX if (c1|c2)<0
9075+
9076+
foldOp = GT_OR;
9077+
cmpOp = GT_LT;
9078+
}
9079+
else if (m_testInfo1.compTree->gtOper == GT_GE)
9080+
{
9081+
return false;
9082+
}
90719083
else
90729084
{
90739085
// t1:c1!=0 t2:c2!=0 ==> Branch to BX if either value is non-0
@@ -9079,30 +9091,43 @@ bool OptBoolsDsc::optOptimizeBoolsCondBlock()
90799091
}
90809092
else
90819093
{
9082-
// The m_b1 condition must be the reverse of the m_b2 condition because the only operators
9083-
// that we will see here are GT_EQ and GT_NE. So, if they are not the same, we have one of each.
9084-
90859094
if (m_testInfo1.compTree->gtOper == m_testInfo2.compTree->gtOper)
90869095
{
90879096
return false;
90889097
}
90899098

9090-
if (m_testInfo1.compTree->gtOper == GT_EQ)
9099+
if (m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_NE)
90919100
{
90929101
// t1:c1==0 t2:c2!=0 ==> Branch to BX if both values are non-0
90939102
// So we will branch to BX if (c1&c2)!=0
90949103

90959104
foldOp = GT_AND;
90969105
cmpOp = GT_NE;
90979106
}
9098-
else
9107+
else if (m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE)
9108+
{
9109+
// t1:c1<0 t2:c2>=0 ==> Branch to BX if both values >= 0
9110+
// So we will branch to BX if (c1|c2)>=0
9111+
9112+
foldOp = GT_OR;
9113+
cmpOp = GT_GE;
9114+
}
9115+
else if (m_testInfo1.compTree->gtOper == GT_GE)
9116+
{
9117+
return false;
9118+
}
9119+
else if (m_testInfo1.compTree->gtOper == GT_NE && m_testInfo2.compTree->gtOper == GT_EQ)
90999120
{
91009121
// t1:c1!=0 t2:c2==0 ==> Branch to BX if both values are 0
91019122
// So we will branch to BX if (c1|c2)==0
91029123

91039124
foldOp = GT_OR;
91049125
cmpOp = GT_EQ;
91059126
}
9127+
else
9128+
{
9129+
return false;
9130+
}
91069131
}
91079132

91089133
// Anding requires both values to be 0 or 1
@@ -9244,8 +9269,8 @@ Statement* OptBoolsDsc::optOptimizeBoolsChkBlkCond()
92449269
//
92459270
bool OptBoolsDsc::optOptimizeBoolsChkTypeCostCond()
92469271
{
9247-
assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE) && m_testInfo1.compTree->AsOp()->gtOp1 == m_c1);
9248-
assert(m_testInfo2.compTree->OperIs(GT_EQ, GT_NE) && m_testInfo2.compTree->AsOp()->gtOp1 == m_c2);
9272+
assert(m_testInfo1.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo1.compTree->AsOp()->gtOp1 == m_c1);
9273+
assert(m_testInfo2.compTree->OperIs(GT_EQ, GT_NE, GT_LT, GT_GE) && m_testInfo2.compTree->AsOp()->gtOp1 == m_c2);
92499274

92509275
//
92519276
// Leave out floats where the bit-representation is more complicated
@@ -9516,7 +9541,7 @@ bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3)
95169541
}
95179542

95189543
// Get the fold operator (m_foldOp, e.g., GT_OR/GT_AND) and
9519-
// the comparison operator (m_cmpOp, e.g., GT_EQ/GT_NE)
9544+
// the comparison operator (m_cmpOp, e.g., GT_EQ/GT_NE/GT_GE/GT_LT)
95209545

95219546
var_types foldType = m_c1->TypeGet();
95229547
if (varTypeIsGC(foldType))
@@ -9553,6 +9578,16 @@ bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3)
95539578
foldOp = GT_AND;
95549579
cmpOp = GT_NE;
95559580
}
9581+
else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_GE) &&
9582+
(it1val == 0 && it2val == 0 && it3val == 0))
9583+
{
9584+
// Case: x >= 0 && y >= 0
9585+
// t1:c1<0 t2:c2>=0 t3:c3==0
9586+
// ==> true if (c1|c2)>=0
9587+
9588+
foldOp = GT_OR;
9589+
cmpOp = GT_GE;
9590+
}
95569591
else if ((m_testInfo1.compTree->gtOper == GT_EQ && m_testInfo2.compTree->gtOper == GT_EQ) &&
95579592
(it1val == 0 && it2val == 0 && it3val == 1))
95589593
{
@@ -9571,13 +9606,23 @@ bool OptBoolsDsc::optOptimizeBoolsReturnBlock(BasicBlock* b3)
95719606
foldOp = GT_OR;
95729607
cmpOp = GT_NE;
95739608
}
9609+
else if ((m_testInfo1.compTree->gtOper == GT_LT && m_testInfo2.compTree->gtOper == GT_LT) &&
9610+
(it1val == 0 && it2val == 0 && it3val == 1))
9611+
{
9612+
// Case: x < 0 || y < 0
9613+
// t1:c1<0 t2:c2<0 t3:c3==1
9614+
// ==> true if (c1|c2)<0
9615+
9616+
foldOp = GT_OR;
9617+
cmpOp = GT_LT;
9618+
}
95749619
else
95759620
{
95769621
// Require NOT operation for operand(s). Do Not fold.
95779622
return false;
95789623
}
95799624

9580-
if ((foldOp == GT_AND || cmpOp == GT_NE) && (!m_testInfo1.isBool || !m_testInfo2.isBool))
9625+
if ((foldOp == GT_AND || (cmpOp == GT_NE && foldOp != GT_OR)) && (!m_testInfo1.isBool || !m_testInfo2.isBool))
95819626
{
95829627
// x == 1 && y == 1: Skip cases where x or y is greater than 1, e.g., x=3, y=1
95839628
// x == 0 || y == 0: Skip cases where x and y have opposite bits set, e.g., x=2, y=1
@@ -9676,15 +9721,15 @@ void OptBoolsDsc::optOptimizeBoolsGcStress()
96769721
//
96779722
// Notes:
96789723
// On entry, testTree is set.
9679-
// On success, compTree is set to the compare node (i.e. GT_EQ or GT_NE) of the testTree.
9724+
// On success, compTree is set to the compare node (i.e. GT_EQ or GT_NE or GT_LT or GT_GE) of the testTree.
96809725
// isBool is set to true if the comparand (i.e., operand 1 of compTree) is boolean. Otherwise, false.
96819726
//
96829727
// Given a GT_JTRUE or GT_RETURN node, this method checks if it is a boolean comparison
9683-
// of the form "if (boolVal ==/!= 0/1)".This is translated into
9684-
// a GT_EQ/GT_NE node with "opr1" being a boolean lclVar and "opr2" the const 0/1.
9728+
// of the form "if (boolVal ==/!=/>=/< 0/1)". This is translated into
9729+
// a GT_EQ/GT_NE/GT_GE/GT_LT node with "opr1" being a boolean lclVar and "opr2" the const 0/1.
96859730
//
96869731
// When isBool == true, if the comparison was against a 1 (i.e true)
9687-
// then we morph the tree by reversing the GT_EQ/GT_NE and change the 1 to 0.
9732+
// then we morph the tree by reversing the GT_EQ/GT_NE/GT_GE/GT_LT and change the 1 to 0.
96889733
//
96899734
GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest)
96909735
{
@@ -9693,9 +9738,9 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest)
96939738
assert(pOptTest->testTree->gtOper == GT_JTRUE || pOptTest->testTree->gtOper == GT_RETURN);
96949739
GenTree* cond = pOptTest->testTree->AsOp()->gtOp1;
96959740

9696-
// The condition must be "!= 0" or "== 0"
9697-
9698-
if ((cond->gtOper != GT_EQ) && (cond->gtOper != GT_NE))
9741+
// The condition must be "!= 0" or "== 0" or >=0 or <0
9742+
// we don't optimize unsigned < and >= operations
9743+
if (!cond->OperIs(GT_EQ, GT_NE) && (!cond->OperIs(GT_LT, GT_GE) || cond->IsUnsigned()))
96999744
{
97009745
return nullptr;
97019746
}
@@ -9772,9 +9817,9 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest)
97729817
// suitable phase status
97739818
//
97749819
// Notes:
9775-
// If the operand of GT_JTRUE/GT_RETURN node is GT_EQ/GT_NE of the form
9776-
// "if (boolVal ==/!= 0/1)", the GT_EQ/GT_NE nodes are translated into a
9777-
// GT_EQ/GT_NE node with
9820+
// If the operand of GT_JTRUE/GT_RETURN node is GT_EQ/GT_NE/GT_GE/GT_LT of the form
9821+
// "if (boolVal ==/!=/>=/< 0/1)", the GT_EQ/GT_NE/GT_GE/GT_LT nodes are translated into a
9822+
// GT_EQ/GT_NE/GT_GE/GT_LT node with
97789823
// "op1" being a boolean GT_OR/GT_AND lclVar and
97799824
// "op2" the const 0/1.
97809825
// For example, the folded tree for the below boolean optimization is shown below:
@@ -9816,6 +9861,30 @@ GenTree* OptBoolsDsc::optIsBoolComp(OptTestInfo* pOptTest)
98169861
// | \--* LCL_VAR int V03 arg3
98179862
// \--* CNS_INT int 0
98189863
//
9864+
// Case 5: (x != 0 || y != 0) => (x | y) != 0
9865+
// * RETURN int
9866+
// \--* NE int
9867+
// +--* OR int
9868+
// | +--* LCL_VAR int V00 arg0
9869+
// | \--* LCL_VAR int V01 arg1
9870+
// \--* CNS_INT int 0
9871+
//
9872+
// Case 6: (x >= 0 && y >= 0) => (x | y) >= 0
9873+
// * RETURN int
9874+
// \--* GE int
9875+
// +--* OR int
9876+
// | +--* LCL_VAR int V00 arg0
9877+
// | \--* LCL_VAR int V01 arg1
9878+
// \--* CNS_INT int 0
9879+
//
9880+
// Case 7: (x < 0 || y < 0) => (x | y) < 0
9881+
// * RETURN int
9882+
// \--* LT int
9883+
// +--* OR int
9884+
// | +--* LCL_VAR int V00 arg0
9885+
// | \--* LCL_VAR int V01 arg1
9886+
// \--* CNS_INT int 0
9887+
//
98199888
// Patterns that are not optimized include (x == 1 && y == 1), (x == 1 || y == 1),
98209889
// (x == 0 || y == 0) because currently their comptree is not marked as boolean expression.
98219890
// When m_foldOp == GT_AND, or m_cmpOp == GT_NE and m_foldOp != GT_OR, both compTrees must be boolean expressions

0 commit comments

Comments
 (0)