@@ -20861,12 +20861,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                     }
                 }
             }
-
-            if (op == GT_AND_NOT)
-            {
-                // GT_AND_NOT expects `op1 & ~op2`, but xarch does `~op1 & op2`
-                needsReverseOps = true;
-            }
             break;
         }
 #endif // TARGET_XARCH
@@ -20897,11 +20891,34 @@ GenTree* Compiler::gtNewSimdBinOpNode(
 
     if (intrinsic != NI_Illegal)
    {
+        if (op == GT_AND_NOT)
+        {
+            assert(fgNodeThreading == NodeThreading::LIR);
+
+#if defined(TARGET_XARCH)
+            // GT_AND_NOT expects `op1 & ~op2`, but xarch does `~op1 & op2`
+            // We specially handle this here since we're only producing a
+            // native intrinsic node in LIR
+
+            std::swap(op1, op2);
+#endif // TARGET_XARCH
+        }
         return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
     }
 
     switch (op)
     {
+        case GT_AND_NOT:
+        {
+            // Prior to LIR, we want to explicitly decompose this operation so that downstream phases can
+            // appropriately optimize around the individual operations being performed, particularly ~op2,
+            // and produce overall better codegen.
+            assert(fgNodeThreading != NodeThreading::LIR);
+
+            op2 = gtNewSimdUnOpNode(GT_NOT, type, op2, simdBaseJitType, simdSize);
+            return gtNewSimdBinOpNode(GT_AND, type, op1, op2, simdBaseJitType, simdSize);
+        }
+
 #if defined(TARGET_XARCH)
         case GT_RSZ:
         {
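Note on the operand order fixed up in the hunk above: in the JIT's IR, GT_AND_NOT computes `op1 & ~op2`, while the xarch ANDN-family instructions (PANDN/ANDNPS/VPANDN) compute `~op1 & op2`, so swapping the operands when emitting the hardware intrinsic directly in LIR recovers the IR semantics. A minimal standalone sketch of that equivalence (illustrative only, not JIT code; the helper names are made up):

    #include <cassert>
    #include <cstdint>

    // IrAndNot models GT_AND_NOT semantics; XarchAndn models the PANDN/ANDNPS operand order.
    static uint32_t IrAndNot(uint32_t op1, uint32_t op2)  { return op1 & ~op2; }
    static uint32_t XarchAndn(uint32_t op1, uint32_t op2) { return ~op1 & op2; }

    int main()
    {
        uint32_t a = 0xF0F0F0F0u;
        uint32_t b = 0x0FF00FF0u;

        // Swapping the operands of the hardware form yields the IR form,
        // which is what the std::swap(op1, op2) above relies on.
        assert(IrAndNot(a, b) == XarchAndn(b, a));
        return 0;
    }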
@@ -21066,9 +21083,6 @@ GenTree* Compiler::gtNewSimdBinOpNode(
                 vecCon1->gtSimdVal.u64[i] = 0x00FF00FF00FF00FF;
             }
 
-            // Validate we can't use AVX512F_VL_TernaryLogic here
-            assert(!canUseEvexEncodingDebugOnly());
-
             // Vector256<short> maskedProduct = Avx2.And(widenedProduct, vecCon1).AsInt16()
             GenTree* maskedProduct = gtNewSimdBinOpNode(GT_AND, widenedType, widenedProduct, vecCon1,
                                                         widenedSimdBaseJitType, widenedSimdSize);
@@ -22033,9 +22047,6 @@ GenTree* Compiler::gtNewSimdCmpOpNode(
             v = gtNewSimdHWIntrinsicNode(type, v, gtNewIconNode(SHUFFLE_ZZXX, TYP_INT), NI_SSE2_Shuffle,
                                          CORINFO_TYPE_INT, simdSize);
 
-            // Validate we can't use AVX512F_VL_TernaryLogic here
-            assert(!canUseEvexEncodingDebugOnly());
-
             op2 = gtNewSimdBinOpNode(GT_AND, type, u, v, simdBaseJitType, simdSize);
             return gtNewSimdBinOpNode(GT_OR, type, op1, op2, simdBaseJitType, simdSize);
         }
@@ -24146,9 +24157,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
             GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-            // Validate we can't use AVX512F_VL_TernaryLogic here
-            assert(!canUseEvexEncodingDebugOnly());
-
             tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
             tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
             tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_UBYTE,
@@ -24187,9 +24195,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
             GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-            // Validate we can't use AVX512F_VL_TernaryLogic here
-            assert(!canUseEvexEncodingDebugOnly());
-
             tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
             tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
             tmp3 = gtNewSimdHWIntrinsicNode(type, tmp1, tmp2, NI_AVX2_PackUnsignedSaturate, CORINFO_TYPE_USHORT,
@@ -24291,9 +24296,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
             GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-            // Validate we can't use AVX512F_VL_TernaryLogic here
-            assert(!canUseEvexEncodingDebugOnly());
-
             tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
             tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
 
@@ -24330,9 +24332,6 @@ GenTree* Compiler::gtNewSimdNarrowNode(
 
             GenTree* vecCon2 = gtCloneExpr(vecCon1);
 
-            // Validate we can't use AVX512F_VL_TernaryLogic here
-            assert(!canUseEvexEncodingDebugOnly());
-
             tmp1 = gtNewSimdBinOpNode(GT_AND, type, op1, vecCon1, simdBaseJitType, simdSize);
             tmp2 = gtNewSimdBinOpNode(GT_AND, type, op2, vecCon2, simdBaseJitType, simdSize);
 
@@ -27821,6 +27820,14 @@ NamedIntrinsic GenTreeHWIntrinsic::GetHWIntrinsicIdForBinOp(Compiler* comp,
             assert(!isScalar);
             assert(op2->TypeIs(simdType));
 
+            if (comp->fgNodeThreading != NodeThreading::LIR)
+            {
+                // We don't want to support creating AND_NOT nodes prior to LIR
+                // as it can break important optimizations. We'll produce this
+                // in lowering instead.
+                break;
+            }
+
 #if defined(TARGET_XARCH)
             if (simdSize == 64)
             {
@@ -30155,13 +30162,8 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
     bool isScalar = false;
     genTreeOps oper = tree->GetOperForHWIntrinsicId(&isScalar);
 
-#if defined(TARGET_XARCH)
-    if (oper == GT_AND_NOT)
-    {
-        // xarch does: ~op1 & op2, we need op1 & ~op2
-        std::swap(op1, op2);
-    }
-#endif // TARGET_XARCH
+    // We shouldn't find AND_NOT nodes since they should only be produced in lowering
+    assert(oper != GT_AND_NOT);
 
     GenTree* cnsNode = nullptr;
     GenTree* otherNode = nullptr;
@@ -30674,31 +30676,6 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
                 break;
             }
 
-            case GT_AND_NOT:
-            {
-                // Handle `x & ~0 == x` and `0 & ~x == 0`
-                if (cnsNode->IsVectorZero())
-                {
-                    if (cnsNode == op1)
-                    {
-                        resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
-                        break;
-                    }
-                    else
-                    {
-                        resultNode = otherNode;
-                    }
-                    break;
-                }
-
-                // Handle `x & ~AllBitsSet == 0`
-                if (cnsNode->IsVectorAllBitsSet() && (cnsNode == op2))
-                {
-                    resultNode = gtWrapWithSideEffects(cnsNode, otherNode, GTF_ALL_EFFECT);
-                }
-                break;
-            }
-
             case GT_DIV:
             {
                 if (varTypeIsFloating(simdBaseType))
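With GT_AND_NOT decomposed into GT_NOT and GT_AND before LIR, the identities the removed case handled should fall out of the existing GT_NOT and GT_AND folding. A minimal scalar sketch of those identities (illustrative only, not JIT code):

    #include <cassert>
    #include <cstdint>

    int main()
    {
        const uint32_t allBitsSet = ~0u;
        uint32_t x = 0x12345678u;

        assert((x & ~0u) == x);          // x & ~Zero       -> x
        assert((0u & ~x) == 0u);         // Zero & ~x       -> Zero
        assert((x & ~allBitsSet) == 0u); // x & ~AllBitsSet -> Zero
        return 0;
    }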
@@ -31089,12 +31066,12 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
     {
         switch (ni)
         {
-            case NI_Vector128_ConditionalSelect:
 #if defined(TARGET_XARCH)
+            case NI_Vector128_ConditionalSelect:
             case NI_Vector256_ConditionalSelect:
             case NI_Vector512_ConditionalSelect:
 #elif defined(TARGET_ARM64)
-            case NI_Vector64_ConditionalSelect:
+            case NI_AdvSimd_BitwiseSelect:
             case NI_Sve_ConditionalSelect:
 #endif
             {