Skip to content

Commit

Permalink
Centralize the folding logic for ConditionalSelect and ensure side ef…
Browse files Browse the repository at this point in the history
…fects aren't dropped (#104175)

* Centralize the folding logic for ConditionalSelect and ensure side effects aren't dropped

* Ensure CndSel handles 64-bit operands where possible

* Don't fold if op3 has side effects

* Ensure that operands are passed into the lowered not->ternarylogic correctly
  • Loading branch information
tannergooding authored Jun 30, 2024
1 parent a7709e5 commit fcdb6db
Show file tree
Hide file tree
Showing 6 changed files with 406 additions and 193 deletions.
121 changes: 77 additions & 44 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23479,18 +23479,14 @@ GenTree* Compiler::gtNewSimdCndSelNode(
NamedIntrinsic intrinsic = NI_Illegal;

#if defined(TARGET_XARCH)
assert((simdSize != 32) || compIsaSupportedDebugOnly(InstructionSet_AVX));

if (canUseEvexEncoding())
if (simdSize == 64)
{
GenTree* control = gtNewIconNode(0xCA); // (B & A) | (C & ~A)
return gtNewSimdTernaryLogicNode(type, op1, op2, op3, control, simdBaseJitType, simdSize);
assert(canUseEvexEncodingDebugOnly());
intrinsic = NI_Vector512_ConditionalSelect;
}

assert(simdSize != 64);

if (simdSize == 32)
else if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_Vector256_ConditionalSelect;
}
else
Expand Down Expand Up @@ -26606,45 +26602,19 @@ GenTree* Compiler::gtNewSimdUnOpNode(
{
if (simdSize == 64)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));

if (genTypeSize(simdBaseType) >= 4)
{
intrinsic = NI_AVX512F_TernaryLogic;
}
}
else
{
if (simdSize == 32)
{
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
}
if ((genTypeSize(simdBaseType) >= 4) && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
{
intrinsic = NI_AVX10v1_TernaryLogic;
}
else if ((genTypeSize(simdBaseType) >= 4) && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
{
intrinsic = NI_AVX512F_VL_TernaryLogic;
}
assert(canUseEvexEncodingDebugOnly());
intrinsic = NI_Vector512_op_OnesComplement;
}

if (intrinsic != NI_Illegal)
else if (simdSize == 32)
{
// AVX512 allows performing `not` without requiring a memory access
assert(canUseEvexEncodingDebugOnly());

op2 = gtNewZeroConNode(type);
op3 = gtNewZeroConNode(type);

GenTree* cns = gtNewIconNode(static_cast<uint8_t>(~0xAA)); // ~C
return gtNewSimdTernaryLogicNode(type, op3, op2, op1, cns, simdBaseJitType, simdSize);
assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
intrinsic = NI_Vector256_op_OnesComplement;
}
else
{
op2 = gtNewAllBitsSetConNode(type);
return gtNewSimdBinOpNode(GT_XOR, type, op1, op2, simdBaseJitType, simdSize);
intrinsic = NI_Vector128_op_OnesComplement;
}
return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize);
}
#elif defined(TARGET_ARM64)
case GT_NEG:
Expand Down Expand Up @@ -28194,12 +28164,16 @@ genTreeOps GenTreeHWIntrinsic::HWOperGet(bool* isScalar) const
return GT_AND;
}

#if defined(TARGET_ARM64)
#if defined(TARGET_XARCH)
case NI_Vector128_op_OnesComplement:
case NI_Vector256_op_OnesComplement:
case NI_Vector512_op_OnesComplement:
#elif defined(TARGET_ARM64)
case NI_AdvSimd_Not:
#endif
{
return GT_NOT;
}
#endif

#if defined(TARGET_XARCH)
case NI_SSE_Xor:
Expand Down Expand Up @@ -30556,6 +30530,65 @@ GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
}
}

if (op3 != nullptr)
{
switch (ni)
{
case NI_Vector128_ConditionalSelect:
#if defined(TARGET_XARCH)
case NI_Vector256_ConditionalSelect:
case NI_Vector512_ConditionalSelect:
#elif defined(TARGET_ARM64)
case NI_Vector64_ConditionalSelect:
case NI_Sve_ConditionalSelect:
#endif
{
if (cnsNode != op1)
{
break;
}

if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
{
if ((op3->gtFlags & GTF_SIDE_EFFECT) != 0)
{
// op3 has side effects, this would require us to append a new statement
// to ensure that it isn't lost, which isn't safe to do from the general
// purpose handler here. We'll recognize this and mark it in VN instead
}

// op3 has no side effects, so we can return op2 directly
return op2;
}

if (op1->IsVectorZero())
{
return gtWrapWithSideEffects(op3, op2, GTF_ALL_EFFECT);
}

if (op2->IsCnsVec() && op3->IsCnsVec())
{
// op2 = op2 & op1
op2->AsVecCon()->EvaluateBinaryInPlace(GT_AND, false, simdBaseType, op1->AsVecCon());

// op3 = op2 & ~op1
op3->AsVecCon()->EvaluateBinaryInPlace(GT_AND_NOT, false, simdBaseType, op1->AsVecCon());

// op2 = op2 | op3
op2->AsVecCon()->EvaluateBinaryInPlace(GT_OR, false, simdBaseType, op3->AsVecCon());

resultNode = op2;
}
break;
}

default:
{
break;
}
}
}

if (resultNode != tree)
{
if (fgGlobalMorph)
Expand Down
13 changes: 0 additions & 13 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1911,19 +1911,6 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,

switch (intrinsic)
{
case NI_Sve_ConditionalSelect:
{
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
{
return retNode->AsHWIntrinsic()->Op(2);
}
else if (op1->IsVectorZero())
{
return retNode->AsHWIntrinsic()->Op(3);
}
break;
}

case NI_Sve_CreateMaskForFirstActiveElement:
case NI_Sve_CreateMaskForNextActiveElement:
case NI_Sve_GetActiveElementCount:
Expand Down
6 changes: 3 additions & 3 deletions src/coreclr/jit/hwintrinsiclistxarch.h
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ HARDWARE_INTRINSIC(Vector128, op_ExclusiveOr,
HARDWARE_INTRINSIC(Vector128, op_Inequality, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector128, op_LeftShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_Multiply, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_OnesComplement, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, op_RightShift, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_Subtraction, 16, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, op_UnaryNegation, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
Expand Down Expand Up @@ -221,7 +221,7 @@ HARDWARE_INTRINSIC(Vector256, op_ExclusiveOr,
HARDWARE_INTRINSIC(Vector256, op_Inequality, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector256, op_LeftShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_Multiply, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, op_OnesComplement, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_AvxOnlyCompatible)
HARDWARE_INTRINSIC(Vector256, op_RightShift, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_Subtraction, 32, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector256, op_UnaryNegation, 32, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
Expand Down Expand Up @@ -321,7 +321,7 @@ HARDWARE_INTRINSIC(Vector512, op_ExclusiveOr,
HARDWARE_INTRINSIC(Vector512, op_Inequality, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
HARDWARE_INTRINSIC(Vector512, op_LeftShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_Multiply, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_OnesComplement, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector512, op_RightShift, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_Subtraction, 64, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector512, op_UnaryNegation, 64, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
Expand Down
Loading

0 comments on commit fcdb6db

Please sign in to comment.