Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Arm64/Sve: Rewrite how ConditionalSelect wraps the embedded mask operations #104248

Merged
merged 30 commits into from
Jul 3, 2024
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
d832b42
All current embedded scenarios work
kunalspathak Jun 29, 2024
90df0aa
Add test coverage for _SveUnaryOpTestTemplate
kunalspathak Jun 29, 2024
310e93d
wip: review comments
kunalspathak Jul 2, 2024
6a77306
Merge remote-tracking branch 'origin/main' into cndsel
kunalspathak Jul 2, 2024
2ecd169
fix the assert for GatherPrefetch
kunalspathak Jul 3, 2024
b4a6328
review commetns
kunalspathak Jul 3, 2024
c58ada2
jit format
kunalspathak Jul 3, 2024
c5ee94e
Add coverage for _SveBinaryOpTestTemplate.template
kunalspathak Jul 3, 2024
02f5cf5
review comments
kunalspathak Jul 3, 2024
dd8f935
Add test coverage in _SveTernOpTestTemplate.template
kunalspathak Jul 3, 2024
89040a4
Add for _SveUnaryOpDifferentRetTypeTestTemplate
kunalspathak Jul 3, 2024
7a0a854
Add _SveBinaryMaskOpTestTemplate.template
kunalspathak Jul 3, 2024
e5e45c0
minor feedback
kunalspathak Jul 3, 2024
15ca5a0
coverge for _SveBinaryOpDifferentTypesTestTemplate
kunalspathak Jul 3, 2024
6d5f6ef
Add for _SveBinaryOpDifferentTypesTestTemplate.template
kunalspathak Jul 3, 2024
2626609
Add for _SveImmBinaryOpTestTemplate.template
kunalspathak Jul 3, 2024
a1bfc5e
Add for _SveImmTernOpFirstArgTestTemplate.template
kunalspathak Jul 3, 2024
ccb3a6f
Add for _SveImmTernOpTestTemplate.template
kunalspathak Jul 3, 2024
4344d96
Add for _SveImmUnaryOpTestTemplate.template
kunalspathak Jul 3, 2024
d38199d
Add for _SveTernOpFirstArgTestTemplate.template
kunalspathak Jul 3, 2024
3655ecc
Add for _SveTernOpMaskedOpTestTemplate.template
kunalspathak Jul 3, 2024
9c6ea01
Add for SveLoadNonFaultingUnOpTest.template
kunalspathak Jul 3, 2024
e7660f0
Add for SveGatherVectorVectorBases.template
kunalspathak Jul 3, 2024
f5b34ff
Add for SveGatherVectorIndices.template
kunalspathak Jul 3, 2024
d49611d
Add for SveGatherVectorByteOffsets.template
kunalspathak Jul 3, 2024
492ab8e
fix the typos
kunalspathak Jul 3, 2024
8584e9e
minor test feedback
kunalspathak Jul 3, 2024
c5bd12e
jit format
kunalspathak Jul 3, 2024
c5a5709
revert fix for GatherPrefetch*
kunalspathak Jul 3, 2024
4a365f6
missed a change
kunalspathak Jul 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 8 additions & 6 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27546,12 +27546,14 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
{
#ifdef TARGET_ARM64
static_assert_no_msg(
AreContiguous(NI_Sve_GatherVector, NI_Sve_GatherVectorByteZeroExtend, NI_Sve_GatherVectorInt16SignExtend,
NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend, NI_Sve_GatherVectorInt32SignExtend,
NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend, NI_Sve_GatherVectorSByteSignExtend,
NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend, NI_Sve_GatherVectorUInt16ZeroExtend,
NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend, NI_Sve_GatherVectorUInt32ZeroExtend));
assert(varTypeIsI(addr) || (varTypeIsSIMD(addr) && ((intrinsicId >= NI_Sve_GatherVector) &&
AreContiguous(NI_Sve_GatherPrefetch16Bit, NI_Sve_GatherPrefetch32Bit, NI_Sve_GatherPrefetch64Bit,
NI_Sve_GatherPrefetch8Bit, NI_Sve_GatherVector, NI_Sve_GatherVectorByteZeroExtend,
NI_Sve_GatherVectorInt16SignExtend, NI_Sve_GatherVectorInt16WithByteOffsetsSignExtend,
NI_Sve_GatherVectorInt32SignExtend, NI_Sve_GatherVectorInt32WithByteOffsetsSignExtend,
NI_Sve_GatherVectorSByteSignExtend, NI_Sve_GatherVectorUInt16WithByteOffsetsZeroExtend,
NI_Sve_GatherVectorUInt16ZeroExtend, NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend,
NI_Sve_GatherVectorUInt32ZeroExtend));
assert(varTypeIsI(addr) || (varTypeIsSIMD(addr) && ((intrinsicId >= NI_Sve_GatherPrefetch16Bit) &&
(intrinsicId <= NI_Sve_GatherVectorUInt32ZeroExtend))));
#else
assert(varTypeIsI(addr));
Expand Down
26 changes: 26 additions & 0 deletions src/coreclr/jit/gentree.h
Original file line number Diff line number Diff line change
Expand Up @@ -1761,6 +1761,7 @@ struct GenTree
inline bool IsVectorAllBitsSet() const;
inline bool IsVectorBroadcast(var_types simdBaseType) const;
inline bool IsMaskAllBitsSet() const;
inline bool IsMaskZero() const;
inline bool IsVectorConst();

inline uint64_t GetIntegralVectorConstElement(size_t index, var_types simdBaseType);
Expand Down Expand Up @@ -9680,6 +9681,31 @@ inline bool GenTree::IsMaskAllBitsSet() const
return false;
}

inline bool GenTree::IsMaskZero() const
{
#ifdef TARGET_ARM64
static_assert_no_msg(AreContiguous(NI_Sve_CreateFalseMaskByte, NI_Sve_CreateFalseMaskDouble,
NI_Sve_CreateFalseMaskInt16, NI_Sve_CreateFalseMaskInt32,
NI_Sve_CreateFalseMaskInt64, NI_Sve_CreateFalseMaskSByte,
NI_Sve_CreateFalseMaskSingle, NI_Sve_CreateFalseMaskUInt16,
NI_Sve_CreateFalseMaskUInt32, NI_Sve_CreateFalseMaskUInt64));

if (OperIsHWIntrinsic())
{
NamedIntrinsic id = AsHWIntrinsic()->GetHWIntrinsicId();
if (id == NI_Sve_ConvertMaskToVector)
{
GenTree* op1 = AsHWIntrinsic()->Op(1);
assert(op1->OperIsHWIntrinsic());
id = op1->AsHWIntrinsic()->GetHWIntrinsicId();
}
return ((id >= NI_Sve_CreateFalseMaskByte) && (id <= NI_Sve_CreateFalseMaskUInt64));
}
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved

#endif
return false;
}

//-------------------------------------------------------------------
// IsVectorConst: returns true if this node is a HWIntrinsic that represents a constant.
//
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/lower.h
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,11 @@ class Lowering final : public Phase
GenTree* LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cmpOp);
GenTree* LowerHWIntrinsicCreate(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicDot(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
#if defined(TARGET_XARCH)
void LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicToScalar(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicTernaryLogic(GenTreeHWIntrinsic* node);
GenTree* LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node);
GenTree* TryLowerAndOpToResetLowestSetBit(GenTreeOp* andNode);
Expand Down
114 changes: 93 additions & 21 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1312,7 +1312,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_AdvSimd_FusedMultiplyAddScalar:
LowerHWIntrinsicFusedMultiplyAddScalar(node);
break;

case NI_Sve_ConditionalSelect:
return LowerHWIntrinsicCndSel(node);
default:
break;
}
Expand All @@ -1323,25 +1324,34 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
if (BlockRange().TryGetUse(node, &use))
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
{
GenTree* user = use.User();
// Wrap the intrinsic in ConditionalSelect only if it is not already inside another ConditionalSelect
if (!user->OperIsHWIntrinsic() || (user->AsHWIntrinsic()->GetHWIntrinsicId() != NI_Sve_ConditionalSelect))
{
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
unsigned simdSize = node->GetSimdSize();
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
GenTree* trueVal = node;
GenTree* falseVal = comp->gtNewZeroConNode(simdType);

GenTreeHWIntrinsic* condSelNode =
comp->gtNewSimdHWIntrinsicNode(simdType, trueMask, trueVal, falseVal, NI_Sve_ConditionalSelect,
simdBaseJitType, simdSize);

BlockRange().InsertBefore(node, trueMask);
BlockRange().InsertBefore(node, falseVal);
BlockRange().InsertAfter(node, condSelNode);
use.ReplaceWith(condSelNode);
}

JITDUMP("lowering EmbeddedMasked HWIntrinisic (before):\n");
DISPTREERANGE(BlockRange(), node);
JITDUMP("\n");

CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
unsigned simdSize = node->GetSimdSize();
var_types simdType = Compiler::getSIMDTypeForSize(simdSize);
GenTree* trueMask = comp->gtNewSimdAllTrueMaskNode(simdBaseJitType, simdSize);
GenTree* falseVal = comp->gtNewZeroConNode(simdType);

BlockRange().InsertBefore(node, trueMask);
BlockRange().InsertBefore(node, falseVal);

GenTreeHWIntrinsic* condSelNode =
comp->gtNewSimdHWIntrinsicNode(simdType, trueMask, node, falseVal, NI_Sve_ConditionalSelect,
simdBaseJitType, simdSize);
BlockRange().InsertAfter(node, condSelNode);

use.ReplaceWith(condSelNode);

JITDUMP("lowering EmbeddedMasked HWIntrinisic (after):\n");
DISPTREERANGE(BlockRange(), condSelNode);
JITDUMP("\n");
}
else
{
assert(!"Embedded mask operation is not used anywhere.");
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
}
}

Expand Down Expand Up @@ -3369,7 +3379,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}

// Handle op2
if (op2->OperIsHWIntrinsic())
if (op2->OperIsHWIntrinsic() && !op2->IsEmbMaskOp())
{
if (IsInvariantInRange(op2, node) && op2->isEmbeddedMaskingCompatibleHWIntrinsic())
{
Expand Down Expand Up @@ -3480,6 +3490,68 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
}
}
}
//----------------------------------------------------------------------------------------------
// Lowering::LowerHWIntrinsicCndSel: Lowers a Sve ConditionalSelect call
//
// Arguments:
// node - The hardware intrinsic node of the form
// ConditionalSelect(mask, trueValue, falseValue)
//
GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* cndSelNode)
{
assert(cndSelNode->OperIsHWIntrinsic(NI_Sve_ConditionalSelect));

GenTree* op1 = cndSelNode->Op(1);
GenTree* op2 = cndSelNode->Op(2);
GenTree* op3 = cndSelNode->Op(3);
GenTree* lowerCndSel = cndSelNode;

if (op2->OperIsHWIntrinsic(NI_Sve_ConditionalSelect))
{
// Handle cases where there is a nested ConditionalSelect for
// `trueValue`
GenTreeHWIntrinsic* nestedCndSel = op2->AsHWIntrinsic();
GenTree* nestedOp1 = nestedCndSel->Op(1);
assert(varTypeIsMask(nestedOp1));

if (nestedOp1->IsMaskAllBitsSet())
{
GenTree* nestedOp2 = nestedCndSel->Op(2);
GenTree* nestedOp3 = nestedCndSel->Op(3);

JITDUMP("lowering ConditionalSelect HWIntrinisic (before):\n");
DISPTREERANGE(BlockRange(), cndSelNode);
JITDUMP("\n");

// Transform:
//
// CndSel(mask, CndSel(AllTrue, embeddedMask(trueValOp2), trueValOp3), op3) to
// CndSel(mask, embedded(trueValOp2), op3)
//
cndSelNode->Op(2) = nestedCndSel->Op(2);
if (nestedOp3->IsMaskZero())
{
BlockRange().Remove(nestedOp3);
}
else
{
nestedOp3->SetUnusedValue();
}

nestedOp1->SetUnusedValue();
kunalspathak marked this conversation as resolved.
Show resolved Hide resolved
BlockRange().Remove(nestedCndSel);

JITDUMP("lowering ConditionalSelect HWIntrinisic (after):\n");
DISPTREERANGE(BlockRange(), cndSelNode);
JITDUMP("\n");

return cndSelNode;
}
}

ContainCheckHWIntrinsic(cndSelNode);
return cndSelNode->gtNext;
}
#endif // FEATURE_HW_INTRINSICS

#endif // TARGET_ARMARCH
Loading
Loading