Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 31 additions & 4 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28311,8 +28311,8 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtendFirstFaulting:
case NI_Sve_GatherVectorUInt32ZeroExtend:
case NI_Sve_GatherVectorUInt32ZeroExtendFirstFaulting:
case NI_Sve_GatherVectorWithByteOffsetFirstFaulting:
case NI_Sve_GatherVectorWithByteOffsets:
case NI_Sve_GatherVectorWithByteOffsetFirstFaulting:
case NI_Sve_LoadVector:
case NI_Sve_LoadVectorNonTemporal:
case NI_Sve_LoadVector128AndReplicateToVector:
Expand Down Expand Up @@ -28375,6 +28375,18 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
case NI_Sve_LoadVectorUInt16NonFaultingZeroExtendToUInt64:
case NI_Sve_LoadVectorUInt32NonFaultingZeroExtendToInt64:
case NI_Sve_LoadVectorUInt32NonFaultingZeroExtendToUInt64:
case NI_Sve2_GatherVectorByteZeroExtendNonTemporal:
case NI_Sve2_GatherVectorInt16SignExtendNonTemporal:
case NI_Sve2_GatherVectorInt16WithByteOffsetsSignExtendNonTemporal:
case NI_Sve2_GatherVectorInt32SignExtendNonTemporal:
case NI_Sve2_GatherVectorInt32WithByteOffsetsSignExtendNonTemporal:
case NI_Sve2_GatherVectorNonTemporal:
case NI_Sve2_GatherVectorSByteSignExtendNonTemporal:
case NI_Sve2_GatherVectorUInt16WithByteOffsetsZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt16ZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt32WithByteOffsetsZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt32ZeroExtendNonTemporal:
case NI_Sve2_GatherVectorWithByteOffsetsNonTemporal:
addr = Op(2);
break;

Expand Down Expand Up @@ -28467,9 +28479,24 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend,
NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtendFirstFaulting,
NI_Sve_GatherVectorUInt32ZeroExtend, NI_Sve_GatherVectorUInt32ZeroExtendFirstFaulting));
assert(varTypeIsI(addr) ||
(varTypeIsSIMD(addr) && ((intrinsicId >= NI_Sve_GatherVector) &&
(intrinsicId <= NI_Sve_GatherVectorUInt32ZeroExtendFirstFaulting))));

static_assert(AreContiguous(NI_Sve2_GatherVectorByteZeroExtendNonTemporal,
NI_Sve2_GatherVectorInt16SignExtendNonTemporal,
NI_Sve2_GatherVectorInt16WithByteOffsetsSignExtendNonTemporal,
NI_Sve2_GatherVectorInt32SignExtendNonTemporal,
NI_Sve2_GatherVectorInt32WithByteOffsetsSignExtendNonTemporal,
NI_Sve2_GatherVectorNonTemporal, NI_Sve2_GatherVectorSByteSignExtendNonTemporal,
NI_Sve2_GatherVectorUInt16WithByteOffsetsZeroExtendNonTemporal,
NI_Sve2_GatherVectorUInt16ZeroExtendNonTemporal,
NI_Sve2_GatherVectorUInt32WithByteOffsetsZeroExtendNonTemporal,
NI_Sve2_GatherVectorUInt32ZeroExtendNonTemporal,
NI_Sve2_GatherVectorWithByteOffsetsNonTemporal));

bool isSveGatherLoad =
(intrinsicId >= NI_Sve_GatherVector) && (intrinsicId <= NI_Sve_GatherVectorUInt32ZeroExtendFirstFaulting);
bool isSve2GatherLoad = (intrinsicId >= NI_Sve2_GatherVectorByteZeroExtendNonTemporal) &&
(intrinsicId <= NI_Sve2_GatherVectorWithByteOffsetsNonTemporal);
assert(varTypeIsI(addr) || (varTypeIsSIMD(addr) && (isSveGatherLoad || isSve2GatherLoad)));
#else
assert(varTypeIsI(addr));
#endif
Expand Down
16 changes: 14 additions & 2 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2488,9 +2488,21 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtend:
case NI_Sve_GatherVectorUInt32WithByteOffsetsZeroExtendFirstFaulting:
case NI_Sve_GatherVectorUInt32ZeroExtend:
case NI_Sve_GatherVectorWithByteOffsetFirstFaulting:
case NI_Sve_GatherVectorWithByteOffsets:
case NI_Sve_GatherVectorUInt32ZeroExtendFirstFaulting:
case NI_Sve_GatherVectorWithByteOffsets:
case NI_Sve_GatherVectorWithByteOffsetFirstFaulting:
case NI_Sve2_GatherVectorByteZeroExtendNonTemporal:
case NI_Sve2_GatherVectorInt16SignExtendNonTemporal:
case NI_Sve2_GatherVectorInt16WithByteOffsetsSignExtendNonTemporal:
case NI_Sve2_GatherVectorInt32SignExtendNonTemporal:
case NI_Sve2_GatherVectorInt32WithByteOffsetsSignExtendNonTemporal:
case NI_Sve2_GatherVectorNonTemporal:
case NI_Sve2_GatherVectorSByteSignExtendNonTemporal:
case NI_Sve2_GatherVectorUInt16WithByteOffsetsZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt16ZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt32WithByteOffsetsZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt32ZeroExtendNonTemporal:
case NI_Sve2_GatherVectorWithByteOffsetsNonTemporal:
assert(varTypeIsSIMD(op3->TypeGet()));
if (numArgs == 3)
{
Expand Down
46 changes: 46 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2331,6 +2331,52 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

// Codegen for the SVE2 non-temporal gather-load intrinsics. Two operand shapes are
// handled, distinguished by whether op2 is a SIMD-typed operand:
//   - (mask, scalar base address, vector of offsets/indices)
//   - (mask, vector of addresses)
case NI_Sve2_GatherVectorByteZeroExtendNonTemporal:
case NI_Sve2_GatherVectorInt16SignExtendNonTemporal:
case NI_Sve2_GatherVectorInt16WithByteOffsetsSignExtendNonTemporal:
case NI_Sve2_GatherVectorInt32SignExtendNonTemporal:
case NI_Sve2_GatherVectorInt32WithByteOffsetsSignExtendNonTemporal:
case NI_Sve2_GatherVectorNonTemporal:
case NI_Sve2_GatherVectorSByteSignExtendNonTemporal:
case NI_Sve2_GatherVectorUInt16WithByteOffsetsZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt16ZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt32WithByteOffsetsZeroExtendNonTemporal:
case NI_Sve2_GatherVectorUInt32ZeroExtendNonTemporal:
case NI_Sve2_GatherVectorWithByteOffsetsNonTemporal:
{
if (!varTypeIsSIMD(intrin.op2->gtType))
{
// GatherVector...(Vector<T> mask, T* address, Vector<T2> offsets)

// Calculate the byte offsets if using indices.
// The index-taking variants get an explicit left shift (x2, x4 or x8) emitted
// before the load; every other intrinsic in this case group (the
// ...WithByteOffsets... and Byte/SByte variants) falls through with op3Reg unshifted.
// NOTE(review): the shift writes its result back into op3Reg, so the original
// index vector is overwritten — confirm the register allocator does not expect
// op3 to still hold the indices after this node.
if ((intrin.id == NI_Sve2_GatherVectorInt16SignExtendNonTemporal) ||
(intrin.id == NI_Sve2_GatherVectorUInt16ZeroExtendNonTemporal))
{
// index << 1
GetEmitter()->emitIns_R_R_I(INS_sve_lsl, emitSize, op3Reg, op3Reg, 1, opt);
}
else if ((intrin.id == NI_Sve2_GatherVectorInt32SignExtendNonTemporal) ||
(intrin.id == NI_Sve2_GatherVectorUInt32ZeroExtendNonTemporal))
{
// index << 2
GetEmitter()->emitIns_R_R_I(INS_sve_lsl, emitSize, op3Reg, op3Reg, 2, opt);
}
else if (intrin.id == NI_Sve2_GatherVectorNonTemporal)
{
// Only 8-byte base types are expected on the scalar-base + index path,
// hence the fixed shift of 3 (index << 3).
assert(emitActualTypeSize(intrin.baseType) == 8);
GetEmitter()->emitIns_R_R_I(INS_sve_lsl, emitSize, op3Reg, op3Reg, 3, opt);
}

// op2Reg and op3Reg are swapped
// i.e. the offsets vector (op3Reg) is passed to the emitter ahead of the
// scalar base address (op2Reg).
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, op2Reg, opt);
}
else
{
// GatherVector...(Vector<T> mask, Vector<T2> addresses)

// op2Reg already holds the vector of addresses; REG_ZR is passed as the
// remaining register operand.
GetEmitter()->emitIns_R_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, REG_ZR, opt);
}
break;
}

case NI_Sve_ReverseElement:
// Use non-predicated version explicitly
GetEmitter()->emitIns_R_R(ins, emitSize, targetReg, op1Reg, opt);
Expand Down
12 changes: 12 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -349,6 +349,18 @@ HARDWARE_INTRINSIC(Sve2, DotProductRotateComplexBySelectedIndex,
HARDWARE_INTRINSIC(Sve2, FusedAddHalving, -1, -1, {INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_sve_shadd, INS_sve_uhadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, FusedAddRoundedHalving, -1, -1, {INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_sve_srhadd, INS_sve_urhadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, FusedSubtractHalving, -1, -1, {INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_sve_shsub, INS_sve_uhsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorByteZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1b, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorInt16SignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorInt16WithByteOffsetsSignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_sve_ldnt1sh, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorInt32SignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sw, INS_sve_ldnt1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorInt32WithByteOffsetsSignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sw, INS_sve_ldnt1sw, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorSByteSignExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_sve_ldnt1sb, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorUInt16WithByteOffsetsZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorUInt16ZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorUInt32WithByteOffsetsZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorUInt32ZeroExtendNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, GatherVectorWithByteOffsetsNonTemporal, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_ZeroingMaskedOperation)
HARDWARE_INTRINSIC(Sve2, InterleavingXorEvenOdd, -1, 3, {INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_sve_eorbt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, InterleavingXorOddEven, -1, 3, {INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_sve_eortb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve2, Log2, -1, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_flogb, INS_invalid, INS_sve_flogb, INS_invalid, INS_sve_flogb, INS_sve_flogb}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
Loading
Loading