Skip to content

Commit

Permalink
JIT: Added `Sve.LoadVectorNonTemporal/NonFaulting/128AndReplicateToVe…
Browse files Browse the repository at this point in the history
…ctor` APIs (#103392)

* Added Sve LoadVectorNonTemporal/NonFaulting/128AndReplicateToVector APIs

* Feedback
  • Loading branch information
TIHan committed Jun 14, 2024
1 parent 2b0e517 commit 5508f79
Show file tree
Hide file tree
Showing 10 changed files with 750 additions and 32 deletions.
9 changes: 9 additions & 0 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4390,6 +4390,15 @@ void emitter::emitInsSve_R_R_R(instruction ins,
case INS_sve_ldnf1sh:
case INS_sve_ldnf1w:
case INS_sve_ldnf1sw:
case INS_sve_ldnf1d:
case INS_sve_ldnt1b:
case INS_sve_ldnt1h:
case INS_sve_ldnt1w:
case INS_sve_ldnt1d:
case INS_sve_ld1rqb:
case INS_sve_ld1rqh:
case INS_sve_ld1rqw:
case INS_sve_ld1rqd:
return emitIns_R_R_R_I(ins, size, reg1, reg2, reg3, 0, opt);

default:
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/gentree.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27476,6 +27476,8 @@ bool GenTreeHWIntrinsic::OperIsMemoryLoad(GenTree** pAddr) const
break;

case NI_Sve_LoadVector:
case NI_Sve_LoadVectorNonTemporal:
case NI_Sve_LoadVector128AndReplicateToVector:
case NI_Sve_LoadVectorByteZeroExtendToInt16:
case NI_Sve_LoadVectorByteZeroExtendToInt32:
case NI_Sve_LoadVectorByteZeroExtendToInt64:
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ HARDWARE_INTRINSIC(Sve, GetActiveElementCount,
HARDWARE_INTRINSIC(Sve, LeadingSignCount, -1, -1, false, {INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_sve_cls, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LeadingZeroCount, -1, -1, false, {INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_sve_clz, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorNonFaulting, -1, 1, true, {INS_sve_ldnf1b, INS_sve_ldnf1b, INS_sve_ldnf1h, INS_sve_ldnf1h, INS_sve_ldnf1w, INS_sve_ldnf1w, INS_sve_ldnf1d, INS_sve_ldnf1d, INS_sve_ldnf1w, INS_sve_ldnf1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorNonTemporal, -1, 2, true, {INS_sve_ldnt1b, INS_sve_ldnt1b, INS_sve_ldnt1h, INS_sve_ldnt1h, INS_sve_ldnt1w, INS_sve_ldnt1w, INS_sve_ldnt1d, INS_sve_ldnt1d, INS_sve_ldnt1w, INS_sve_ldnt1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVector128AndReplicateToVector, -1, 2, true, {INS_sve_ld1rqb, INS_sve_ld1rqb, INS_sve_ld1rqh, INS_sve_ld1rqh, INS_sve_ld1rqw, INS_sve_ld1rqw, INS_sve_ld1rqd, INS_sve_ld1rqd, INS_sve_ld1rqw, INS_sve_ld1rqd}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, LoadVectorByteNonFaultingZeroExtendToInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ldnf1b, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1975,6 +1975,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
switch (intrin.id)
{
case NI_Sve_LoadVectorNonTemporal:
case NI_Sve_LoadVector128AndReplicateToVector:
case NI_Sve_StoreAndZip:
case NI_Sve_PrefetchBytes:
case NI_Sve_PrefetchInt16:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1842,6 +1842,189 @@ internal Arm64() { }
public static unsafe Vector<double> LoadVector(Vector<double> mask, double* address) { throw new PlatformNotSupportedException(); }


/// <summary>
/// svuint8_t svldnf1[_u8](svbool_t pg, const uint8_t *base)
/// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<byte> LoadVectorNonFaulting(byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svldnf1[_f64](svbool_t pg, const float64_t *base)
/// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<double> LoadVectorNonFaulting(double* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svldnf1[_s16](svbool_t pg, const int16_t *base)
/// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<short> LoadVectorNonFaulting(short* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svldnf1[_s32](svbool_t pg, const int32_t *base)
/// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<int> LoadVectorNonFaulting(int* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svldnf1[_s64](svbool_t pg, const int64_t *base)
/// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorNonFaulting(long* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svldnf1[_s8](svbool_t pg, const int8_t *base)
/// LDNF1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<sbyte> LoadVectorNonFaulting(sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svldnf1[_f32](svbool_t pg, const float32_t *base)
/// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<float> LoadVectorNonFaulting(float* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svldnf1[_u16](svbool_t pg, const uint16_t *base)
/// LDNF1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ushort> LoadVectorNonFaulting(ushort* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svldnf1[_u32](svbool_t pg, const uint32_t *base)
/// LDNF1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<uint> LoadVectorNonFaulting(uint* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldnf1[_u64](svbool_t pg, const uint64_t *base)
/// LDNF1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorNonFaulting(ulong* address) { throw new PlatformNotSupportedException(); }


/// <summary>
/// svuint8_t svldnt1[_u8](svbool_t pg, const uint8_t *base)
/// LDNT1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<byte> LoadVectorNonTemporal(Vector<byte> mask, byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svldnt1[_f64](svbool_t pg, const float64_t *base)
/// LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<double> LoadVectorNonTemporal(Vector<double> mask, double* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svldnt1[_s16](svbool_t pg, const int16_t *base)
/// LDNT1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<short> LoadVectorNonTemporal(Vector<short> mask, short* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svldnt1[_s32](svbool_t pg, const int32_t *base)
/// LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<int> LoadVectorNonTemporal(Vector<int> mask, int* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svldnt1[_s64](svbool_t pg, const int64_t *base)
/// LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<long> LoadVectorNonTemporal(Vector<long> mask, long* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svldnt1[_s8](svbool_t pg, const int8_t *base)
/// LDNT1B Zresult.B, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<sbyte> LoadVectorNonTemporal(Vector<sbyte> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svldnt1[_f32](svbool_t pg, const float32_t *base)
/// LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<float> LoadVectorNonTemporal(Vector<float> mask, float* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svldnt1[_u16](svbool_t pg, const uint16_t *base)
/// LDNT1H Zresult.H, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ushort> LoadVectorNonTemporal(Vector<ushort> mask, ushort* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svldnt1[_u32](svbool_t pg, const uint32_t *base)
/// LDNT1W Zresult.S, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<uint> LoadVectorNonTemporal(Vector<uint> mask, uint* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svldnt1[_u64](svbool_t pg, const uint64_t *base)
/// LDNT1D Zresult.D, Pg/Z, [Xbase, #0, MUL VL]
/// </summary>
public static unsafe Vector<ulong> LoadVectorNonTemporal(Vector<ulong> mask, ulong* address) { throw new PlatformNotSupportedException(); }


/// <summary>
/// svuint8_t svld1rq[_u8](svbool_t pg, const uint8_t *base)
/// LD1RQB Zresult.B, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<byte> LoadVector128AndReplicateToVector(Vector<byte> mask, byte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svld1rq[_f64](svbool_t pg, const float64_t *base)
/// LD1RQD Zresult.D, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<double> LoadVector128AndReplicateToVector(Vector<double> mask, double* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svld1rq[_s16](svbool_t pg, const int16_t *base)
/// LD1RQH Zresult.H, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<short> LoadVector128AndReplicateToVector(Vector<short> mask, short* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svld1rq[_s32](svbool_t pg, const int32_t *base)
/// LD1RQW Zresult.S, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<int> LoadVector128AndReplicateToVector(Vector<int> mask, int* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svld1rq[_s64](svbool_t pg, const int64_t *base)
/// LD1RQD Zresult.D, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<long> LoadVector128AndReplicateToVector(Vector<long> mask, long* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svld1rq[_s8](svbool_t pg, const int8_t *base)
/// LD1RQB Zresult.B, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<sbyte> LoadVector128AndReplicateToVector(Vector<sbyte> mask, sbyte* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svld1rq[_f32](svbool_t pg, const float32_t *base)
/// LD1RQW Zresult.S, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<float> LoadVector128AndReplicateToVector(Vector<float> mask, float* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svld1rq[_u16](svbool_t pg, const uint16_t *base)
/// LD1RQH Zresult.H, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<ushort> LoadVector128AndReplicateToVector(Vector<ushort> mask, ushort* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svld1rq[_u32](svbool_t pg, const uint32_t *base)
/// LD1RQW Zresult.S, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<uint> LoadVector128AndReplicateToVector(Vector<uint> mask, uint* address) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svld1rq[_u64](svbool_t pg, const uint64_t *base)
/// LD1RQD Zresult.D, Pg/Z, [Xbase, #0]
/// </summary>
public static unsafe Vector<ulong> LoadVector128AndReplicateToVector(Vector<ulong> mask, ulong* address) { throw new PlatformNotSupportedException(); }


/// LoadVectorByteZeroExtendToInt16 : Load 8-bit data and zero-extend

/// <summary>
Expand Down
Loading

0 comments on commit 5508f79

Please sign in to comment.