Skip to content

Commit

Permalink
Add support for Sve.ReverseBits() and Sve.Splice()
Browse files Browse the repository at this point in the history
  • Loading branch information
SwapnilGaikwad committed Jun 17, 2024
1 parent bcd9cb8 commit 2e693e6
Show file tree
Hide file tree
Showing 9 changed files with 1,066 additions and 9 deletions.
28 changes: 22 additions & 6 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -840,15 +840,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case 3:
if (isRMW)
{
if (targetReg != op1Reg)
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);
if (targetReg != op2Reg)
{
assert(targetReg != op1Reg);
assert(targetReg != op3Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
/* canSkip */ true);
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg,
/* canSkip */ true);
}

GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt);
}
else
{
if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
/* canSkip */ true);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
}
else
{
Expand Down
2 changes: 2 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ HARDWARE_INTRINSIC(Sve, PrefetchBytes,
HARDWARE_INTRINSIC(Sve, PrefetchInt16, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_prfh, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, PrefetchInt32, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfw, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, PrefetchInt64, -1, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_prfd, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasEnumOperand)
HARDWARE_INTRINSIC(Sve, ReverseBits, -1, -1, false, {INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_sve_rbit, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ReverseElement, -1, 1, true, {INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev, INS_sve_rev}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, ReverseElement16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_sve_revh, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, ReverseElement32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_revw, INS_sve_revw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
Expand All @@ -184,6 +185,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32,
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Splice, -1, 3, true, {INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, true, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
Expand Down
34 changes: 31 additions & 3 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1514,6 +1514,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
const bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler);

bool tgtPrefOp1 = false;
bool op2IsTarget = (isRMW && HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id));
bool delayFreeMultiple = false;
if (intrin.op1 != nullptr)
{
Expand Down Expand Up @@ -1568,7 +1569,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou

// If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers,
// we want to preference op1Reg to the target if op1 is not contained.
if (isRMW || simdRegToSimdRegMove)

if ((isRMW || simdRegToSimdRegMove) && !HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
tgtPrefOp1 = !intrin.op1->isContained();
}
Expand Down Expand Up @@ -1617,6 +1619,12 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
predMask = RBM_LOWMASK.GetPredicateRegSet();
}

if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id) && isRMW)
{
srcCount += BuildDelayFreeUses(intrin.op1, intrin.op2, predMask);
}
else
{
srcCount += BuildOperandUses(intrin.op1, predMask);
}
}
Expand Down Expand Up @@ -1978,6 +1986,18 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (op2IsTarget)
{
if (!intrin.op2->isContained())
{
tgtPrefUse = BuildUse(intrin.op2);
srcCount ++;
}
else
{
srcCount += BuildOperandUses(intrin.op2);
}
}
else
{
switch (intrin.id)
Expand Down Expand Up @@ -2021,12 +2041,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
SingleTypeRegSet candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE;

srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates)
: BuildOperandUses(intrin.op3, candidates);
if (op2IsTarget)
{
srcCount += BuildDelayFreeUses(intrin.op3, intrin.op2, candidates);
}
else
{
srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates)
: BuildOperandUses(intrin.op3, candidates);
}

if (intrin.op4 != nullptr)
{
assert(lowVectorOperandNum != 4);
assert(!op2IsTarget);
srcCount += isRMW ? BuildDelayFreeUses(intrin.op4, intrin.op1) : BuildOperandUses(intrin.op4);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4207,6 +4207,57 @@ internal Arm64() { }
public static unsafe void PrefetchInt64(Vector<ulong> mask, void* address, [ConstantExpected] SvePrefetchType prefetchType) { throw new PlatformNotSupportedException(); }


/// Reverse bits

/// <summary>
/// svuint8_t svrbit[_u8]_m(svuint8_t inactive, svbool_t pg, svuint8_t op)
/// RBIT Ztied.B, Pg/M, Zop.B
/// </summary>
public static unsafe Vector<byte> ReverseBits(Vector<byte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svrbit[_s16]_m(svint16_t inactive, svbool_t pg, svint16_t op)
/// RBIT Ztied.H, Pg/M, Zop.H
/// </summary>
public static unsafe Vector<short> ReverseBits(Vector<short> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svrbit[_s32]_m(svint32_t inactive, svbool_t pg, svint32_t op)
/// RBIT Ztied.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<int> ReverseBits(Vector<int> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svrbit[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
/// RBIT Ztied.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<long> ReverseBits(Vector<long> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svrbit[_s8]_m(svint8_t inactive, svbool_t pg, svint8_t op)
/// RBIT Ztied.B, Pg/M, Zop.B
/// </summary>
public static unsafe Vector<sbyte> ReverseBits(Vector<sbyte> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svrbit[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
/// RBIT Ztied.H, Pg/M, Zop.H
/// </summary>
public static unsafe Vector<ushort> ReverseBits(Vector<ushort> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svrbit[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
/// RBIT Ztied.S, Pg/M, Zop.S
/// </summary>
public static unsafe Vector<uint> ReverseBits(Vector<uint> value) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svrbit[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
/// RBIT Ztied.D, Pg/M, Zop.D
/// </summary>
public static unsafe Vector<ulong> ReverseBits(Vector<ulong> value) { throw new PlatformNotSupportedException(); }


/// Reverse all elements

/// <summary>
Expand Down Expand Up @@ -5002,6 +5053,69 @@ internal Arm64() { }
public static unsafe Vector<long> SignExtend8(Vector<long> value) { throw new PlatformNotSupportedException(); }


/// Splice two vectors under predicate control

/// <summary>
/// svuint8_t svsplice[_u8](svbool_t pg, svuint8_t op1, svuint8_t op2)
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> Splice(Vector<byte> mask, Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svsplice[_f64](svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<double> Splice(Vector<double> mask, Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svsplice[_s16](svbool_t pg, svint16_t op1, svint16_t op2)
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
/// </summary>
public static unsafe Vector<short> Splice(Vector<short> mask, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svsplice[_s32](svbool_t pg, svint32_t op1, svint32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<int> Splice(Vector<int> mask, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svsplice[_s64](svbool_t pg, svint64_t op1, svint64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<long> Splice(Vector<long> mask, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svsplice[_s8](svbool_t pg, svint8_t op1, svint8_t op2)
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> Splice(Vector<sbyte> mask, Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svsplice[_f32](svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<float> Splice(Vector<float> mask, Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svsplice[_u16](svbool_t pg, svuint16_t op1, svuint16_t op2)
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> Splice(Vector<ushort> mask, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svsplice[_u32](svbool_t pg, svuint32_t op1, svuint32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> Splice(Vector<uint> mask, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svsplice[_u64](svbool_t pg, svuint64_t op1, svuint64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> Splice(Vector<ulong> mask, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


/// Non-truncating store

/// <summary>
Expand Down
Loading

0 comments on commit 2e693e6

Please sign in to comment.