Add support for Sve.Splice() #103567

Merged: 11 commits, Jun 25, 2024
10 changes: 6 additions & 4 deletions src/coreclr/jit/codegenarm64test.cpp
@@ -6237,10 +6237,12 @@ void CodeGen::genArm64EmitterUnitTestsSve()
INS_OPTS_SCALABLE_D); // REVW <Zd>.D, <Pg>/M, <Zn>.D

// IF_SVE_CV_3A
theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V0, REG_P0, REG_V30, INS_OPTS_SCALABLE_B,
INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V3, REG_P7, REG_V27, INS_OPTS_SCALABLE_D,
INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
// TODO-SVE: Currently not supporting the constructive version of splice. Uncomment the tests on closing
// https://github.com/dotnet/runtime/issues/103850.
// theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V0,REG_P0, REG_V30, INS_OPTS_SCALABLE_B,
// INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
// theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V3, REG_P7, REG_V27, INS_OPTS_SCALABLE_D,
// INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // SPLICE <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}

// IF_SVE_CV_3B
theEmitter->emitIns_R_R_R(INS_sve_splice, EA_SCALABLE, REG_V1, REG_P1, REG_V29,
11 changes: 7 additions & 4 deletions src/coreclr/jit/emitarm64sve.cpp
@@ -3858,6 +3858,9 @@ void emitter::emitInsSve_R_R_R(instruction ins,
assert(isLowPredicateRegister(reg2));
assert(isVectorRegister(reg3));
assert(insOptsScalableStandard(opt));
// TODO-SVE: We currently support only the destructive version of splice. Remove the following assert when
// the constructive version is added, as described in https://github.com/dotnet/runtime/issues/103850.
assert(sopt != INS_SCALABLE_OPTS_WITH_VECTOR_PAIR);
fmt = (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR) ? IF_SVE_CV_3A : IF_SVE_CV_3B;
break;

@@ -10294,7 +10297,7 @@ BYTE* emitter::emitOutput_InstrSve(BYTE* dst, instrDesc* id)
dst += emitOutput_Instr(dst, code);
break;

case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
code = emitInsCodeSve(ins, fmt);
code |= insEncodeReg_V<4, 0>(id->idReg1()); // ddddd
@@ -13257,7 +13260,7 @@ void emitter::emitInsSveSanityCheck(instrDesc* id)
assert(isScalableVectorSize(id->idOpSize()));
break;

case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
assert(isScalableVectorSize(id->idOpSize())); // xx
assert(insOptsScalableStandard(id->idInsOpt()));
@@ -14944,7 +14947,7 @@ void emitter::emitDispInsSveHelp(instrDesc* id)
break;

// <Zd>.<T>, <Pv>, {<Zn1>.<T>, <Zn2>.<T>}
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
emitDispSveReg(id->idReg1(), id->idInsOpt(), true); // ddddd
emitDispPredicateReg(id->idReg2(), insGetPredicateType(fmt), id->idInsOpt(), true); // VVV
emitDispSveConsecutiveRegList(id->idReg3(), insGetSveReg1ListSize(ins), id->idInsOpt(), false); // nnnnn
@@ -16805,7 +16808,7 @@ void emitter::getInsSveExecutionCharacteristics(instrDesc* id, insExecutionChara
result.insLatency = PERFSCORE_LATENCY_140C;
break;

case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
case IF_SVE_CV_3A: // ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
case IF_SVE_CV_3B: // ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
result.insLatency = PERFSCORE_LATENCY_3C;
result.insThroughput = PERFSCORE_THROUGHPUT_1C;
2 changes: 1 addition & 1 deletion src/coreclr/jit/emitfmtsarm64sve.h
@@ -221,7 +221,7 @@ IF_DEF(SVE_CR_3A, IS_NONE, NONE) // SVE_CR_3A ........xx...... ...gggnnnnnddd
IF_DEF(SVE_CS_3A, IS_NONE, NONE) // SVE_CS_3A ........xx...... ...gggnnnnnddddd -- SVE extract element to general register
IF_DEF(SVE_CT_3A, IS_NONE, NONE) // SVE_CT_3A ................ ...gggnnnnnddddd -- SVE reverse doublewords
IF_DEF(SVE_CU_3A, IS_NONE, NONE) // SVE_CU_3A ........xx...... ...gggnnnnnddddd -- SVE reverse within elements
IF_DEF(SVE_CV_3A, IS_NONE, NONE) // SVE_CV_3A ........xx...... ...VVVnnnnnddddd -- SVE vector splice (destructive)
IF_DEF(SVE_CV_3A, IS_NONE, NONE) // SVE_CV_3A ........xx...... ...VVVnnnnnddddd -- SVE vector splice (constructive)
IF_DEF(SVE_CV_3B, IS_NONE, NONE) // SVE_CV_3B ........xx...... ...VVVmmmmmddddd -- SVE vector splice (destructive)
IF_DEF(SVE_CW_4A, IS_NONE, NONE) // SVE_CW_4A ........xx.mmmmm ..VVVVnnnnnddddd -- SVE select vector elements (predicated)
IF_DEF(SVE_CX_4A, IS_NONE, NONE) // SVE_CX_4A ........xx.mmmmm ...gggnnnnn.DDDD -- SVE integer compare vectors
28 changes: 22 additions & 6 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
@@ -853,15 +853,31 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
case 3:
if (isRMW)
{
if (targetReg != op1Reg)
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);
if (targetReg != op2Reg)
{
assert(targetReg != op1Reg);
assert(targetReg != op3Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
/* canSkip */ true);
GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op2Reg,
/* canSkip */ true);
}

GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op3Reg, opt);
}
else
{
if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);
assert(targetReg != op3Reg);

GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, op1Reg,
/* canSkip */ true);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
}
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op2Reg, op3Reg, opt);
}
else
{
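The hunk above adjusts the three-operand RMW path for explicit masked operations: for an intrinsic like Splice the governing mask arrives in op1, so op2 (not op1) is the vector tied to the destination, and the preparatory mov therefore copies op2 into targetReg before the destructive instruction is emitted with the mask as the second register. A rough sketch of the managed call shape this path handles and of the resulting instruction sequence; the wrapper name and register names below are illustrative, not taken from the PR:

    // Illustrative only. For Sve.Splice(mask, left, right), the codegen change above
    // emits, in the worst case:
    //     mov    z_target, z_left                                  (skipped when targetReg == op2Reg)
    //     splice z_target.<T>, p_mask, z_target.<T>, z_right.<T>   (destructive form, IF_SVE_CV_3B)
    using System.Numerics;
    using System.Runtime.Intrinsics.Arm;

    static class SpliceCodegenShape
    {
        static Vector<int> SpliceInt32(Vector<int> mask, Vector<int> left, Vector<int> right)
            => Sve.Splice(mask, left, right);
    }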
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
@@ -198,6 +198,7 @@ HARDWARE_INTRINSIC(Sve, SignExtend32,
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SignExtendWideningLower, -1, 1, true, {INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_sve_sunpklo, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, SignExtendWideningUpper, -1, 1, true, {INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_sve_sunpkhi, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Sve, Splice, -1, 3, true, {INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice, INS_sve_splice}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Sqrt, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fsqrt, INS_sve_fsqrt}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreAndZip, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_sve_st1w, INS_sve_st1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, StoreNarrowing, -1, 3, true, {INS_sve_st1b, INS_sve_st1b, INS_sve_st1h, INS_sve_st1h, INS_sve_st1w, INS_sve_st1w, INS_sve_st1d, INS_sve_st1d, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
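The new Splice entry maps every supported base type to the single INS_sve_splice instruction and flags the intrinsic as an explicit masked, RMW, low-masked operation, so the predicate is an ordinary managed argument that must end up in a low predicate register. A minimal sketch of two of the ten overloads this entry enables (wrapper names invented for illustration):

    // Sketch only: each overload lowers to the same SPLICE instruction with a different
    // element size; the first argument is the governing predicate in every case.
    using System.Numerics;
    using System.Runtime.Intrinsics.Arm;

    static class SpliceOverloads
    {
        static Vector<byte> SpliceBytes(Vector<byte> mask, Vector<byte> left, Vector<byte> right)
            => Sve.Splice(mask, left, right);

        static Vector<double> SpliceDoubles(Vector<double> mask, Vector<double> left, Vector<double> right)
            => Sve.Splice(mask, left, right);
    }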
39 changes: 35 additions & 4 deletions src/coreclr/jit/lsraarm64.cpp
@@ -1508,6 +1508,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
const bool isRMW = intrinsicTree->isRMWHWIntrinsic(compiler);

bool tgtPrefOp1 = false;
bool tgtPrefOp2 = false;
bool delayFreeMultiple = false;
if (intrin.op1 != nullptr)
{
@@ -1562,9 +1563,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou

// If we have an RMW intrinsic or an intrinsic with simple move semantic between two SIMD registers,
// we want to preference op1Reg to the target if op1 is not contained.
if (isRMW || simdRegToSimdRegMove)

if ((isRMW || simdRegToSimdRegMove))
{
tgtPrefOp1 = !intrin.op1->isContained();
if (HWIntrinsicInfo::IsExplicitMaskedOperation(intrin.id))
{
assert(!simdRegToSimdRegMove);
// Prefer op2Reg for the masked operation as mask would be the op1Reg
tgtPrefOp2 = !intrin.op1->isContained();
}
else
{
tgtPrefOp1 = !intrin.op1->isContained();
}
}

if (delayFreeMultiple)
@@ -1947,6 +1958,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
(argNum == lowVectorOperandNum) ? lowVectorCandidates : RBM_NONE);
}
}
else if (tgtPrefOp2)
{
if (!intrin.op2->isContained())
{
assert(tgtPrefUse == nullptr);
tgtPrefUse2 = BuildUse(intrin.op2);
srcCount++;
}
else
{
srcCount += BuildOperandUses(intrin.op2);
}
}
else
{
switch (intrin.id)
@@ -1990,12 +2014,19 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
{
SingleTypeRegSet candidates = lowVectorOperandNum == 3 ? lowVectorCandidates : RBM_NONE;

srcCount += isRMW ? BuildDelayFreeUses(intrin.op3, intrin.op1, candidates)
: BuildOperandUses(intrin.op3, candidates);
if (isRMW)
{
srcCount += BuildDelayFreeUses(intrin.op3, (tgtPrefOp2 ? intrin.op2 : intrin.op1), candidates);
}
else
{
srcCount += BuildOperandUses(intrin.op3, candidates);
}

if (intrin.op4 != nullptr)
{
assert(lowVectorOperandNum != 4);
assert(!tgtPrefOp2);
srcCount += isRMW ? BuildDelayFreeUses(intrin.op4, intrin.op1) : BuildOperandUses(intrin.op4);
}
}
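In the allocator, the same mask-in-op1 shape means the target register is now preferenced to op2 for explicit masked RMW intrinsics, and op3 is delay-freed against op2 rather than op1. A hedged C# illustration of the accumulator pattern that benefits (names invented for the example): when the result can share a register with the tied operand, the destructive SPLICE needs no preparatory mov at all.

    using System.Numerics;
    using System.Runtime.Intrinsics.Arm;

    static class SpliceAllocation
    {
        // With op2 preferenced to the destination, 'acc' and the result of Splice will
        // usually land in the same Z register, so codegen can skip the mov entirely.
        static Vector<short> Accumulate(Vector<short> mask, Vector<short> acc, Vector<short> next)
            => Sve.Splice(mask, acc, next);
    }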
@@ -5496,6 +5496,69 @@ internal Arm64() { }
public static unsafe Vector<long> SignExtend8(Vector<long> value) { throw new PlatformNotSupportedException(); }


/// Splice two vectors under predicate control

/// <summary>
/// svuint8_t svsplice[_u8](svbool_t pg, svuint8_t op1, svuint8_t op2)
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
/// </summary>
public static unsafe Vector<byte> Splice(Vector<byte> mask, Vector<byte> left, Vector<byte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat64_t svsplice[_f64](svbool_t pg, svfloat64_t op1, svfloat64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<double> Splice(Vector<double> mask, Vector<double> left, Vector<double> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint16_t svsplice[_s16](svbool_t pg, svint16_t op1, svint16_t op2)
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
/// </summary>
public static unsafe Vector<short> Splice(Vector<short> mask, Vector<short> left, Vector<short> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint32_t svsplice[_s32](svbool_t pg, svint32_t op1, svint32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<int> Splice(Vector<int> mask, Vector<int> left, Vector<int> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint64_t svsplice[_s64](svbool_t pg, svint64_t op1, svint64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<long> Splice(Vector<long> mask, Vector<long> left, Vector<long> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svint8_t svsplice[_s8](svbool_t pg, svint8_t op1, svint8_t op2)
/// SPLICE Ztied1.B, Pg, Ztied1.B, Zop2.B
/// </summary>
public static unsafe Vector<sbyte> Splice(Vector<sbyte> mask, Vector<sbyte> left, Vector<sbyte> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svfloat32_t svsplice[_f32](svbool_t pg, svfloat32_t op1, svfloat32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<float> Splice(Vector<float> mask, Vector<float> left, Vector<float> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint16_t svsplice[_u16](svbool_t pg, svuint16_t op1, svuint16_t op2)
/// SPLICE Ztied1.H, Pg, Ztied1.H, Zop2.H
/// </summary>
public static unsafe Vector<ushort> Splice(Vector<ushort> mask, Vector<ushort> left, Vector<ushort> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint32_t svsplice[_u32](svbool_t pg, svuint32_t op1, svuint32_t op2)
/// SPLICE Ztied1.S, Pg, Ztied1.S, Zop2.S
/// </summary>
public static unsafe Vector<uint> Splice(Vector<uint> mask, Vector<uint> left, Vector<uint> right) { throw new PlatformNotSupportedException(); }

/// <summary>
/// svuint64_t svsplice[_u64](svbool_t pg, svuint64_t op1, svuint64_t op2)
/// SPLICE Ztied1.D, Pg, Ztied1.D, Zop2.D
/// </summary>
public static unsafe Vector<ulong> Splice(Vector<ulong> mask, Vector<ulong> left, Vector<ulong> right) { throw new PlatformNotSupportedException(); }


/// Sqrt : Square root

/// <summary>
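The doc comments above all map onto the destructive SPLICE form (Ztied1 serves as both the first source and the destination). As a closing usage note, here is a scalar reference model of the value Splice produces, based on the architectural description of SPLICE (copy the first-active through last-active elements of the first operand to the bottom of the result, then fill the remaining lanes from the lowest elements of the second operand); this is an illustrative sketch, not code from the PR:

    using System;

    static class SpliceReference
    {
        // Scalar model of Vector<int> Splice(mask, left, right): one array element per vector lane.
        static int[] Splice(bool[] mask, int[] left, int[] right)
        {
            int n = left.Length;
            var result = new int[n];
            int first = Array.IndexOf(mask, true);
            int last = Array.LastIndexOf(mask, true);
            int count = 0;

            if (first >= 0)
            {
                // The slice of 'left' from the first active lane through the last active
                // lane (inclusive) moves to the bottom of the result.
                for (int i = first; i <= last; i++)
                {
                    result[count++] = left[i];
                }
            }

            // Any remaining lanes are filled from the lowest-numbered elements of 'right'.
            for (int i = 0; count < n; i++, count++)
            {
                result[count] = right[i];
            }

            return result;
        }
    }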