Skip to content

Commit

Permalink
JIT: Added SVE APIs - Test*, ExtractVector (#103739)
Browse files Browse the repository at this point in the history
* Initial work for Test* and Extract* SVE apis

* Properly handling Test APIs

* Added Sve.ExtractVector tests and they pass

* Added more extract tests. Still getting asserts.

* Fixed tests. Still working on assertion failures for LastScalar.

* Removed ExtractLast and ExtractAfterLast APIs for now. Added more test coverage for TestAnyTrue, TestFirstTrue, TestLastTrue.

* Fixing test

* Fixed test again. Formatting.

* Feedback

* Fix ordering
  • Loading branch information
TIHan authored Jun 25, 2024
1 parent 64f7f95 commit 31cc133
Show file tree
Hide file tree
Showing 14 changed files with 1,197 additions and 15 deletions.
1 change: 1 addition & 0 deletions src/coreclr/jit/emitarm64sve.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2727,6 +2727,7 @@ void emitter::emitInsSve_R_R_I(instruction ins,
if (sopt == INS_SCALABLE_OPTS_WITH_VECTOR_PAIR)
{
fmt = IF_SVE_BQ_2A;
unreached(); // Not supported yet.
}
else
{
Expand Down
43 changes: 28 additions & 15 deletions src/coreclr/jit/hwintrinsic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1900,27 +1900,40 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
{
assert(numArgs > 0);
GenTree* op1 = retNode->AsHWIntrinsic()->Op(1);
if (intrinsic == NI_Sve_ConditionalSelect)

switch (intrinsic)
{
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
{
return retNode->AsHWIntrinsic()->Op(2);
}
else if (op1->IsVectorZero())
case NI_Sve_ConditionalSelect:
{
return retNode->AsHWIntrinsic()->Op(3);
if (op1->IsVectorAllBitsSet() || op1->IsMaskAllBitsSet())
{
return retNode->AsHWIntrinsic()->Op(2);
}
else if (op1->IsVectorZero())
{
return retNode->AsHWIntrinsic()->Op(3);
}
break;
}
}
else if (intrinsic == NI_Sve_GetActiveElementCount)
{
GenTree* op2 = retNode->AsHWIntrinsic()->Op(2);

// HWInstrinsic requires a mask for op2
if (!varTypeIsMask(op2))
case NI_Sve_GetActiveElementCount:
case NI_Sve_TestAnyTrue:
case NI_Sve_TestFirstTrue:
case NI_Sve_TestLastTrue:
{
retNode->AsHWIntrinsic()->Op(2) =
gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize);
GenTree* op2 = retNode->AsHWIntrinsic()->Op(2);

// HWInstrinsic requires a mask for op2
if (!varTypeIsMask(op2))
{
retNode->AsHWIntrinsic()->Op(2) =
gtNewSimdCvtVectorToMaskNode(TYP_MASK, op2, simdBaseJitType, simdSize);
}
break;
}

default:
break;
}

if (!varTypeIsMask(op1))
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/hwintrinsicarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -403,6 +403,7 @@ void HWIntrinsicInfo::lookupImmBounds(
case NI_AdvSimd_Arm64_InsertSelectedScalar:
case NI_Sve_FusedMultiplyAddBySelectedScalar:
case NI_Sve_FusedMultiplySubtractBySelectedScalar:
case NI_Sve_ExtractVector:
immUpperBound = Compiler::getSIMDVectorLength(simdSize, baseType) - 1;
break;

Expand Down
30 changes: 30 additions & 0 deletions src/coreclr/jit/hwintrinsiccodegenarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2111,6 +2111,36 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}

case NI_Sve_TestAnyTrue:
case NI_Sve_TestFirstTrue:
case NI_Sve_TestLastTrue:
assert(targetReg == REG_NA);
GetEmitter()->emitIns_R_R(ins, EA_SCALABLE, op1Reg, op2Reg, INS_OPTS_SCALABLE_B);
break;

case NI_Sve_ExtractVector:
{
assert(isRMW);

if (targetReg != op1Reg)
{
assert(targetReg != op2Reg);

GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, op1Reg);
}

HWIntrinsicImmOpHelper helper(this, intrin.op3, node);

for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd())
{
const int elementIndex = helper.ImmValue();
const int byteIndex = genTypeSize(intrin.baseType) * elementIndex;

GetEmitter()->emitIns_R_R_I(ins, emitSize, targetReg, op2Reg, byteIndex, INS_OPTS_SCALABLE_B);
}
break;
}

case NI_Sve_InsertIntoShiftedVector:
{
assert(isRMW);
Expand Down
4 changes: 4 additions & 0 deletions src/coreclr/jit/hwintrinsiclistarm64sve.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit,
HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, DotProduct, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasRMWSemantics)
HARDWARE_INTRINSIC(Sve, DotProductBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdot, INS_sve_udot, INS_sve_sdot, INS_sve_udot, INS_invalid, INS_invalid}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_LowVectorOperation)
HARDWARE_INTRINSIC(Sve, ExtractVector, -1, 3, true, {INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext, INS_sve_ext}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, FusedMultiplyAdd, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, FusedMultiplyAddBySelectedScalar, -1, 4, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmla, INS_sve_fmla}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_HasRMWSemantics|HW_Flag_FmaIntrinsic|HW_Flag_LowVectorOperation)
HARDWARE_INTRINSIC(Sve, FusedMultiplyAddNegated, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fnmla, INS_sve_fnmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen)
Expand Down Expand Up @@ -204,6 +205,9 @@ HARDWARE_INTRINSIC(Sve, StoreNarrowing,
HARDWARE_INTRINSIC(Sve, StoreNonTemporal, -1, 3, true, {INS_sve_stnt1b, INS_sve_stnt1b, INS_sve_stnt1h, INS_sve_stnt1h, INS_sve_stnt1w, INS_sve_stnt1w, INS_sve_stnt1d, INS_sve_stnt1d, INS_sve_stnt1w, INS_sve_stnt1d}, HW_Category_MemoryStore, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_ExplicitMaskedOperation|HW_Flag_SpecialCodeGen|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, SubtractSaturate, -1, 2, true, {INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_sve_sqsub, INS_sve_uqsub, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
HARDWARE_INTRINSIC(Sve, TestAnyTrue, -1, 2, true, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TestFirstTrue, -1, 2, true, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TestLastTrue, -1, 2, true, {INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_sve_ptest, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TransposeEven, -1, 2, true, {INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1, INS_sve_trn1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, TransposeOdd, -1, 2, true, {INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2, INS_sve_trn2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
Expand Down
22 changes: 22 additions & 0 deletions src/coreclr/jit/lowerarmarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1266,6 +1266,27 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
return LowerHWIntrinsicCmpOp(node, GT_NE);
}

case NI_Sve_TestAnyTrue:
{
LowerNodeCC(node, GenCondition::NE);
node->gtType = TYP_VOID;
return node->gtNext;
}

case NI_Sve_TestFirstTrue:
{
LowerNodeCC(node, GenCondition::SLT);
node->gtType = TYP_VOID;
return node->gtNext;
}

case NI_Sve_TestLastTrue:
{
LowerNodeCC(node, GenCondition::ULT);
node->gtType = TYP_VOID;
return node->gtNext;
}

case NI_Vector128_WithLower:
case NI_Vector128_WithUpper:
{
Expand Down Expand Up @@ -3192,6 +3213,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
case NI_Sve_ExtractVector:
assert(hasImmediateOperand);
assert(varTypeIsIntegral(intrin.op3));
if (intrin.op3->IsCnsIntOrI())
Expand Down
1 change: 1 addition & 0 deletions src/coreclr/jit/lsraarm64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1445,6 +1445,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
case NI_Sve_PrefetchInt16:
case NI_Sve_PrefetchInt32:
case NI_Sve_PrefetchInt64:
case NI_Sve_ExtractVector:
needBranchTargetReg = !intrin.op3->isContainedIntOrIImmed();
break;

Expand Down
Loading

0 comments on commit 31cc133

Please sign in to comment.