Skip to content

Commit 717e940

Browse files
a74nh authored and michaelgsharp committed
JIT ARM64-SVE: Add CreateWhileLessThan* (dotnet#100949)
* JIT ARM64-SVE: Add CreateWhileLessThan*
* Set simdBaseJitType to type of input args
* Hardcode opt in codegen
* Fix gtNewSimdConvertMaskToVectorNode types
* Use HW_Flag_BaseTypeFromFirstArg
* Set base type to return type and auxiliary type to input type
1 parent baf1ea1 commit 717e940

File tree

9 files changed

+810
-2
lines changed

9 files changed

+810
-2
lines changed

src/coreclr/jit/hwintrinsic.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -1539,6 +1539,17 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
15391539
}
15401540
break;
15411541

1542+
case NI_Sve_CreateWhileLessThanMask8Bit:
1543+
case NI_Sve_CreateWhileLessThanOrEqualMask8Bit:
1544+
case NI_Sve_CreateWhileLessThanMask16Bit:
1545+
case NI_Sve_CreateWhileLessThanOrEqualMask16Bit:
1546+
case NI_Sve_CreateWhileLessThanMask32Bit:
1547+
case NI_Sve_CreateWhileLessThanOrEqualMask32Bit:
1548+
case NI_Sve_CreateWhileLessThanMask64Bit:
1549+
case NI_Sve_CreateWhileLessThanOrEqualMask64Bit:
1550+
retNode->AsHWIntrinsic()->SetAuxiliaryJitType(sigReader.op1JitType);
1551+
break;
1552+
15421553
default:
15431554
break;
15441555
}

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

+34
Original file line numberDiff line numberDiff line change
@@ -1409,6 +1409,40 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
14091409
GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL);
14101410
break;
14111411

1412+
case NI_Sve_CreateWhileLessThanMask8Bit:
1413+
case NI_Sve_CreateWhileLessThanMask16Bit:
1414+
case NI_Sve_CreateWhileLessThanMask32Bit:
1415+
case NI_Sve_CreateWhileLessThanMask64Bit:
1416+
{
1417+
// Emit size and instruction are based on the scalar operands.
1418+
var_types auxType = node->GetAuxiliaryType();
1419+
emitSize = emitActualTypeSize(auxType);
1420+
if (varTypeIsUnsigned(auxType))
1421+
{
1422+
ins = INS_sve_whilelo;
1423+
}
1424+
1425+
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
1426+
break;
1427+
}
1428+
1429+
case NI_Sve_CreateWhileLessThanOrEqualMask8Bit:
1430+
case NI_Sve_CreateWhileLessThanOrEqualMask16Bit:
1431+
case NI_Sve_CreateWhileLessThanOrEqualMask32Bit:
1432+
case NI_Sve_CreateWhileLessThanOrEqualMask64Bit:
1433+
{
1434+
// Emit size and instruction are based on the scalar operands.
1435+
var_types auxType = node->GetAuxiliaryType();
1436+
emitSize = emitActualTypeSize(auxType);
1437+
if (varTypeIsUnsigned(auxType))
1438+
{
1439+
ins = INS_sve_whilels;
1440+
}
1441+
1442+
GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt);
1443+
break;
1444+
}
1445+
14121446
default:
14131447
unreached();
14141448
}

src/coreclr/jit/hwintrinsiclistarm64sve.h

+8-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,14 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle,
3030
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
3131
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
3232
HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask)
33-
33+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
34+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
35+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
36+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
37+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
38+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
39+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
40+
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
3441
HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
3542
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
3643
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
@@ -58,7 +65,6 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64,
5865
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
5966

6067

61-
6268
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
6369
// ISA Function name SIMD size NumArg EncodesExtraTypeArg Instructions Category Flags
6470
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}

0 commit comments

Comments
 (0)