Skip to content

Commit b36d67e

Browse files
kunalspathak authored and michaelgsharp committed
Arm64/Sve: Add SignExtend* and ZeroExtend* math APIs (dotnet#101702)
* Add [Sign|Zero]Extend[8|16|32] APIs:
* Add API to instruction mapping
* eliminate extra movprfx for AllBitsSetMask
* Add test cases
1 parent a578366 commit b36d67e

File tree

7 files changed

+386
-10
lines changed

7 files changed

+386
-10
lines changed

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

+1-2
Original file line numberDiff line numberDiff line change
@@ -448,14 +448,13 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
448448
if (intrin.op3->isContained())
449449
{
450450
assert(intrin.op3->IsVectorZero());
451-
if (intrin.op1->isContained())
451+
if (intrin.op1->isContained() || intrin.op1->IsMaskAllBitsSet())
452452
{
453453
// We already skip importing ConditionalSelect if op1 == trueAll, however
454454
// if we still see it here, it is because we wrapped the predicated instruction
455455
// inside ConditionalSelect.
456456
// As such, no need to move the `falseReg` to `targetReg`
457457
// because the predicated instruction will eventually set it.
458-
assert(intrin.op1->IsMaskAllBitsSet());
459458
}
460459
else
461460
{

src/coreclr/jit/hwintrinsiclistarm64sve.h

+6-1
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,15 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64,
6969
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
7070
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
7171
HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
72+
HARDWARE_INTRINSIC(Sve, SignExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxth, INS_invalid, INS_sve_sxth, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
73+
HARDWARE_INTRINSIC(Sve, SignExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sxtw, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
74+
HARDWARE_INTRINSIC(Sve, SignExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_sve_sxtb, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
7275
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
73-
7476
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
7577
HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
78+
HARDWARE_INTRINSIC(Sve, ZeroExtend16, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxth, INS_invalid, INS_sve_uxth, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
79+
HARDWARE_INTRINSIC(Sve, ZeroExtend32, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtw, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
80+
HARDWARE_INTRINSIC(Sve, ZeroExtend8, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_sve_uxtb, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation)
7681
HARDWARE_INTRINSIC(Sve, ZipHigh, -1, 2, true, {INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2, INS_sve_zip2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
7782
HARDWARE_INTRINSIC(Sve, ZipLow, -1, 2, true, {INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1, INS_sve_zip1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
7883

src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs

+158
Original file line numberDiff line numberDiff line change
@@ -1010,6 +1010,86 @@ internal Arm64() { }
10101010
/// </summary>
10111011
public static unsafe Vector<double> Multiply(Vector<double> left, Vector<double> right) => throw new PlatformNotSupportedException();
10121012

1013+
/// SignExtend16 : Sign-extend the low 16 bits
1014+
1015+
/// <summary>
1016+
/// svint32_t svexth[_s32]_m(svint32_t inactive, svbool_t pg, svint32_t op)
1017+
/// SXTH Ztied.S, Pg/M, Zop.S
1018+
/// MOVPRFX Zresult, Zinactive; SXTH Zresult.S, Pg/M, Zop.S
1019+
/// svint32_t svexth[_s32]_x(svbool_t pg, svint32_t op)
1020+
/// SXTH Ztied.S, Pg/M, Ztied.S
1021+
/// MOVPRFX Zresult, Zop; SXTH Zresult.S, Pg/M, Zop.S
1022+
/// svint32_t svexth[_s32]_z(svbool_t pg, svint32_t op)
1023+
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; SXTH Zresult.S, Pg/M, Zop.S
1024+
/// </summary>
1025+
public static unsafe Vector<int> SignExtend16(Vector<int> value) => throw new PlatformNotSupportedException();
1026+
1027+
/// <summary>
1028+
/// svint64_t svexth[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
1029+
/// SXTH Ztied.D, Pg/M, Zop.D
1030+
/// MOVPRFX Zresult, Zinactive; SXTH Zresult.D, Pg/M, Zop.D
1031+
/// svint64_t svexth[_s64]_x(svbool_t pg, svint64_t op)
1032+
/// SXTH Ztied.D, Pg/M, Ztied.D
1033+
/// MOVPRFX Zresult, Zop; SXTH Zresult.D, Pg/M, Zop.D
1034+
/// svint64_t svexth[_s64]_z(svbool_t pg, svint64_t op)
1035+
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; SXTH Zresult.D, Pg/M, Zop.D
1036+
/// </summary>
1037+
public static unsafe Vector<long> SignExtend16(Vector<long> value) => throw new PlatformNotSupportedException();
1038+
1039+
1040+
/// SignExtend32 : Sign-extend the low 32 bits
1041+
1042+
/// <summary>
1043+
/// svint64_t svextw[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
1044+
/// SXTW Ztied.D, Pg/M, Zop.D
1045+
/// MOVPRFX Zresult, Zinactive; SXTW Zresult.D, Pg/M, Zop.D
1046+
/// svint64_t svextw[_s64]_x(svbool_t pg, svint64_t op)
1047+
/// SXTW Ztied.D, Pg/M, Ztied.D
1048+
/// MOVPRFX Zresult, Zop; SXTW Zresult.D, Pg/M, Zop.D
1049+
/// svint64_t svextw[_s64]_z(svbool_t pg, svint64_t op)
1050+
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; SXTW Zresult.D, Pg/M, Zop.D
1051+
/// </summary>
1052+
public static unsafe Vector<long> SignExtend32(Vector<long> value) => throw new PlatformNotSupportedException();
1053+
1054+
1055+
/// SignExtend8 : Sign-extend the low 8 bits
1056+
1057+
/// <summary>
1058+
/// svint16_t svextb[_s16]_m(svint16_t inactive, svbool_t pg, svint16_t op)
1059+
/// SXTB Ztied.H, Pg/M, Zop.H
1060+
/// MOVPRFX Zresult, Zinactive; SXTB Zresult.H, Pg/M, Zop.H
1061+
/// svint16_t svextb[_s16]_x(svbool_t pg, svint16_t op)
1062+
/// SXTB Ztied.H, Pg/M, Ztied.H
1063+
/// MOVPRFX Zresult, Zop; SXTB Zresult.H, Pg/M, Zop.H
1064+
/// svint16_t svextb[_s16]_z(svbool_t pg, svint16_t op)
1065+
/// MOVPRFX Zresult.H, Pg/Z, Zop.H; SXTB Zresult.H, Pg/M, Zop.H
1066+
/// </summary>
1067+
public static unsafe Vector<short> SignExtend8(Vector<short> value) => throw new PlatformNotSupportedException();
1068+
1069+
/// <summary>
1070+
/// svint32_t svextb[_s32]_m(svint32_t inactive, svbool_t pg, svint32_t op)
1071+
/// SXTB Ztied.S, Pg/M, Zop.S
1072+
/// MOVPRFX Zresult, Zinactive; SXTB Zresult.S, Pg/M, Zop.S
1073+
/// svint32_t svextb[_s32]_x(svbool_t pg, svint32_t op)
1074+
/// SXTB Ztied.S, Pg/M, Ztied.S
1075+
/// MOVPRFX Zresult, Zop; SXTB Zresult.S, Pg/M, Zop.S
1076+
/// svint32_t svextb[_s32]_z(svbool_t pg, svint32_t op)
1077+
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; SXTB Zresult.S, Pg/M, Zop.S
1078+
/// </summary>
1079+
public static unsafe Vector<int> SignExtend8(Vector<int> value) => throw new PlatformNotSupportedException();
1080+
1081+
/// <summary>
1082+
/// svint64_t svextb[_s64]_m(svint64_t inactive, svbool_t pg, svint64_t op)
1083+
/// SXTB Ztied.D, Pg/M, Zop.D
1084+
/// MOVPRFX Zresult, Zinactive; SXTB Zresult.D, Pg/M, Zop.D
1085+
/// svint64_t svextb[_s64]_x(svbool_t pg, svint64_t op)
1086+
/// SXTB Ztied.D, Pg/M, Ztied.D
1087+
/// MOVPRFX Zresult, Zop; SXTB Zresult.D, Pg/M, Zop.D
1088+
/// svint64_t svextb[_s64]_z(svbool_t pg, svint64_t op)
1089+
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; SXTB Zresult.D, Pg/M, Zop.D
1090+
/// </summary>
1091+
public static unsafe Vector<long> SignExtend8(Vector<long> value) => throw new PlatformNotSupportedException();
1092+
10131093
/// Subtract : Subtract
10141094

10151095
/// <summary>
@@ -1248,6 +1328,84 @@ internal Arm64() { }
12481328
/// </summary>
12491329
public static unsafe Vector<ulong> UnzipOdd(Vector<ulong> left, Vector<ulong> right) => throw new PlatformNotSupportedException();
12501330

1331+
/// ZeroExtend16 : Zero-extend the low 16 bits
1332+
1333+
/// <summary>
1334+
/// svuint32_t svexth[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
1335+
/// UXTH Ztied.S, Pg/M, Zop.S
1336+
/// MOVPRFX Zresult, Zinactive; UXTH Zresult.S, Pg/M, Zop.S
1337+
/// svuint32_t svexth[_u32]_x(svbool_t pg, svuint32_t op)
1338+
/// UXTH Ztied.S, Pg/M, Ztied.S
1339+
/// AND Ztied.S, Ztied.S, #65535
1340+
/// svuint32_t svexth[_u32]_z(svbool_t pg, svuint32_t op)
1341+
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; UXTH Zresult.S, Pg/M, Zop.S
1342+
/// </summary>
1343+
public static unsafe Vector<uint> ZeroExtend16(Vector<uint> value) => throw new PlatformNotSupportedException();
1344+
1345+
/// <summary>
1346+
/// svuint64_t svexth[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
1347+
/// UXTH Ztied.D, Pg/M, Zop.D
1348+
/// MOVPRFX Zresult, Zinactive; UXTH Zresult.D, Pg/M, Zop.D
1349+
/// svuint64_t svexth[_u64]_x(svbool_t pg, svuint64_t op)
1350+
/// UXTH Ztied.D, Pg/M, Ztied.D
1351+
/// AND Ztied.D, Ztied.D, #65535
1352+
/// svuint64_t svexth[_u64]_z(svbool_t pg, svuint64_t op)
1353+
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; UXTH Zresult.D, Pg/M, Zop.D
1354+
/// </summary>
1355+
public static unsafe Vector<ulong> ZeroExtend16(Vector<ulong> value) => throw new PlatformNotSupportedException();
1356+
1357+
1358+
/// ZeroExtend32 : Zero-extend the low 32 bits
1359+
1360+
/// <summary>
1361+
/// svuint64_t svextw[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
1362+
/// UXTW Ztied.D, Pg/M, Zop.D
1363+
/// MOVPRFX Zresult, Zinactive; UXTW Zresult.D, Pg/M, Zop.D
1364+
/// svuint64_t svextw[_u64]_x(svbool_t pg, svuint64_t op)
1365+
/// UXTW Ztied.D, Pg/M, Ztied.D
1366+
/// AND Ztied.D, Ztied.D, #4294967295
1367+
/// svuint64_t svextw[_u64]_z(svbool_t pg, svuint64_t op)
1368+
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; UXTW Zresult.D, Pg/M, Zop.D
1369+
/// </summary>
1370+
public static unsafe Vector<ulong> ZeroExtend32(Vector<ulong> value) => throw new PlatformNotSupportedException();
1371+
1372+
/// ZeroExtend8 : Zero-extend the low 8 bits
1373+
1374+
/// <summary>
1375+
/// svuint16_t svextb[_u16]_m(svuint16_t inactive, svbool_t pg, svuint16_t op)
1376+
/// UXTB Ztied.H, Pg/M, Zop.H
1377+
/// MOVPRFX Zresult, Zinactive; UXTB Zresult.H, Pg/M, Zop.H
1378+
/// svuint16_t svextb[_u16]_x(svbool_t pg, svuint16_t op)
1379+
/// UXTB Ztied.H, Pg/M, Ztied.H
1380+
/// AND Ztied.H, Ztied.H, #255
1381+
/// svuint16_t svextb[_u16]_z(svbool_t pg, svuint16_t op)
1382+
/// MOVPRFX Zresult.H, Pg/Z, Zop.H; UXTB Zresult.H, Pg/M, Zop.H
1383+
/// </summary>
1384+
public static unsafe Vector<ushort> ZeroExtend8(Vector<ushort> value) => throw new PlatformNotSupportedException();
1385+
1386+
/// <summary>
1387+
/// svuint32_t svextb[_u32]_m(svuint32_t inactive, svbool_t pg, svuint32_t op)
1388+
/// UXTB Ztied.S, Pg/M, Zop.S
1389+
/// MOVPRFX Zresult, Zinactive; UXTB Zresult.S, Pg/M, Zop.S
1390+
/// svuint32_t svextb[_u32]_x(svbool_t pg, svuint32_t op)
1391+
/// UXTB Ztied.S, Pg/M, Ztied.S
1392+
/// AND Ztied.S, Ztied.S, #255
1393+
/// svuint32_t svextb[_u32]_z(svbool_t pg, svuint32_t op)
1394+
/// MOVPRFX Zresult.S, Pg/Z, Zop.S; UXTB Zresult.S, Pg/M, Zop.S
1395+
/// </summary>
1396+
public static unsafe Vector<uint> ZeroExtend8(Vector<uint> value) => throw new PlatformNotSupportedException();
1397+
1398+
/// <summary>
1399+
/// svuint64_t svextb[_u64]_m(svuint64_t inactive, svbool_t pg, svuint64_t op)
1400+
/// UXTB Ztied.D, Pg/M, Zop.D
1401+
/// MOVPRFX Zresult, Zinactive; UXTB Zresult.D, Pg/M, Zop.D
1402+
/// svuint64_t svextb[_u64]_x(svbool_t pg, svuint64_t op)
1403+
/// UXTB Ztied.D, Pg/M, Ztied.D
1404+
/// AND Ztied.D, Ztied.D, #255
1405+
/// svuint64_t svextb[_u64]_z(svbool_t pg, svuint64_t op)
1406+
/// MOVPRFX Zresult.D, Pg/Z, Zop.D; UXTB Zresult.D, Pg/M, Zop.D
1407+
/// </summary>
1408+
public static unsafe Vector<ulong> ZeroExtend8(Vector<ulong> value) => throw new PlatformNotSupportedException();
12511409

12521410
/// ZipHigh : Interleave elements from high halves of two inputs
12531411

0 commit comments

Comments
 (0)