Skip to content

Commit 7031052

Browse files
authored
Arm64/Sve: Implement divide/multiply/subtract Math APIs (#101578)
* Add Divide/Multiply/Subtract * Add hwintrinsic support for Divide,Multiply and Subtract * remove space * Add test cases * trim space * handle the scenario for 2 operands predicated-only instruction * Remove *DIVR and *SUBR from comments * fix jit format * fix merge conflict
1 parent 73a428d commit 7031052

File tree

6 files changed

+660
-5
lines changed

6 files changed

+660
-5
lines changed

src/coreclr/jit/hwintrinsiccodegenarm64.cpp

+23-4
Original file line numberDiff line numberDiff line change
@@ -511,11 +511,30 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
511511

512512
if (falseReg != embMaskOp1Reg)
513513
{
514-
// None of targetReg, embMaskOp1Reg and falseReg are same. In such case, use the
515-
// "unpredicated" version of the instruction and then use "sel" to select the active lanes.
514+
// At the point, targetReg != embMaskOp1Reg != falseReg
515+
if (HWIntrinsicInfo::IsOptionalEmbeddedMaskedOperation(intrinEmbMask.id))
516+
{
517+
// If the embedded instruction supports optional mask operation, use the "unpredicated"
518+
// version of the instruction, followed by "sel" to select the active lanes.
519+
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg,
520+
embMaskOp2Reg, opt, INS_SCALABLE_OPTS_UNPREDICATED);
521+
}
522+
else
523+
{
524+
// If the instruction just has "predicated" version, then move the "embMaskOp1Reg"
525+
// into targetReg. Next, do the predicated operation on the targetReg and last,
526+
// use "sel" to select the active lanes based on mask, and set inactive lanes
527+
// to falseReg.
528+
529+
assert(targetReg != embMaskOp2Reg);
530+
assert(HWIntrinsicInfo::IsEmbeddedMaskedOperation(intrinEmbMask.id));
531+
532+
GetEmitter()->emitIns_R_R(INS_sve_movprfx, EA_SCALABLE, targetReg, embMaskOp1Reg);
533+
534+
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg,
535+
opt);
536+
}
516537

517-
GetEmitter()->emitIns_R_R_R(insEmbMask, emitSize, targetReg, embMaskOp1Reg, embMaskOp2Reg,
518-
opt, INS_SCALABLE_OPTS_UNPREDICATED);
519538
GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, targetReg,
520539
falseReg, opt, INS_SCALABLE_OPTS_UNPREDICATED);
521540
break;

src/coreclr/jit/hwintrinsiclistarm64sve.h

+3
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@ HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit,
4242
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
4343
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
4444
HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask)
45+
HARDWARE_INTRINSIC(Sve, Divide, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_sdiv, INS_sve_udiv, INS_sve_sdiv, INS_sve_udiv, INS_sve_fdiv, INS_sve_fdiv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation)
4546
HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
4647
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt16, -1, 2, false, {INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
4748
HARDWARE_INTRINSIC(Sve, LoadVectorByteZeroExtendToInt32, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1b, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
@@ -67,6 +68,8 @@ HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt32,
6768
HARDWARE_INTRINSIC(Sve, LoadVectorUInt16ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1h, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
6869
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
6970
HARDWARE_INTRINSIC(Sve, LoadVectorUInt32ZeroExtendToUInt64, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ld1w, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_ExplicitMaskedOperation|HW_Flag_LowMaskedOperation)
71+
HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
72+
HARDWARE_INTRINSIC(Sve, Subtract, -1, 2, true, {INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_sub, INS_sve_fsub, INS_sve_fsub}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics)
7073

7174
HARDWARE_INTRINSIC(Sve, UnzipEven, -1, 2, true, {INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1, INS_sve_uzp1}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)
7275
HARDWARE_INTRINSIC(Sve, UnzipOdd, -1, 2, true, {INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2, INS_sve_uzp2}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen)

0 commit comments

Comments
 (0)