From e1a5ac91397faca2cb2c29aaedbb2933a8c398eb Mon Sep 17 00:00:00 2001
From: "Aman Khalid (from Dev Box)"
Date: Mon, 15 Jul 2024 12:37:32 -0400
Subject: [PATCH 01/10] Add AddRotateComplex

---
 src/coreclr/jit/codegenarm64test.cpp | 8 +-
 src/coreclr/jit/emitarm64sve.cpp | 4 +-
 src/coreclr/jit/hwintrinsicarm64.cpp | 5 +
 src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 206 ++++++++++--------
 src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 +
 src/coreclr/jit/lowerarmarch.cpp | 1 +
 .../Arm/Sve.PlatformNotSupported.cs | 20 ++
 .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 20 ++
 .../ref/System.Runtime.Intrinsics.cs | 3 +
 .../GenerateHWIntrinsicTests_Arm.cs | 10 +-
 .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 38 ++++
 .../_SveImmBinaryOpTestTemplate.template | 39 +++-
 12 files changed, 246 insertions(+), 109 deletions(-)

diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp
index 0406bc4a6e19a..588c968846c9f 100644
--- a/src/coreclr/jit/codegenarm64test.cpp
+++ b/src/coreclr/jit/codegenarm64test.cpp
@@ -8391,13 +8391,13 @@ void CodeGen::genArm64EmitterUnitTestsSve()
                                 INS_OPTS_SCALABLE_D); // ST1B {.D }, , [, .D]

     // IF_SVE_GP_3A
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 90,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 0,
                                 INS_OPTS_SCALABLE_H); // FCADD ., /M, ., .,
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 1,
                                 INS_OPTS_SCALABLE_H); // FCADD ., /M, ., .,
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 1,
                                 INS_OPTS_SCALABLE_S); // FCADD ., /M, ., .,
-    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 270,
+    theEmitter->emitIns_R_R_R_I(INS_sve_fcadd, EA_SCALABLE, REG_V0, REG_P1, REG_V2, 1,
                                 INS_OPTS_SCALABLE_D); // FCADD ., /M, ., .,

     // IF_SVE_GT_4A
diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp
index 589cc1b29bae6..22cb131827e48 100644
--- a/src/coreclr/jit/emitarm64sve.cpp
+++ b/src/coreclr/jit/emitarm64sve.cpp
@@ -4410,7 +4410,6 @@ void emitter::emitInsSve_R_R_R(instruction ins,
 /*****************************************************************************
 *
 * Add a SVE instruction referencing three registers and a constant.
- * Do not call this directly. Use 'emitIns_R_R_R_I' instead.
 */

 void emitter::emitInsSve_R_R_R_I(instruction ins,
@@ -5577,7 +5576,7 @@ void emitter::emitInsSve_R_R_R_I(instruction ins,
            assert(isLowPredicateRegister(reg2));
            assert(isVectorRegister(reg3));
            assert(isScalableVectorSize(size));
-           imm = emitEncodeRotationImm90_or_270(imm);
+           assert(emitIsValidEncodedRotationImm90_or_270(imm));
            fmt = IF_SVE_GP_3A;
            break;

@@ -5860,7 +5859,6 @@ void emitter::emitInsSve_R_R_R_I_I(instruction ins,
 /*****************************************************************************
 *
 * Add a SVE instruction referencing four registers.
- * Do not call this directly. Use 'emitIns_R_R_R_R' instead.
*/ void emitter::emitInsSve_R_R_R_R(instruction ins, diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index a3fed94ee71a8..d1e33b3bc00ab 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -504,6 +504,11 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = (int)SVE_PRFOP_CONST15; break; + case NI_Sve_AddRotateComplex: + immLowerBound = 0; + immUpperBound = 1; + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index bb9e340d03d37..5bd8685cd28ad 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -706,102 +706,105 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case 3: { assert(instrIsRMW); - assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); - assert(falseReg != embMaskOp3Reg); - - // For FMA, the operation we are trying to perform is: - // result = op1 + (op2 * op3) - // - // There are two instructions that can be used depending on which operand's register, - // optionally, will store the final result. - // - // 1. If the result is stored in the operand that was used as an "addend" in the operation, - // then we use `FMLA` format: - // reg1 = reg1 + (reg2 * reg3) - // - // 2. If the result is stored in the operand that was used as a "multiplicand" in the operation, - // then we use `FMAD` format: - // reg1 = (reg1 * reg2) + reg3 - // - // Check if the result's register is same as that of one of the operand's register and accordingly - // pick the appropriate format. Suppose `targetReg` holds the result, then we have following cases: - // - // Case# 1: Result is stored in the operand that held the "addend" - // targetReg == reg1 - // - // We generate the FMLA instruction format and no further changes are needed. - // - // Case# 2: Result is stored in the operand `op2` that held the "multiplicand" - // targetReg == reg2 - // - // So we basically have an operation: - // reg2 = reg1 + (reg2 * reg3) - // - // Since, the result will be stored in the "multiplicand", we pick format `FMAD`. - // Then, we rearrange the operands to ensure that the operation is done correctly. - // reg2 = reg1 + (reg2 * reg3) // to start with - // reg2 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 - // reg1 = reg3 + (reg1 * reg2) // swap reg1 <--> reg2 - // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format - // - // Case# 3: Result is stored in the operand `op3` that held the "multiplier" - // targetReg == reg3 - // - // So we basically have an operation: - // reg3 = reg1 + (reg2 * reg3) - // Since, the result will be stored in the "multiplier", we again pick format `FMAD`. - // Then, we rearrange the operands to ensure that the operation is done correctly. - // reg3 = reg1 + (reg2 * reg3) // to start with - // reg1 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 - // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format - - bool useAddend = true; - if (targetReg == embMaskOp2Reg) - { - // Case# 2 - useAddend = false; - std::swap(embMaskOp1Reg, embMaskOp3Reg); - std::swap(embMaskOp1Reg, embMaskOp2Reg); - } - else if (targetReg == embMaskOp3Reg) - { - // Case# 3 - useAddend = false; - std::swap(embMaskOp1Reg, embMaskOp3Reg); - } - else - { - // Case# 1 - } - switch (intrinEmbMask.id) + if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) { - case NI_Sve_FusedMultiplyAdd: - insEmbMask = useAddend ? 
INS_sve_fmla : INS_sve_fmad; - break; + assert(falseReg != embMaskOp3Reg); + // For FMA, the operation we are trying to perform is: + // result = op1 + (op2 * op3) + // + // There are two instructions that can be used depending on which operand's register, + // optionally, will store the final result. + // + // 1. If the result is stored in the operand that was used as an "addend" in the operation, + // then we use `FMLA` format: + // reg1 = reg1 + (reg2 * reg3) + // + // 2. If the result is stored in the operand that was used as a "multiplicand" in the operation, + // then we use `FMAD` format: + // reg1 = (reg1 * reg2) + reg3 + // + // Check if the result's register is same as that of one of the operand's register and + // accordingly pick the appropriate format. Suppose `targetReg` holds the result, then we have + // following cases: + // + // Case# 1: Result is stored in the operand that held the "addend" + // targetReg == reg1 + // + // We generate the FMLA instruction format and no further changes are needed. + // + // Case# 2: Result is stored in the operand `op2` that held the "multiplicand" + // targetReg == reg2 + // + // So we basically have an operation: + // reg2 = reg1 + (reg2 * reg3) + // + // Since, the result will be stored in the "multiplicand", we pick format `FMAD`. + // Then, we rearrange the operands to ensure that the operation is done correctly. + // reg2 = reg1 + (reg2 * reg3) // to start with + // reg2 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 + // reg1 = reg3 + (reg1 * reg2) // swap reg1 <--> reg2 + // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format + // + // Case# 3: Result is stored in the operand `op3` that held the "multiplier" + // targetReg == reg3 + // + // So we basically have an operation: + // reg3 = reg1 + (reg2 * reg3) + // Since, the result will be stored in the "multiplier", we again pick format `FMAD`. + // Then, we rearrange the operands to ensure that the operation is done correctly. + // reg3 = reg1 + (reg2 * reg3) // to start with + // reg1 = reg3 + (reg2 * reg1) // swap reg1 <--> reg3 + // reg1 = (reg1 * reg2) + reg3 // rearrange to get FMAD format + + bool useAddend = true; + if (targetReg == embMaskOp2Reg) + { + // Case# 2 + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp3Reg); + std::swap(embMaskOp1Reg, embMaskOp2Reg); + } + else if (targetReg == embMaskOp3Reg) + { + // Case# 3 + useAddend = false; + std::swap(embMaskOp1Reg, embMaskOp3Reg); + } + else + { + // Case# 1 + } - case NI_Sve_FusedMultiplyAddNegated: - insEmbMask = useAddend ? INS_sve_fnmla : INS_sve_fnmad; - break; + switch (intrinEmbMask.id) + { + case NI_Sve_FusedMultiplyAdd: + insEmbMask = useAddend ? INS_sve_fmla : INS_sve_fmad; + break; - case NI_Sve_FusedMultiplySubtract: - insEmbMask = useAddend ? INS_sve_fmls : INS_sve_fmsb; - break; + case NI_Sve_FusedMultiplyAddNegated: + insEmbMask = useAddend ? INS_sve_fnmla : INS_sve_fnmad; + break; - case NI_Sve_FusedMultiplySubtractNegated: - insEmbMask = useAddend ? INS_sve_fnmls : INS_sve_fnmsb; - break; + case NI_Sve_FusedMultiplySubtract: + insEmbMask = useAddend ? INS_sve_fmls : INS_sve_fmsb; + break; - case NI_Sve_MultiplyAdd: - insEmbMask = useAddend ? INS_sve_mla : INS_sve_mad; - break; + case NI_Sve_FusedMultiplySubtractNegated: + insEmbMask = useAddend ? INS_sve_fnmls : INS_sve_fnmsb; + break; - case NI_Sve_MultiplySubtract: - insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; - break; + case NI_Sve_MultiplyAdd: + insEmbMask = useAddend ? 
INS_sve_mla : INS_sve_mad; + break; - default: - unreached(); + case NI_Sve_MultiplySubtract: + insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; + break; + + default: + unreached(); + } } if (intrin.op3->IsVectorZero()) @@ -811,7 +814,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) assert(targetReg != embMaskOp2Reg); assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, + opt); } else { @@ -851,8 +855,8 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // into the targetReg using `sel`. Since this is RMW, the active lanes should // have the value from embMaskOp1Reg - GetEmitter()->emitIns_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, embMaskOp1Reg, - falseReg, opt); + GetEmitter()->emitInsSve_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, + embMaskOp1Reg, falseReg, opt); } } else if (targetReg != embMaskOp1Reg) @@ -860,14 +864,26 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) // If target already contains the values of `falseReg`, just merge the lanes from // `embMaskOp1Reg`, again because this is RMW semantics. - GetEmitter()->emitIns_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, - opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, + opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); } } // Finally, perform the desired operation. - GetEmitter()->emitIns_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); + if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) + { + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + helper.ImmValue(), opt); + } + } + else + { + GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); + } break; } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index ab78d2f0f1af6..59cc14902d5d1 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -24,6 +24,7 @@ HARDWARE_INTRINSIC(Sve, AbsoluteCompareLessThanOrEqual, HARDWARE_INTRINSIC(Sve, AbsoluteDifference, -1, -1, false, {INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_sabd, INS_sve_uabd, INS_sve_fabd, INS_sve_fabd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Add, -1, -1, false, {INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_add, INS_sve_fadd, INS_sve_fadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, AddAcross, -1, 1, true, {INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_saddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_uaddv, INS_sve_faddv, INS_sve_faddv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) +HARDWARE_INTRINSIC(Sve, AddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcadd, INS_sve_fcadd}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, AddSaturate, -1, 2, true, {INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_sve_sqadd, INS_sve_uqadd, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, AddSequentialAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fadda, INS_sve_fadda}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics) HARDWARE_INTRINSIC(Sve, And, -1, -1, false, {INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_sve_and, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index b04e97844f1f2..2ec3137cb573d 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3434,6 +3434,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_PrefetchInt32: case NI_Sve_PrefetchInt64: case NI_Sve_ExtractVector: + case NI_Sve_AddRotateComplex: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op3)); if (intrin.op3->IsCnsIntOrI()) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index bfb5314bb97a7..19be11cd6c79e 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -347,6 +347,26 @@ internal Arm64() { } /// public static unsafe Vector AddAcross(Vector value) { throw new PlatformNotSupportedException(); } + /// Complex add with rotate + + /// + /// svfloat64_t svcadd[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svcadd[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw new 
PlatformNotSupportedException(); } + /// AddSaturate : Saturating add /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index a0b8086b67992..2b678fbdeb41c 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -377,6 +377,26 @@ internal Arm64() { } /// public static unsafe Vector AddAcross(Vector value) => AddAcross(value); + /// Complex add with rotate + + /// + /// svfloat64_t svcadd[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.D, Pg/M, Ztied1.D, Zop2.D, #imm_rotation + /// svfloat64_t svcadd[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) => AddRotateComplex(left, right, rotation); + + /// + /// svfloat32_t svcadd[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// FCADD Ztied1.S, Pg/M, Ztied1.S, Zop2.S, #imm_rotation + /// svfloat32_t svcadd[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, uint64_t imm_rotation) + /// + public static unsafe Vector AddRotateComplex(Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) => AddRotateComplex(left, right, rotation); + /// AddSaturate : Saturating add /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 1afa8c2c7f750..3bd94b13edb07 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4364,6 +4364,9 @@ internal Arm64() { } public static System.Numerics.Vector AddAcross(System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector AddAcross(System.Numerics.Vector value) { throw null; } + public static System.Numerics.Vector AddRotateComplex(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw null; } + public static System.Numerics.Vector AddRotateComplex(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(1))] byte rotation) { throw null; } + public static System.Numerics.Vector AddSaturate(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector AddSaturate(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector AddSaturate(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 6e548bd1926db..3076b270cad15 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ 
b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -280,6 +280,7 @@ ("_SveBinaryOpDifferentTypesTestTemplate.template", "SveVecBinOpDifferentTypesTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveBinaryMaskOpTestTemplate.template", "SveMaskVecBinOpConvertTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveImmBinaryOpTestTemplate.template", "SveVecImmBinOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), + ("_SveImmBinaryOpTestTemplate.template", "SveVecImmBinOpVecTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), ("_SveImmUnaryOpTestTemplate.template", "SveVecImmUnOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveTernOpTestTemplate.template", "SveVecTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveTernOpTestTemplate.template", "SveVecTernOpVecTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), @@ -3113,6 +3114,11 @@ ("SveVecReduceUnOpTest.template", new Dictionary { ["TestName"] = "Sve_AddAcross_ulong_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddAcross", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["ValidateReduceOpResult"] = "Helpers.AddAcrossWidening(firstOp) != result[0]", ["ValidateRemainingResults"] = "result[i] != 0"}), ("SveVecReduceUnOpTest.template", new Dictionary { ["TestName"] = "Sve_AddAcross_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddAcross", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["LargestVectorSize"] = "64", 
["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["ValidateReduceOpResult"] = "Helpers.AddAcross(firstOp) != result[0]", ["ValidateRemainingResults"] = "result[i] != 0"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_float_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_double_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecImmBinOpVecTest.template", new Dictionary { ["TestName"] = "Sve_AddRotateComplex_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "1", ["InvalidImm"] = "2", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.AddRotateComplex(firstOp, secondOp, Imm))", ["GetVectorResult"] = "Helpers.AddRotateComplex(first, second, Imm)"}), + ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_AddSaturate_sbyte", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddSaturate", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "64", ["NextValueOp1"] = 
"(sbyte)TestLibrary.Generator.GetSByte()", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "Helpers.AddSaturate(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.AddSaturate(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_AddSaturate_short", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddSaturate", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int16", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int16", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int16", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "(short)TestLibrary.Generator.GetInt16()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()", ["ValidateIterResult"] = "Helpers.AddSaturate(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.AddSaturate(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_AddSaturate_int", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "AddSaturate", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Int32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Int32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.AddSaturate(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.AddSaturate(left[i], right[i])"}), @@ -3850,8 +3856,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", 
["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyExtended_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyExtended", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = 
"TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "Helpers.MultiplyExtended(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyExtended(left[i], right[i])"}), ("SveVecBinOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyExtended_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyExtended", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "Helpers.MultiplyExtended(left[i], right[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyExtended(left[i], right[i])"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 95619d36dfe3b..0efc53fc9e7b2 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -6030,6 +6030,25 @@ private static ulong Pairwise(Func pairOp, ulong[] op1, ulo public static float AddPairwise(float[] op1, float[] op2, int i) => Pairwise(Add, op1, op2, i); + public static float[] AddRotateComplex(float[] op1, float[] op2, byte rot) + { + for (int i = 0; i < op1.Length; i += 2) + { + if (rot == 0) + { + op1[i] -= op2[i + 1]; + op1[i + 1] += op2[i]; + } + else + { + op1[i] += op2[i + 1]; + op1[i + 1] -= op2[i]; + } + } + + return op1; + } + public static float Max(float op1, float op2) => Math.Max(op1, op2); public static float MaxPairwise(float[] op1, int i) => Pairwise(Max, op1, i); @@ -6080,6 +6099,25 @@ private static float Pairwise(Func pairOp, float[] op1, flo public static double AddPairwise(double[] op1, double[] op2, int i) => Pairwise(Add, op1, op2, i); + public static double[] AddRotateComplex(double[] op1, double[] op2, byte rot) + { + for (int i = 0; i < op1.Length; i += 2) + { + if (rot == 0) + { + op1[i] -= op2[i + 1]; + op1[i + 1] += op2[i]; + } + else + { + op1[i] += op2[i + 1]; + op1[i + 1] -= op2[i]; + } + } + + return op1; + } + public static double Max(double op1, double op2) => Math.Max(op1, op2); public static double MaxPairwise(double[] op1, int i) => Pairwise(Max, op1, i); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template index 33d9c59e8dde1..27e0221bc83eb 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmBinaryOpTestTemplate.template @@ -9,6 +9,7 @@ ******************************************************************************/ using System; +using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -62,6 +63,9 @@ namespace JIT.HardwareIntrinsics.Arm // Validates executing the test inside conditional, with op3 as zero test.ConditionalSelect_ZeroOp(); + + // Validates basic functionality fails with an invalid imm, using Unsafe.ReadUnaligned + test.RunBasicScenario_UnsafeRead_InvalidImm(); } else { @@ -142,7 +146,7 @@ namespace JIT.HardwareIntrinsics.Arm for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref 
_data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); return testStruct; @@ -170,7 +174,7 @@ namespace JIT.HardwareIntrinsics.Arm private {Op1VectorType}<{Op1BaseType}> _mask; private {Op1VectorType}<{Op1BaseType}> _fld1; private {Op1VectorType}<{Op1BaseType}> _fld2; - private {Op1VectorType}<{Op1BaseType}> _falseFld; + private {Op1VectorType}<{Op1BaseType}> _falseFld; private DataTable _dataTable; @@ -178,16 +182,16 @@ namespace JIT.HardwareIntrinsics.Arm { Succeeded = true; - for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueMask} % 2); } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _falseFld), ref Unsafe.As<{Op1BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } - for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp1}; } + for (var i = 0; i < Op1ElementCount; i++) { _data2[i] = {NextValueOp2}; } _dataTable = new DataTable(_data1, _data2, new {RetBaseType}[RetElementCount], LargestVectorSize); } @@ -209,6 +213,31 @@ namespace JIT.HardwareIntrinsics.Arm ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); } + public void RunBasicScenario_UnsafeRead_InvalidImm() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead_InvalidImm)); + + bool succeeded = false; + try + { + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + {InvalidImm} + ); + Console.WriteLine(result); + } + catch (ArgumentOutOfRangeException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + public void RunBasicScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); From d6a0afe01dad956327fa9b68054e5144a7d9ca63 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 17:08:59 -0400 Subject: [PATCH 02/10] Add MultiplyAddRotateComplex; fix template --- src/coreclr/jit/codegenarm64test.cpp | 6 +-- src/coreclr/jit/emitarm64sve.cpp | 6 +-- src/coreclr/jit/hwintrinsicarm64.cpp | 5 +++ 
src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 41 +++++++++++++++++-- src/coreclr/jit/hwintrinsiclistarm64sve.h | 1 + src/coreclr/jit/lowerarmarch.cpp | 1 + src/coreclr/jit/lsraarm64.cpp | 16 +++++++- .../Arm/Sve.PlatformNotSupported.cs | 20 +++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 20 +++++++++ .../ref/System.Runtime.Intrinsics.cs | 3 ++ .../GenerateHWIntrinsicTests_Arm.cs | 18 ++++++-- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 40 ++++++++++++++++++ .../Shared/_SveImmTernOpTestTemplate.template | 32 ++++++++++++++- 13 files changed, 192 insertions(+), 17 deletions(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 588c968846c9f..2bc3da3b9d7f8 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -8403,11 +8403,11 @@ void CodeGen::genArm64EmitterUnitTestsSve() // IF_SVE_GT_4A theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P1, REG_V3, REG_V4, 0, INS_OPTS_SCALABLE_H); // FCMLA ., /M, ., ., - theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_P2, REG_V1, REG_V5, 90, + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V0, REG_P2, REG_V1, REG_V5, 1, INS_OPTS_SCALABLE_S); // FCMLA ., /M, ., ., - theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 180, + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 2, INS_OPTS_SCALABLE_D); // FCMLA ., /M, ., ., - theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 270, + theEmitter->emitIns_R_R_R_R_I(INS_sve_fcmla, EA_SCALABLE, REG_V2, REG_P3, REG_V0, REG_V6, 3, INS_OPTS_SCALABLE_D); // FCMLA ., /M, ., ., // IF_SVE_GI_4A diff --git a/src/coreclr/jit/emitarm64sve.cpp b/src/coreclr/jit/emitarm64sve.cpp index 22cb131827e48..6fb6723eb6381 100644 --- a/src/coreclr/jit/emitarm64sve.cpp +++ b/src/coreclr/jit/emitarm64sve.cpp @@ -6989,7 +6989,7 @@ void emitter::emitInsSve_R_R_R_R_I(instruction ins, assert(isVectorRegister(reg3)); assert(isVectorRegister(reg4)); assert(isScalableVectorSize(size)); - imm = emitEncodeRotationImm0_to_270(imm); + assert(emitIsValidEncodedRotationImm0_to_270(imm)); fmt = IF_SVE_GT_4A; break; @@ -9796,7 +9796,7 @@ void emitter::emitIns_PRFOP_R_R_I(instruction ins, /*static*/ bool emitter::emitIsValidEncodedRotationImm90_or_270(ssize_t imm) { - return (imm == 0) || (imm == 1); + return isValidUimm<1>(imm); } /************************************************************************ @@ -9865,7 +9865,7 @@ void emitter::emitIns_PRFOP_R_R_I(instruction ins, /*static*/ bool emitter::emitIsValidEncodedRotationImm0_to_270(ssize_t imm) { - return (imm >= 0) && (imm <= 3); + return isValidUimm<2>(imm); } /***************************************************************************** diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index d1e33b3bc00ab..2d494b65fd945 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -509,6 +509,11 @@ void HWIntrinsicInfo::lookupImmBounds( immUpperBound = 1; break; + case NI_Sve_MultiplyAddRotateComplex: + immLowerBound = 0; + immUpperBound = 3; + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5bd8685cd28ad..595d95520d183 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -484,6 +484,12 @@ void 
CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) switch (intrinEmbMask.numOperands) { + case 4: + assert(intrinEmbMask.op4 != nullptr); + assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); + assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); + FALLTHROUGH; + case 3: assert(intrinEmbMask.op3 != nullptr); embMaskOp3Reg = intrinEmbMask.op3->GetRegNum(); @@ -704,12 +710,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } case 3: + assert(!HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id) || (falseReg != embMaskOp3Reg)); + FALLTHROUGH; + + case 4: { assert(instrIsRMW); if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) { - assert(falseReg != embMaskOp3Reg); // For FMA, the operation we are trying to perform is: // result = op1 + (op2 * op3) // @@ -802,6 +811,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; break; + case NI_Sve_MultiplyAddRotateComplex: + assert(useAddend); + break; + default: unreached(); } @@ -870,7 +883,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } // Finally, perform the desired operation. - if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) + const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id); + + if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) + { + if (embHasImmediateOperand) + { + assert(intrinEmbMask.id == NI_Sve_MultiplyAddRotateComplex); + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + embMaskOp2Reg, embMaskOp3Reg, helper.ImmValue(), + opt); + } + } + else + { + GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); + } + } + else if (embHasImmediateOperand) { HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) @@ -881,8 +915,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); + unreached(); } break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 59cc14902d5d1..506fb10e7a7be 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -187,6 +187,7 @@ HARDWARE_INTRINSIC(Sve, MinNumber, HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) +HARDWARE_INTRINSIC(Sve, 
MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_FmaIntrinsic) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/lowerarmarch.cpp b/src/coreclr/jit/lowerarmarch.cpp index 2ec3137cb573d..c28aaf3064d0b 100644 --- a/src/coreclr/jit/lowerarmarch.cpp +++ b/src/coreclr/jit/lowerarmarch.cpp @@ -3638,6 +3638,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_FusedMultiplyAddBySelectedScalar: case NI_Sve_FusedMultiplySubtractBySelectedScalar: + case NI_Sve_MultiplyAddRotateComplex: assert(hasImmediateOperand); assert(varTypeIsIntegral(intrin.op4)); if (intrin.op4->IsCnsIntOrI()) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index d1fb48fc16d93..7a0ee39cc99f4 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1875,8 +1875,9 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) { + const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmb.id); assert(embOp2Node->isRMWHWIntrinsic(compiler)); - assert(numArgs == 3); + assert((numArgs == 3) || (embHasImmediateOperand && (numArgs == 4))); LIR::Use use; GenTree* user = nullptr; @@ -1915,6 +1916,17 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += 1; srcCount += BuildDelayFreeUses(emitOp2, emitOp1); srcCount += BuildDelayFreeUses(emitOp3, emitOp1); + + if (embHasImmediateOperand) + { + assert(numArgs == 4); + srcCount += BuildDelayFreeUses(intrinEmb.op4, emitOp1); + if (!embOp2Node->Op(4)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + } + srcCount += BuildDelayFreeUses(intrin.op3, emitOp1); } else @@ -1926,7 +1938,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou // that encodes the immediate if (intrinEmb.id == NI_Sve_ShiftRightArithmeticForDivide) { - assert(embOp2Node->GetOperandCount() == 2); + assert(numArgs == 2); if (!embOp2Node->Op(2)->isContainedIntOrIImmed()) { buildInternalIntRegisterDefForNode(embOp2Node); diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index 
19be11cd6c79e..2d5ee333213c4 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -5737,6 +5737,26 @@ internal Arm64() { } /// public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) { throw new PlatformNotSupportedException(); } + /// Complex multiply-add with rotate + + /// + /// svfloat64_t svcmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw new PlatformNotSupportedException(); } + + /// + /// svfloat32_t svcmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation + /// svfloat32_t svcmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation + /// svfloat32_t svcmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw new PlatformNotSupportedException(); } + /// MultiplyBySelectedScalar : Multiply /// diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 2b678fbdeb41c..21b62f267546b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -5793,6 +5793,26 @@ internal Arm64() { } /// public static unsafe Vector MultiplyAdd(Vector addend, Vector left, Vector right) => MultiplyAdd(addend, left, right); + /// Complex multiply-add with rotate + + /// + /// svfloat64_t svcmla[_f64]_m(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_x(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.D, Pg/M, Zop2.D, Zop3.D, #imm_rotation + /// svfloat64_t svcmla[_f64]_z(svbool_t pg, svfloat64_t op1, svfloat64_t op2, svfloat64_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) => MultiplyAddRotateComplex(addend, left, right, rotation); + + /// + /// svfloat32_t svcmla[_f32]_m(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation + /// svfloat32_t svcmla[_f32]_x(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// FCMLA Ztied1.S, Pg/M, Zop2.S, Zop3.S, #imm_rotation 
+ /// svfloat32_t svcmla[_f32]_z(svbool_t pg, svfloat32_t op1, svfloat32_t op2, svfloat32_t op3, uint64_t imm_rotation) + /// + public static unsafe Vector MultiplyAddRotateComplex(Vector addend, Vector left, Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) => MultiplyAddRotateComplex(addend, left, right, rotation); + /// MultiplyBySelectedScalar : Multiply /// diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 3bd94b13edb07..7225bf11491a6 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -5181,6 +5181,9 @@ internal Arm64() { } public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplyAddRotateComplex(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; } + public static System.Numerics.Vector MultiplyAddRotateComplex(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected(Min = 0, Max = (byte)(3))] byte rotation) { throw null; } + public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 3076b270cad15..bd2b2f2a5be2a 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -286,6 +286,7 @@ ("_SveTernOpTestTemplate.template", "SveVecTernOpVecTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), ("_SveTernOpFirstArgTestTemplate.template", "SveVecTernOpFirstArgTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), + ("_SveImmTernOpTestTemplate.template", "SveVecImmTernOpVecTest.template", new Dictionary { ["TemplateName"] = 
"Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_VectorValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleVecOpTest_VectorValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleVecOpTest_VectorValidationLogicForCndSel_FalseValue }), ("_SveTernOpMaskedOpTestTemplate.template", "SveVecTernOpMaskedTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveImmTernOpFirstArgTestTemplate.template", "SveVecImmTernOpFirstArgTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleVecOpTest_ValidationLogic, ["TemplateValidationLogicForCndSel"] = SimpleTernVecOpTest_ValidationLogicForCndSel, ["TemplateValidationLogicForCndSel_FalseValue"] = SimpleTernVecOpTest_ValidationLogicForCndSel_FalseValue }), ("_SveScalarTernOpTestTemplate.template", "SveScalarTernOpTest.template", new Dictionary { ["TemplateName"] = "Simple", ["TemplateValidationLogic"] = SimpleScalarOpTest_ValidationLogic }), @@ -3370,8 +3371,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplyAdd_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = 
"TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAdd(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", 
["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplyAddNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplyAddNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplyAddNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), @@ -3379,8 +3380,8 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_FusedMultiplySubtract_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtract", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = 
"Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "1", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), - ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "0", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), + ("SveVecImmTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = 
"BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtract(firstOp[i], secondOp[i], thirdOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.SingleToInt32Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpTest.template", new Dictionary {["TestName"] = "Sve_FusedMultiplySubtractNegated_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "FusedMultiplySubtractNegated", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])) != BitConverter.DoubleToInt64Bits(result[i])", ["GetIterResult"] = "Helpers.FusedMultiplySubtractNegated(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), @@ -3856,6 +3857,15 @@ ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_uint", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt32", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt32", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), ("SveVecTernOpTest.template", new Dictionary { ["TestName"] = "Sve_MultiplyAdd_ulong", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAdd", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "UInt64", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "UInt64", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp3"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i]) != 
result[i]", ["GetIterResult"] = "Helpers.MultiplyAdd(firstOp[i], secondOp[i], thirdOp[i])", ["ConvertFunc"] = ""}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "0", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "2", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_float_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetSingle()",["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",["NextValueOp3"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "3", ["InvalidImm"] = "4", ["ValidateVectorResult"] = 
"!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "2", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmTernOpVecTest.template", new Dictionary {["TestName"] = "Sve_MultiplyAddRotateComplex_double_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyAddRotateComplex", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double",["LargestVectorSize"] = "64",["NextValueOp1"] = "TestLibrary.Generator.GetDouble()",["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",["NextValueOp3"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = 
"Helpers.getMaskDouble()", ["Imm"] = "3", ["InvalidImm"] = "4", ["ValidateVectorResult"] = "!result.SequenceEqual(Helpers.MultiplyAddRotateComplex(firstOp, secondOp, thirdOp, Imm))", ["GetVectorResult"] = "Helpers.MultiplyAddRotateComplex(first, second, third, Imm)"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "1", ["InvalidImm"] = "4", ["ValidateIterResult"] = "BitConverter.SingleToInt32Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.SingleToInt32Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_MultiplyBySelectedScalar_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "MultiplyBySelectedScalar", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["Op3VectorType"] = "Vector", ["Op3BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "0", ["InvalidImm"] = "2", ["ValidateIterResult"] = "BitConverter.DoubleToInt64Bits(Helpers.Multiply(firstOp[i], secondOp[Imm])) != BitConverter.DoubleToInt64Bits(result[i])",["GetIterResult"] = "Helpers.Multiply(firstOp[i], secondOp[Imm])", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 0efc53fc9e7b2..57b5891c265d9 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -5234,6 +5234,26 @@ private static (ulong val, bool ovf) ShiftOvf(ulong value, int shift) public static float MinNumberPairwise(float[] op1, float[] op2, int i) => Pairwise(MinNumber, op1, op2, i); + public static float[] MultiplyAddRotateComplex(float[] op1, float[] op2, float[] op3, byte imm) + { + for (int i = 0; i < op1.Length; i += 2) + { + (float ans1, float ans2) = imm switch + { + 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), + 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), + 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), + 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + _ => (0.0f, 0.0f) + }; + + op1[i] = ans1; + op1[i + 1] = ans2; + } + + return op1; + } + public static float MultiplyExtended(float op1, float op2) { bool inf1 = float.IsInfinity(op1); @@ -5384,6 +5404,26 @@ public static float FPExponentialAccelerator(uint op1) public static double MinNumberPairwise(double[] op1, double[] op2, int i) => 
Pairwise(MinNumber, op1, op2, i); + public static double[] MultiplyAddRotateComplex(double[] op1, double[] op2, double[] op3, byte imm) + { + for (int i = 0; i < op1.Length; i += 2) + { + (double ans1, double ans2) = imm switch + { + 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), + 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), + 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), + 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + _ => (0.0, 0.0) + }; + + op1[i] = ans1; + op1[i + 1] = ans2; + } + + return op1; + } + public static double MultiplyExtended(double op1, double op2) { bool inf1 = double.IsInfinity(op1); diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template index c1bcca1eb32c6..9ce3a334f7d7a 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/_SveImmTernOpTestTemplate.template @@ -9,6 +9,7 @@ ******************************************************************************/ using System; +using System.Linq; using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; @@ -65,6 +66,9 @@ namespace JIT.HardwareIntrinsics.Arm // Validates executing the test inside conditional, with op3 as zero test.ConditionalSelect_ZeroOp(); + + // Validates basic functionality fails with an invalid imm, using Unsafe.ReadUnaligned + test.RunBasicScenario_UnsafeRead_InvalidImm(); } else { @@ -194,7 +198,7 @@ namespace JIT.HardwareIntrinsics.Arm { Succeeded = true; - for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueOp1} % 2); } + for (var i = 0; i < Op1ElementCount; i++) { _maskData[i] = ({Op1BaseType})({NextValueMask} % 2); } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _mask), ref Unsafe.As<{Op1BaseType}, byte>(ref _maskData[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; } Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1VectorType}<{Op1BaseType}>, byte>(ref _fld1), ref Unsafe.As<{Op1BaseType}, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>()); @@ -229,6 +233,32 @@ namespace JIT.HardwareIntrinsics.Arm ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.inArray3Ptr, _dataTable.outArrayPtr); } + public void RunBasicScenario_UnsafeRead_InvalidImm() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead_InvalidImm)); + + bool succeeded = false; + try + { + var result = {Isa}.{Method}( + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray1Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray2Ptr), + Unsafe.Read<{Op1VectorType}<{Op1BaseType}>>(_dataTable.inArray3Ptr), + {InvalidImm} + ); + Console.WriteLine(result); + } + catch (ArgumentOutOfRangeException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + public void RunBasicScenario_Load() { TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load)); From aecfb4f4460e3765dc6c71605202ba5e02916302 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 
17:32:32 -0400 Subject: [PATCH 03/10] Fix unit tests --- src/coreclr/jit/codegenarm64test.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 2bc3da3b9d7f8..79a5f82754c8c 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6096,6 +6096,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() theEmitter->emitIns_R_PATTERN_I(INS_sve_uqincw, EA_SCALABLE, REG_V11, SVE_PATTERN_ALL, 16, INS_OPTS_SCALABLE_S); // UQINCW .S{, {, MUL #}} +#ifdef ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_BQ_2A theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # @@ -6105,6 +6106,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_BQ_2B theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, From f1841cad6bff54bb48fc180930c78483d2347211 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 17:34:34 -0400 Subject: [PATCH 04/10] Update trig tests --- .../GenerateHWIntrinsicTests_Arm.cs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index e443fa2713722..2abaf62dcd680 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -4155,14 +4155,14 @@ ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_Sqrt_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Sqrt", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "result[i] != Helpers.Sqrt(firstOp[i])", ["GetIterResult"] = "Helpers.Sqrt(leftOp[i])"}), ("SveSimpleVecOpTest.template", new Dictionary { ["TestName"] = "Sve_Sqrt_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Sqrt", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "result[i] != Helpers.Sqrt(firstOp[i])", ["GetIterResult"] = "Helpers.Sqrt(leftOp[i])"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "0", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", 
["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "2", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_4", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "4", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_6", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["Imm"] = "6", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "1", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "3", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_5", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "5", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), - ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_7", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["Imm"] = "7", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_0", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "0", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_2", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "2", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_4", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "4", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_float_6", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["NextValueMask"] = "Helpers.getMaskSingle()", ["Imm"] = "6", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_1", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "1", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_3", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "3", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_5", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "5", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), + ("SveVecImmBinOpTest.template", new Dictionary {["TestName"] = "Sve_TrigonometricMultiplyAddCoefficient_double_7", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "TrigonometricMultiplyAddCoefficient", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["NextValueMask"] = "Helpers.getMaskDouble()", ["Imm"] = "7", ["InvalidImm"] = "8", ["ValidateIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) && (Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) != result[i])", ["GetIterResult"] = "((firstOp[i] <= (Math.PI / 4)) && (firstOp[i] > (-Math.PI / 4))) ? 
Helpers.TrigonometricMultiplyAddCoefficient(firstOp[i], secondOp[i], Imm) : result[i]"}), ("SveVecTernOpMaskedTest.template", new Dictionary { ["TestName"] = "Sve_Splice_float", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Splice", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "result[i] != Helpers.Splice(first, second, maskArray, i)", ["GetIterResult"] = "Helpers.Splice(left, right, mask, i)", ["ConvertFunc"] = "BitConverter.SingleToInt32Bits"}), ("SveVecTernOpMaskedTest.template", new Dictionary { ["TestName"] = "Sve_Splice_double", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "Splice", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "64", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "result[i] != Helpers.Splice(first, second, maskArray, i)", ["GetIterResult"] = "Helpers.Splice(left, right, mask, i)", ["ConvertFunc"] = "BitConverter.DoubleToInt64Bits"}), From e5ccd78c4cbb1324292f1cbb6762fd2d6475328e Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 18:30:33 -0400 Subject: [PATCH 05/10] style --- src/coreclr/jit/codegenarm64test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/codegenarm64test.cpp b/src/coreclr/jit/codegenarm64test.cpp index 79a5f82754c8c..e136de5a07595 100644 --- a/src/coreclr/jit/codegenarm64test.cpp +++ b/src/coreclr/jit/codegenarm64test.cpp @@ -6106,7 +6106,7 @@ void CodeGen::genArm64EmitterUnitTestsSve() INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V6, REG_FP_LAST, 255, INS_OPTS_SCALABLE_B, INS_SCALABLE_OPTS_WITH_VECTOR_PAIR); // EXT .B, {.B, .B }, # -#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED +#endif // ALL_ARM64_EMITTER_UNIT_TESTS_SVE_UNSUPPORTED // IF_SVE_BQ_2B theEmitter->emitIns_R_R_I(INS_sve_ext, EA_SCALABLE, REG_V0, REG_V1, 0, From 9aa87551f2d48b2b5dac18e108327d08be1144cf Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 19:40:43 -0400 Subject: [PATCH 06/10] Remote FMA intrin flag --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 37 ++++++++------------- src/coreclr/jit/hwintrinsiclistarm64sve.h | 2 +- src/coreclr/jit/lsraarm64.cpp | 37 +++++++++++++++------ 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 026b9084f7f7d..5b0639223b751 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -486,7 +486,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) { case 4: assert(intrinEmbMask.op4 != nullptr); - assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); FALLTHROUGH; @@ -709,16 +708,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } - case 3: - assert(!HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id) || (falseReg != embMaskOp3Reg)); - FALLTHROUGH; + case 3: case 4: { 
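                        // Both the three-operand FMA-style intrinsics and the four-operand
                        // MultiplyAddRotateComplex (FCMLA, which carries its rotation as op4)
                        // are RMW, so they share this merged handling: the code below takes the
                        // immediate path when the embedded intrinsic has an immediate operand and
                        // otherwise falls back to the four-register FMA path.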
assert(instrIsRMW); if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) { + assert(falseReg != embMaskOp3Reg); // For FMA, the operation we are trying to perform is: // result = op1 + (op2 * op3) // @@ -811,10 +809,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) insEmbMask = useAddend ? INS_sve_mls : INS_sve_msb; break; - case NI_Sve_MultiplyAddRotateComplex: - assert(useAddend); - break; - default: unreached(); } @@ -883,11 +877,9 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } // Finally, perform the desired operation. - const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id); - - if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)) + if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) { - if (embHasImmediateOperand) + if (intrinEmbMask.numOperands == 4) { assert(intrinEmbMask.id == NI_Sve_MultiplyAddRotateComplex); HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); @@ -900,22 +892,19 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } else { - GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - embMaskOp3Reg, opt); - } - } - else if (embHasImmediateOperand) - { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, - helper.ImmValue(), opt); + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, + embMaskOp2Reg, helper.ImmValue(), opt); + } } } else { - unreached(); + assert(HWIntrinsicInfo::IsFmaIntrinsic(intrinEmbMask.id)); + GetEmitter()->emitInsSve_R_R_R_R(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, opt); } break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index dedda44081bcf..c7df23557516e 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -187,7 +187,7 @@ HARDWARE_INTRINSIC(Sve, MinNumber, HARDWARE_INTRINSIC(Sve, MinNumberAcross, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fminnmv, INS_sve_fminnmv}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, Multiply, -1, 2, true, {INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_mul, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_OptionalEmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplyAdd, -1, -1, false, {INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_sve_mla, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) -HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand|HW_Flag_FmaIntrinsic) +HARDWARE_INTRINSIC(Sve, MultiplyAddRotateComplex, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fcmla, INS_sve_fcmla}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_LowMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_HasImmediateOperand) HARDWARE_INTRINSIC(Sve, MultiplyBySelectedScalar, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmul, INS_sve_fmul}, HW_Category_SIMDByIndexedElement, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_LowVectorOperation) HARDWARE_INTRINSIC(Sve, MultiplyExtended, -1, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_fmulx, INS_sve_fmulx}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation) HARDWARE_INTRINSIC(Sve, MultiplySubtract, -1, -1, false, {INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_sve_mls, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_EmbeddedMaskedOperation|HW_Flag_HasRMWSemantics|HW_Flag_LowMaskedOperation|HW_Flag_FmaIntrinsic|HW_Flag_SpecialCodeGen) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index 0c0c339694a15..f542cf7357912 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1876,9 +1876,8 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou if (HWIntrinsicInfo::IsFmaIntrinsic(intrinEmb.id)) { - const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmb.id); assert(embOp2Node->isRMWHWIntrinsic(compiler)); - assert((numArgs == 3) || (embHasImmediateOperand && (numArgs == 4))); + assert(numArgs == 3); LIR::Use use; GenTree* user = nullptr; @@ -1918,7 +1917,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += BuildDelayFreeUses(emitOp2, emitOp1); srcCount += BuildDelayFreeUses(emitOp3, emitOp1); - if (embHasImmediateOperand) { assert(numArgs == 4); srcCount += BuildDelayFreeUses(intrinEmb.op4, emitOp1); @@ -1932,18 +1930,35 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } else { - assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3)); + const bool embHasImmediateOperand = HWIntrinsicInfo::HasImmediateOperand(intrinEmb.id); + assert((numArgs == 1) || (numArgs == 2) || (numArgs == 3) || (embHasImmediateOperand && (numArgs == 4))); - // Special handling for ShiftRightArithmeticForDivide: + // Special handling for embedded intrinsics with immediates: // We might need an additional register to hold branch targets into the switch table // that encodes the immediate - if (intrinEmb.id == NI_Sve_ShiftRightArithmeticForDivide) + switch (intrinEmb.id) { - assert(numArgs == 2); - if (!embOp2Node->Op(2)->isContainedIntOrIImmed()) - { - buildInternalIntRegisterDefForNode(embOp2Node); - } + case NI_Sve_ShiftRightArithmeticForDivide: + assert(embHasImmediateOperand); + assert(numArgs == 2); + if (!embOp2Node->Op(2)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + break; + + case NI_Sve_MultiplyAddRotateComplex: + assert(embHasImmediateOperand); + assert(numArgs == 4); + if 
(!embOp2Node->Op(4)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + break; + + default: + assert(!embHasImmediateOperand); + break; } tgtPrefUse = BuildUse(embOp2Node->Op(1)); From c8bce7af045ff2684589a1eb897787cde995ddf5 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 20:07:28 -0400 Subject: [PATCH 07/10] Tweak helpers --- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 28 +++++++++++-------- 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 7bee3741f5cc6..6e6dd313561fa 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -5238,17 +5238,19 @@ public static float[] MultiplyAddRotateComplex(float[] op1, float[] op2, float[] { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; (float ans1, float ans2) = imm switch { - 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), - 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), - 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), - 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + 0 => (FusedMultiplyAdd(op1[real], op2[real], op3[real]), FusedMultiplyAdd(op1[img], op2[real], op3[img])), + 1 => (FusedMultiplySubtract(op1[real], op2[img], op3[img]), FusedMultiplyAdd(op1[img], op2[img], op3[i])), + 2 => (FusedMultiplySubtract(op1[real], op2[real], op3[real]), FusedMultiplySubtract(op1[img], op2[real], op3[img])), + 3 => (FusedMultiplyAdd(op1[real], op2[img], op3[img]), FusedMultiplySubtract(op1[img], op2[img], op3[real])), _ => (0.0f, 0.0f) }; - op1[i] = ans1; - op1[i + 1] = ans2; + op1[real] = ans1; + op1[img] = ans2; } return op1; @@ -5435,17 +5437,19 @@ public static double[] MultiplyAddRotateComplex(double[] op1, double[] op2, doub { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; (double ans1, double ans2) = imm switch { - 0 => (FusedMultiplyAdd(op1[i], op2[i], op3[i]), FusedMultiplyAdd(op1[i + 1], op2[i], op3[i + 1])), - 1 => (FusedMultiplySubtract(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplyAdd(op1[i + 1], op2[i + 1], op3[i])), - 2 => (FusedMultiplySubtract(op1[i], op2[i], op3[i]), FusedMultiplySubtract(op1[i + 1], op2[i], op3[i + 1])), - 3 => (FusedMultiplyAdd(op1[i], op2[i + 1], op3[i + 1]), FusedMultiplySubtract(op1[i + 1], op2[i + 1], op3[i])), + 0 => (FusedMultiplyAdd(op1[real], op2[real], op3[real]), FusedMultiplyAdd(op1[img], op2[real], op3[img])), + 1 => (FusedMultiplySubtract(op1[real], op2[img], op3[img]), FusedMultiplyAdd(op1[img], op2[img], op3[i])), + 2 => (FusedMultiplySubtract(op1[real], op2[real], op3[real]), FusedMultiplySubtract(op1[img], op2[real], op3[img])), + 3 => (FusedMultiplyAdd(op1[real], op2[img], op3[img]), FusedMultiplySubtract(op1[img], op2[img], op3[real])), _ => (0.0, 0.0) }; - op1[i] = ans1; - op1[i + 1] = ans2; + op1[real] = ans1; + op1[img] = ans2; } return op1; From 3443b5d0bac6a90258f2763cf27043981990e362 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 20:13:53 -0400 Subject: [PATCH 08/10] Fix LinearScan::BuildHWIntrinsic --- src/coreclr/jit/lsraarm64.cpp | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 
deletions(-) diff --git a/src/coreclr/jit/lsraarm64.cpp b/src/coreclr/jit/lsraarm64.cpp index f542cf7357912..5fbcc0b468f00 100644 --- a/src/coreclr/jit/lsraarm64.cpp +++ b/src/coreclr/jit/lsraarm64.cpp @@ -1916,16 +1916,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou srcCount += 1; srcCount += BuildDelayFreeUses(emitOp2, emitOp1); srcCount += BuildDelayFreeUses(emitOp3, emitOp1); - - { - assert(numArgs == 4); - srcCount += BuildDelayFreeUses(intrinEmb.op4, emitOp1); - if (!embOp2Node->Op(4)->isContainedIntOrIImmed()) - { - buildInternalIntRegisterDefForNode(embOp2Node); - } - } - srcCount += BuildDelayFreeUses(intrin.op3, emitOp1); } else @@ -1947,6 +1937,15 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou } break; + case NI_Sve_AddRotateComplex: + assert(embHasImmediateOperand); + assert(numArgs == 3); + if (!embOp2Node->Op(3)->isContainedIntOrIImmed()) + { + buildInternalIntRegisterDefForNode(embOp2Node); + } + break; + case NI_Sve_MultiplyAddRotateComplex: assert(embHasImmediateOperand); assert(numArgs == 4); From cafe4a5c1625dcb366474a95b57f532d12112615 Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 21:05:25 -0400 Subject: [PATCH 09/10] Fix helpers --- .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 22 ++++++++++++------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 6e6dd313561fa..54ddade391f93 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -6132,15 +6132,18 @@ public static float[] AddRotateComplex(float[] op1, float[] op2, byte rot) { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; + if (rot == 0) { - op1[i] -= op2[i + 1]; - op1[i + 1] += op2[i]; + op1[real] -= op2[img]; + op1[img] += op2[real]; } else { - op1[i] += op2[i + 1]; - op1[i + 1] -= op2[i]; + op1[real] += op2[img]; + op1[img] -= op2[real]; } } @@ -6201,15 +6204,18 @@ public static double[] AddRotateComplex(double[] op1, double[] op2, byte rot) { for (int i = 0; i < op1.Length; i += 2) { + int real = i; + int img = i + 1; + if (rot == 0) { - op1[i] -= op2[i + 1]; - op1[i + 1] += op2[i]; + op1[real] -= op2[img]; + op1[img] += op2[real]; } else { - op1[i] += op2[i + 1]; - op1[i + 1] -= op2[i]; + op1[real] += op2[img]; + op1[img] -= op2[real]; } } From de2ae6d7c870f36884f15d81ea6910b004631c2e Mon Sep 17 00:00:00 2001 From: "Aman Khalid (from Dev Box)" Date: Mon, 15 Jul 2024 21:24:20 -0400 Subject: [PATCH 10/10] JIT feedback --- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 171 ++++++++++---------- 1 file changed, 90 insertions(+), 81 deletions(-) diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 5b0639223b751..17f929fef2bb7 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -480,13 +480,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) regNumber embMaskOp1Reg = REG_NA; regNumber embMaskOp2Reg = REG_NA; regNumber embMaskOp3Reg = REG_NA; + regNumber embMaskOp4Reg = REG_NA; regNumber falseReg = op3Reg; switch (intrinEmbMask.numOperands) { case 4: assert(intrinEmbMask.op4 != nullptr); - assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); + embMaskOp4Reg = intrinEmbMask.op4->GetRegNum(); FALLTHROUGH; case 3: @@ -508,6 +509,70 @@ void 
CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) unreached(); } + // Shared code for setting up embedded mask arg for intrinsics with 3+ operands + auto emitEmbeddedMaskSetup = [&] { + if (intrin.op3->IsVectorZero()) + { + // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the + // destination using /Z. + + assert(targetReg != embMaskOp2Reg); + assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, opt); + } + else + { + // Below are the considerations we need to handle: + // + // targetReg == falseReg && targetReg == embMaskOp1Reg + // fmla Zd, P/m, Zn, Zm + // + // targetReg == falseReg && targetReg != embMaskOp1Reg + // movprfx target, P/m, embMaskOp1Reg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // targetReg != falseReg && targetReg == embMaskOp1Reg + // sel target, P/m, embMaskOp1Reg, falseReg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // targetReg != falseReg && targetReg != embMaskOp1Reg + // sel target, P/m, embMaskOp1Reg, falseReg + // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg + // + // Note that, we just check if the targetReg/falseReg or targetReg/embMaskOp1Reg + // coincides or not. + + if (targetReg != falseReg) + { + if (falseReg == embMaskOp1Reg) + { + // If falseReg value and embMaskOp1Reg value are same, then just mov the value + // to the target. + + GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, embMaskOp1Reg, + /* canSkip */ true); + } + else + { + // If falseReg value is not present in targetReg yet, move the inactive lanes + // into the targetReg using `sel`. Since this is RMW, the active lanes should + // have the value from embMaskOp1Reg + + GetEmitter()->emitInsSve_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, embMaskOp1Reg, + falseReg, opt); + } + } + else if (targetReg != embMaskOp1Reg) + { + // If target already contains the values of `falseReg`, just merge the lanes from + // `embMaskOp1Reg`, again because this is RMW semantics. + + GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, + opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); + } + } + }; + switch (intrinEmbMask.numOperands) { case 1: @@ -710,7 +775,6 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } case 3: - case 4: { assert(instrIsRMW); @@ -814,90 +878,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) } } - if (intrin.op3->IsVectorZero()) - { - // If `falseReg` is zero, then move the first operand of `intrinEmbMask` in the - // destination using /Z. 
- - assert(targetReg != embMaskOp2Reg); - assert(intrin.op3->isContained() || !intrin.op1->IsMaskAllBitsSet()); - GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, - opt); - } - else - { - // Below are the considerations we need to handle: - // - // targetReg == falseReg && targetReg == embMaskOp1Reg - // fmla Zd, P/m, Zn, Zm - // - // targetReg == falseReg && targetReg != embMaskOp1Reg - // movprfx target, P/m, embMaskOp1Reg - // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // - // targetReg != falseReg && targetReg == embMaskOp1Reg - // sel target, P/m, embMaskOp1Reg, falseReg - // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // - // targetReg != falseReg && targetReg != embMaskOp1Reg - // sel target, P/m, embMaskOp1Reg, falseReg - // fmla target, P/m, embMaskOp2Reg, embMaskOp3Reg - // - // Note that, we just check if the targetReg/falseReg or targetReg/embMaskOp1Reg - // coincides or not. - - if (targetReg != falseReg) - { - if (falseReg == embMaskOp1Reg) - { - // If falseReg value and embMaskOp1Reg value are same, then just mov the value - // to the target. - - GetEmitter()->emitIns_Mov(INS_mov, emitTypeSize(node), targetReg, embMaskOp1Reg, - /* canSkip */ true); - } - else - { - // If falseReg value is not present in targetReg yet, move the inactive lanes - // into the targetReg using `sel`. Since this is RMW, the active lanes should - // have the value from embMaskOp1Reg - - GetEmitter()->emitInsSve_R_R_R_R(INS_sve_sel, emitSize, targetReg, maskReg, - embMaskOp1Reg, falseReg, opt); - } - } - else if (targetReg != embMaskOp1Reg) - { - // If target already contains the values of `falseReg`, just merge the lanes from - // `embMaskOp1Reg`, again because this is RMW semantics. - - GetEmitter()->emitInsSve_R_R_R(INS_sve_movprfx, emitSize, targetReg, maskReg, embMaskOp1Reg, - opt, INS_SCALABLE_OPTS_PREDICATE_MERGE); - } - } + emitEmbeddedMaskSetup(); // Finally, perform the desired operation. 
if (HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)) { - if (intrinEmbMask.numOperands == 4) - { - assert(intrinEmbMask.id == NI_Sve_MultiplyAddRotateComplex); - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - embMaskOp2Reg, embMaskOp3Reg, helper.ImmValue(), - opt); - } - } - else + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) { - HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op3, op2->AsHWIntrinsic()); - for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) - { - GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, - embMaskOp2Reg, helper.ImmValue(), opt); - } + GetEmitter()->emitInsSve_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + helper.ImmValue(), opt); } } else @@ -909,6 +899,25 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; } + + case 4: + { + assert(instrIsRMW); + assert(intrinEmbMask.op4->isContained() == (embMaskOp4Reg == REG_NA)); + assert(HWIntrinsicInfo::HasImmediateOperand(intrinEmbMask.id)); + + emitEmbeddedMaskSetup(); + + HWIntrinsicImmOpHelper helper(this, intrinEmbMask.op4, op2->AsHWIntrinsic()); + for (helper.EmitBegin(); !helper.Done(); helper.EmitCaseEnd()) + { + GetEmitter()->emitInsSve_R_R_R_R_I(insEmbMask, emitSize, targetReg, maskReg, embMaskOp2Reg, + embMaskOp3Reg, helper.ImmValue(), opt); + } + + break; + } + default: unreached(); }
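
Note on the rotation semantics (illustrative only, not part of the change): the AddRotateComplex helpers above treat each even/odd lane pair as one complex number, with rot selecting one of the two rotations FCADD supports (rot == 0 adds op2 rotated by 90 degrees, rot == 1 adds op2 rotated by 270 degrees). Below is a minimal scalar sketch of the per-pair arithmetic those helpers implement; the type and method names are hypothetical and exist purely for illustration:

    // Illustrative sketch only: mirrors the per-pair logic of the AddRotateComplex
    // test helper above. Names are hypothetical; this is not part of the product change.
    internal static class AddRotateComplexSketch
    {
        // rot == 0: add op2 rotated by 90 degrees; any other value: rotated by 270 degrees,
        // matching the if/else in the helper.
        public static (float Real, float Img) PerPair((float Real, float Img) op1,
                                                      (float Real, float Img) op2,
                                                      byte rot)
        {
            return rot == 0
                ? (op1.Real - op2.Img, op1.Img + op2.Real)
                : (op1.Real + op2.Img, op1.Img - op2.Real);
        }
    }

Applied across a vector, this is exactly the loop in the helper: each (op1[i], op1[i + 1]) pair is updated from the corresponding pair of op2.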
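Similarly, a per-pair sketch of the MultiplyAddRotateComplex helper, assuming FusedMultiplyAdd(a, b, c) in Helpers.cs computes a + (b * c) and FusedMultiplySubtract(a, b, c) computes a - (b * c); under that assumption the immediate 0..3 rotates the product by 0, 90, 180, or 270 degrees before accumulating. Again, the class and method below are hypothetical and only illustrate the arithmetic:

    // Illustrative sketch only: per-pair behavior of the MultiplyAddRotateComplex
    // test helper above, under the FusedMultiplyAdd/FusedMultiplySubtract assumption
    // stated in the note. Not part of the product change.
    internal static class MultiplyAddRotateComplexSketch
    {
        public static (double Real, double Img) PerPair((double Real, double Img) acc,
                                                        (double Real, double Img) left,
                                                        (double Real, double Img) right,
                                                        byte rot)
        {
            return rot switch
            {
                0 => (acc.Real + left.Real * right.Real, acc.Img + left.Real * right.Img), // 0 degrees
                1 => (acc.Real - left.Img * right.Img,   acc.Img + left.Img * right.Real), // 90 degrees
                2 => (acc.Real - left.Real * right.Real, acc.Img - left.Real * right.Img), // 180 degrees
                3 => (acc.Real + left.Img * right.Img,   acc.Img - left.Img * right.Real), // 270 degrees
                _ => (0.0, 0.0)
            };
        }
    }

Issuing the operation twice, with rotations 0 and 90, accumulates a full complex product. When the rotation argument is not a contained constant at JIT time, the codegen falls back to the immediate switch-table expansion via HWIntrinsicImmOpHelper, which is why the lsraarm64.cpp changes reserve an internal integer register for those cases.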