From 944467a3e4fbe31d280422e22f71b35b86e74f36 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Tue, 9 Apr 2024 16:08:41 +0100 Subject: [PATCH 1/6] JIT ARM64-SVE: Add CreateWhileLessThan* --- src/coreclr/jit/hwintrinsicarm64.cpp | 38 +++ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 22 ++ src/coreclr/jit/hwintrinsiclistarm64sve.h | 8 + .../Arm/Sve.PlatformNotSupported.cs | 215 +++++++++++++++++ .../src/System/Runtime/Intrinsics/Arm/Sve.cs | 215 +++++++++++++++++ .../ref/System.Runtime.Intrinsics.cs | 32 +++ .../GenerateHWIntrinsicTests_Arm.cs | 33 +++ .../HardwareIntrinsics/Arm/Shared/Helpers.cs | 40 ++++ .../Arm/Shared/ScalarBinOpRetVecTest.template | 216 ++++++++++++++++++ 9 files changed, 819 insertions(+) create mode 100644 src/tests/JIT/HardwareIntrinsics/Arm/Shared/ScalarBinOpRetVecTest.template diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 8e3288f75d7090..c785f8d7c0488d 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2194,6 +2194,44 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } + case NI_Sve_CreateWhileLessThanMask8Bit: + case NI_Sve_CreateWhileLessThanMask16Bit: + case NI_Sve_CreateWhileLessThanMask32Bit: + case NI_Sve_CreateWhileLessThanMask64Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: + { + // The target instruction depends on whether the inputs are signed or unsigned. + // That signedness is lost when the type is converted from CorInfoType to var_types, + // so it is recorded on the node using GTF_UNSIGNED. 
+ + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + CorInfoType argCoreInfoType = strip(info.compCompHnd->getArgType(sig, arg2, &argClass)); + + assert(sig->numArgs == 2); + argType = JITtype2varType(argCoreInfoType); + op2 = getArgForHWIntrinsic(argType, argClass); + argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); + op1 = impPopStack().val; + + retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); + + if (argCoreInfoType == CORINFO_TYPE_ULONG || argCoreInfoType == CORINFO_TYPE_UINT) + { + retNode->gtFlags |= GTF_UNSIGNED; + } + else + { + assert(argCoreInfoType == CORINFO_TYPE_LONG || argCoreInfoType == CORINFO_TYPE_INT); + } + } + break; + default: { return nullptr; diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9a3a98e087a274..9c8c80c46bec63 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1300,6 +1300,28 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) GetEmitter()->emitIns_R_PATTERN(ins, emitSize, targetReg, opt, SVE_PATTERN_ALL); break; + case NI_Sve_CreateWhileLessThanMask8Bit: + case NI_Sve_CreateWhileLessThanMask16Bit: + case NI_Sve_CreateWhileLessThanMask32Bit: + case NI_Sve_CreateWhileLessThanMask64Bit: + // The emit size is the actual type size of the scalar operands (taken from op1). + emitSize = emitActualTypeSize(intrin.op1->TypeGet()); + // GTF_UNSIGNED on the node selects the unsigned form (whilelo) over the signed form (whilelt). + ins = ((node->gtFlags & GTF_UNSIGNED) != 0) ? 
INS_sve_whilelo : INS_sve_whilelt; + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + + case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: + // The emit size is the actual type size of the scalar operands (taken from op1). + emitSize = emitActualTypeSize(intrin.op1->TypeGet()); + // GTF_UNSIGNED on the node selects the unsigned form (whilels) over the signed form (whilele). + ins = ((node->gtFlags & GTF_UNSIGNED) != 0) ? INS_sve_whilels : INS_sve_whilele; + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + default: unreached(); } diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index ac110c2a0e1b5b..b2ba46b1432bf3 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -27,6 +27,14 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, 
INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, 
INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_LowMaskedOperation) diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs index fbd5ee65ca748f..b88c065b271c1b 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.PlatformNotSupported.cs @@ -121,6 +121,221 @@ internal Arm64() { } public static unsafe Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw new PlatformNotSupportedException(); } + /// CreateWhileLessThanMask16Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b16[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b16[_s64](int64_t op1, int64_t op2) + /// WHILELT Presult.H, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(long left, long right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b16[_u32](uint32_t 
op1, uint32_t op2) + /// WHILELO Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b16[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.H, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + + + /// CreateWhileLessThanMask32Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b32[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b32[_s64](int64_t op1, int64_t op2) + /// WHILELT Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(long left, long right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b32[_u32](uint32_t op1, uint32_t op2) + /// WHILELO Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b32[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + + + /// CreateWhileLessThanMask64Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b64[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.D, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b64[_s64](int64_t op1, int64_t op2) + /// WHILELT Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(long left, long right) { throw 
new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b64[_u32](uint32_t op1, uint32_t op2) + /// WHILELO Presult.D, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b64[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + + + /// CreateWhileLessThanMask8Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b8[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b8[_s64](int64_t op1, int64_t op2) + /// WHILELT Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(long left, long right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b8[_u32](uint32_t op1, uint32_t op2) + /// WHILELO Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilelt_b8[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + + + /// CreateWhileLessThanOrEqualMask16Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b16[_s32](int32_t op1, int32_t op2) + /// WHILELE Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b16[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.H, 
Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(long left, long right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b16[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b16[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.H, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + + + /// CreateWhileLessThanOrEqualMask32Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b32[_s32](int32_t op1, int32_t op2) + /// WHILELE Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b32[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(long left, long right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b32[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b32[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + + + /// CreateWhileLessThanOrEqualMask64Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b64[_s32](int32_t op1, int32_t op2) + /// WHILELE Presult.D, Wop1, Wop2 + /// + public static unsafe Vector 
CreateWhileLessThanOrEqualMask64Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b64[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask64Bit(long left, long right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b64[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.D, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask64Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b64[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask64Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + + + /// CreateWhileLessThanOrEqualMask8Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b8[_s32](int32_t op1, int32_t op2) + /// WHILELE Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(int left, int right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b8[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(long left, long right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b8[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(uint left, uint right) { throw new PlatformNotSupportedException(); } + + /// + /// svbool_t svwhilele_b8[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) { throw new PlatformNotSupportedException(); } + /// LoadVector : Unextended load diff --git 
a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs index 6ba2a2c67bc8a7..001a0734eff563 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Arm/Sve.cs @@ -119,6 +119,221 @@ internal Arm64() { } public static unsafe Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) => CreateTrueMaskUInt64(pattern); + /// CreateWhileLessThanMask16Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b16[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(int left, int right) => CreateWhileLessThanMask16Bit(left, right); + + /// + /// svbool_t svwhilelt_b16[_s64](int64_t op1, int64_t op2) + /// WHILELT Presult.H, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(long left, long right) => CreateWhileLessThanMask16Bit(left, right); + + /// + /// svbool_t svwhilelt_b16[_u32](uint32_t op1, uint32_t op2) + /// WHILELO Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(uint left, uint right) => CreateWhileLessThanMask16Bit(left, right); + + /// + /// svbool_t svwhilelt_b16[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.H, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask16Bit(ulong left, ulong right) => CreateWhileLessThanMask16Bit(left, right); + + + /// CreateWhileLessThanMask32Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b32[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(int left, int right) => CreateWhileLessThanMask32Bit(left, right); + + /// + /// svbool_t svwhilelt_b32[_s64](int64_t op1, int64_t op2) + /// WHILELT 
Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(long left, long right) => CreateWhileLessThanMask32Bit(left, right); + + /// + /// svbool_t svwhilelt_b32[_u32](uint32_t op1, uint32_t op2) + /// WHILELO Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(uint left, uint right) => CreateWhileLessThanMask32Bit(left, right); + + /// + /// svbool_t svwhilelt_b32[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask32Bit(ulong left, ulong right) => CreateWhileLessThanMask32Bit(left, right); + + + /// CreateWhileLessThanMask64Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b64[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.D, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(int left, int right) => CreateWhileLessThanMask64Bit(left, right); + + /// + /// svbool_t svwhilelt_b64[_s64](int64_t op1, int64_t op2) + /// WHILELT Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(long left, long right) => CreateWhileLessThanMask64Bit(left, right); + + /// + /// svbool_t svwhilelt_b64[_u32](uint32_t op1, uint32_t op2) + /// WHILELO Presult.D, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(uint left, uint right) => CreateWhileLessThanMask64Bit(left, right); + + /// + /// svbool_t svwhilelt_b64[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask64Bit(ulong left, ulong right) => CreateWhileLessThanMask64Bit(left, right); + + + /// CreateWhileLessThanMask8Bit : While incrementing scalar is less than + + /// + /// svbool_t svwhilelt_b8[_s32](int32_t op1, int32_t op2) + /// WHILELT Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(int left, int right) => CreateWhileLessThanMask8Bit(left, right); + + 
/// + /// svbool_t svwhilelt_b8[_s64](int64_t op1, int64_t op2) + /// WHILELT Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(long left, long right) => CreateWhileLessThanMask8Bit(left, right); + + /// + /// svbool_t svwhilelt_b8[_u32](uint32_t op1, uint32_t op2) + /// WHILELO Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(uint left, uint right) => CreateWhileLessThanMask8Bit(left, right); + + /// + /// svbool_t svwhilelt_b8[_u64](uint64_t op1, uint64_t op2) + /// WHILELO Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanMask8Bit(ulong left, ulong right) => CreateWhileLessThanMask8Bit(left, right); + + + /// CreateWhileLessThanOrEqualMask16Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b16[_s32](int32_t op1, int32_t op2) + /// WHILELE Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(int left, int right) => CreateWhileLessThanOrEqualMask16Bit(left, right); + + /// + /// svbool_t svwhilele_b16[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.H, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(long left, long right) => CreateWhileLessThanOrEqualMask16Bit(left, right); + + /// + /// svbool_t svwhilele_b16[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.H, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(uint left, uint right) => CreateWhileLessThanOrEqualMask16Bit(left, right); + + /// + /// svbool_t svwhilele_b16[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.H, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask16Bit(ulong left, ulong right) => CreateWhileLessThanOrEqualMask16Bit(left, right); + + + /// CreateWhileLessThanOrEqualMask32Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b32[_s32](int32_t op1, int32_t op2) + /// 
WHILELE Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(int left, int right) => CreateWhileLessThanOrEqualMask32Bit(left, right); + + /// + /// svbool_t svwhilele_b32[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(long left, long right) => CreateWhileLessThanOrEqualMask32Bit(left, right); + + /// + /// svbool_t svwhilele_b32[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.S, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(uint left, uint right) => CreateWhileLessThanOrEqualMask32Bit(left, right); + + /// + /// svbool_t svwhilele_b32[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.S, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask32Bit(ulong left, ulong right) => CreateWhileLessThanOrEqualMask32Bit(left, right); + + + /// CreateWhileLessThanOrEqualMask64Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b64[_s32](int32_t op1, int32_t op2) + /// WHILELE Presult.D, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask64Bit(int left, int right) => CreateWhileLessThanOrEqualMask64Bit(left, right); + + /// + /// svbool_t svwhilele_b64[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask64Bit(long left, long right) => CreateWhileLessThanOrEqualMask64Bit(left, right); + + /// + /// svbool_t svwhilele_b64[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.D, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask64Bit(uint left, uint right) => CreateWhileLessThanOrEqualMask64Bit(left, right); + + /// + /// svbool_t svwhilele_b64[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.D, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask64Bit(ulong left, ulong 
right) => CreateWhileLessThanOrEqualMask64Bit(left, right); + + + /// CreateWhileLessThanOrEqualMask8Bit : While incrementing scalar is less than or equal to + + /// + /// svbool_t svwhilele_b8[_s32](int32_t op1, int32_t op2) + /// WHILELE Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(int left, int right) => CreateWhileLessThanOrEqualMask8Bit(left, right); + + /// + /// svbool_t svwhilele_b8[_s64](int64_t op1, int64_t op2) + /// WHILELE Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(long left, long right) => CreateWhileLessThanOrEqualMask8Bit(left, right); + + /// + /// svbool_t svwhilele_b8[_u32](uint32_t op1, uint32_t op2) + /// WHILELS Presult.B, Wop1, Wop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(uint left, uint right) => CreateWhileLessThanOrEqualMask8Bit(left, right); + + /// + /// svbool_t svwhilele_b8[_u64](uint64_t op1, uint64_t op2) + /// WHILELS Presult.B, Xop1, Xop2 + /// + public static unsafe Vector CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) => CreateWhileLessThanOrEqualMask8Bit(left, right); + /// LoadVector : Unextended load diff --git a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs index 77fe06ddc5c02c..80772ff570dabe 100644 --- a/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs +++ b/src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs @@ -4149,6 +4149,38 @@ internal Arm64() { } public static System.Numerics.Vector CreateTrueMaskUInt16([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } public static System.Numerics.Vector CreateTrueMaskUInt32([ConstantExpected] SveMaskPattern pattern = SveMaskPattern.All) { throw null; } public static System.Numerics.Vector CreateTrueMaskUInt64([ConstantExpected] SveMaskPattern pattern = 
SveMaskPattern.All) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask16Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask16Bit(long left, long right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask16Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask16Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask32Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask32Bit(long left, long right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask32Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask32Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask64Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask64Bit(long left, long right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask64Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask64Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask8Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask8Bit(long left, long right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask8Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanMask8Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask16Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask16Bit(long left, long right) { throw null; } + public static 
System.Numerics.Vector CreateWhileLessThanOrEqualMask16Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask16Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask32Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask32Bit(long left, long right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask32Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask32Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask64Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask64Bit(long left, long right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask64Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask64Bit(ulong left, ulong right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(int left, int right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(long left, long right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(uint left, uint right) { throw null; } + public static System.Numerics.Vector CreateWhileLessThanOrEqualMask8Bit(ulong left, ulong right) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, sbyte* address) { throw null; } public static unsafe System.Numerics.Vector LoadVector(System.Numerics.Vector mask, short* address) { throw null; } diff --git a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs index 
5ee032e2842d61..8c2f3172edb9b2 100644 --- a/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs +++ b/src/tests/Common/GenerateHWIntrinsicTests/GenerateHWIntrinsicTests_Arm.cs @@ -2887,6 +2887,39 @@ (string templateFileName, Dictionary templateData)[] SveInputs = new [] { + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask16Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask16Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int64)i, right) != (Int64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask16Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] 
= "Sve_CreateWhileLessThanMask16Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask32Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask32Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int64)i, right) != (Int64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask32Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = 
"TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask32Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask64Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask64Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask64Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask64Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int64)i, right) != (Int64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask64Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask64Bit", ["RetVectorType"] = 
"Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask64Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask64Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask8Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask8Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (Int64)i, right) != (Int64)result[i]",}), + 
("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask8Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanMask8Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask16Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask16Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = 
"8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int64)i, right) != (Int64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask16Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask16Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask16Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt16", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask32Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = 
"Sve_CreateWhileLessThanOrEqualMask32Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int64)i, right) != (Int64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask32Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask32Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask32Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt32", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask64Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask64Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", 
["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask64Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask64Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int64)i, right) != (Int64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask64Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask64Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask64Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask64Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "UInt64", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = 
"Sve_CreateWhileLessThanOrEqualMask8Bit_Int32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "Int32", ["Op2BaseType"] = "Int32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int32)i, right) != (Int32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask8Bit_Int64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "Int64", ["Op2BaseType"] = "Int64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (Int64)i, right) != (Int64)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask8Bit_UInt32", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "UInt32", ["Op2BaseType"] = "UInt32", ["LargestVectorSize"] = "8", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt32)i, right) != (UInt32)result[i]",}), + ("ScalarBinOpRetVecTest.template",new Dictionary {["TestName"] = "Sve_CreateWhileLessThanOrEqualMask8Bit_UInt64", ["Isa"] = "Sve", ["LoadIsa"] = "Sve", ["Method"] = "CreateWhileLessThanOrEqualMask8Bit", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Byte", ["Op1BaseType"] = "UInt64", ["Op2BaseType"] = "UInt64", ["LargestVectorSize"] = "8", ["NextValueOp1"] = 
"TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateIterResult"] = "Helpers.WhileLessThanOrEqualMask(left + (UInt64)i, right) != (UInt64)result[i]",}), + ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_float", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Single", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_double", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "Double", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), ("SveLoadMaskedUnOpTest.template", new Dictionary { ["TestName"] = "SveLoadVector_sbyte", ["Isa"] = "Sve", ["Method"] = "LoadVector", ["RetVectorType"] = "Vector", ["RetBaseType"] = "SByte", ["Op1VectorType"] = "Vector", ["Op1BaseType"] = "SByte", ["Op2BaseType"] = "SByte", ["LargestVectorSize"] = "8", ["NextValueOp2"] = "TestLibrary.Generator.GetSByte()", ["ValidateIterResult"] = "firstOp[i] != result[i]"}), diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs index 125c187bdd2d4e..4b44b29337573a 100644 --- a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/Helpers.cs @@ -5986,5 +5986,45 @@ public static int DotProduct(int op1, sbyte[] op2, int s, sbyte[] op3, int t) return result; } + + public static int WhileLessThanMask(int op1, int op2) + { + return (op1 < op2) ? 
1 : 0; + } + + public static uint WhileLessThanMask(uint op1, uint op2) + { + return (uint)((op1 < op2) ? 1 : 0); + } + + public static long WhileLessThanMask(long op1, long op2) + { + return (op1 < op2) ? 1 : 0; + } + + public static ulong WhileLessThanMask(ulong op1, ulong op2) + { + return (ulong)((op1 < op2) ? 1 : 0); + } + + public static int WhileLessThanOrEqualMask(int op1, int op2) + { + return (op1 <= op2) ? 1 : 0; + } + + public static uint WhileLessThanOrEqualMask(uint op1, uint op2) + { + return (uint)((op1 <= op2) ? 1 : 0); + } + + public static long WhileLessThanOrEqualMask(long op1, long op2) + { + return (op1 <= op2) ? 1 : 0; + } + + public static ulong WhileLessThanOrEqualMask(ulong op1, ulong op2) + { + return (ulong)((op1 <= op2) ? 1 : 0); + } } } diff --git a/src/tests/JIT/HardwareIntrinsics/Arm/Shared/ScalarBinOpRetVecTest.template b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/ScalarBinOpRetVecTest.template new file mode 100644 index 00000000000000..a2434756fae862 --- /dev/null +++ b/src/tests/JIT/HardwareIntrinsics/Arm/Shared/ScalarBinOpRetVecTest.template @@ -0,0 +1,216 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +/****************************************************************************** + * This file is auto-generated from a template file by the GenerateTests.csx * + * script in tests\src\JIT\HardwareIntrinsics\Arm\Shared. In order to make * + * changes, please update the corresponding template and run according to the * + * directions listed in the file. 
* + ******************************************************************************/ + +using System; +using System.Numerics; +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using Xunit; + +namespace JIT.HardwareIntrinsics.Arm +{ + public static partial class Program + { + [Fact] + public static void {TestName}() + { + var test = new ScalarBinaryOpTest__{TestName}(); + + if (test.IsSupported) + { + // Validates basic functionality works, using Unsafe.ReadUnaligned + test.RunBasicScenario_UnsafeRead(); + + // Validates calling via reflection works, using Unsafe.ReadUnaligned + test.RunReflectionScenario_UnsafeRead(); + + // Validates passing a local works, using Unsafe.ReadUnaligned + test.RunLclVarScenario_UnsafeRead(); + + // Validates passing an instance member of a class works + test.RunClassFldScenario(); + + // Validates passing the field of a local struct works + test.RunStructLclFldScenario(); + + // Validates passing an instance member of a struct works + test.RunStructFldScenario(); + } + else + { + // Validates we throw on unsupported hardware + test.RunUnsupportedScenario(); + } + + if (!test.Succeeded) + { + throw new Exception("One or more scenarios did not complete as expected."); + } + } + } + + public sealed unsafe class ScalarBinaryOpTest__{TestName} + { + private struct TestStruct + { + public {Op1BaseType} _fld1; + public {Op2BaseType} _fld2; + + public static TestStruct Create() + { + var testStruct = new TestStruct(); + + testStruct._fld1 = {NextValueOp1}; + testStruct._fld2 = {NextValueOp2}; + + return testStruct; + } + + public void RunStructFldScenario(ScalarBinaryOpTest__{TestName} testClass) + { + var result = {Isa}.{Method}(_fld1, _fld2); + testClass.ValidateResult(_fld1, _fld2, result); + } + } + + private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType}); + + private static 
{Op1BaseType} _data1; + private static {Op2BaseType} _data2; + + private {Op1BaseType} _fld1; + private {Op2BaseType} _fld2; + + public ScalarBinaryOpTest__{TestName}() + { + Succeeded = true; + + _fld1 = {NextValueOp1}; + _fld2 = {NextValueOp2}; + + _data1 = {NextValueOp1}; + _data2 = {NextValueOp2}; + } + + public bool IsSupported => {Isa}.IsSupported; + + public bool Succeeded { get; set; } + + public void RunBasicScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead)); + + var result = {Isa}.{Method}( + Unsafe.ReadUnaligned<{Op1BaseType}>(ref Unsafe.As<{Op1BaseType}, byte>(ref _data1)), + Unsafe.ReadUnaligned<{Op2BaseType}>(ref Unsafe.As<{Op2BaseType}, byte>(ref _data2)) + ); + + ValidateResult(_data1, _data2, result); + } + + public void RunReflectionScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead)); + + var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({Op1BaseType}), typeof({Op2BaseType}) }) + .Invoke(null, new object[] { + Unsafe.ReadUnaligned<{Op1BaseType}>(ref Unsafe.As<{Op1BaseType}, byte>(ref _data1)), + Unsafe.ReadUnaligned<{Op2BaseType}>(ref Unsafe.As<{Op2BaseType}, byte>(ref _data2)) + }); + + ValidateResult(_data1, _data2, ({RetVectorType}<{RetBaseType}>)result); + } + + public void RunLclVarScenario_UnsafeRead() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead)); + + var data1 = Unsafe.ReadUnaligned<{Op1BaseType}>(ref Unsafe.As<{Op1BaseType}, byte>(ref _data1)); + var data2 = Unsafe.ReadUnaligned<{Op2BaseType}>(ref Unsafe.As<{Op2BaseType}, byte>(ref _data2)); + var result = {Isa}.{Method}(data1, data2); + + ValidateResult(data1, data2, result); + } + + public void RunClassFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario)); + + var result = {Isa}.{Method}(_fld1, _fld2); + ValidateResult(_fld1, _fld2, result); + } + + public void 
RunStructLclFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario)); + + var test = TestStruct.Create(); + var result = {Isa}.{Method}(test._fld1, test._fld2); + + ValidateResult(test._fld1, test._fld2, result); + } + + public void RunStructFldScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario)); + + var test = TestStruct.Create(); + test.RunStructFldScenario(this); + } + + public void RunUnsupportedScenario() + { + TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario)); + + bool succeeded = false; + + try + { + RunBasicScenario_UnsafeRead(); + } + catch (PlatformNotSupportedException) + { + succeeded = true; + } + + if (!succeeded) + { + Succeeded = false; + } + } + + private void ValidateResult({Op1BaseType} left, {Op2BaseType} right, {RetVectorType}<{RetBaseType}> result, [CallerMemberName] string method = "") + { + bool succeeded = true; + + for (int i = 0; i < RetElementCount; i++) + { + if ({ValidateIterResult}) + { + succeeded = false; + break; + } + } + + if (!succeeded) + { + TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetVectorType}<{RetBaseType}>>({Op1BaseType}, {Op2BaseType}): {Method} failed:"); + TestLibrary.TestFramework.LogInformation($" left: {left}"); + TestLibrary.TestFramework.LogInformation($" right: {right}"); + TestLibrary.TestFramework.LogInformation($" result: {result}"); + TestLibrary.TestFramework.LogInformation(string.Empty); + + Succeeded = false; + } + } + } +} From 3f62ba997e5181f7e4edb6254c7a01f9c90316f1 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 12 Apr 2024 14:20:47 +0100 Subject: [PATCH 2/6] Set simdBaseJitType to type of input args --- src/coreclr/jit/hwintrinsicarm64.cpp | 17 +++++------------ src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 12 +++--------- src/coreclr/jit/hwintrinsiclistarm64sve.h | 16 ++++++++-------- 3 files changed, 16 insertions(+), 29 deletions(-) diff --git 
a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index c785f8d7c0488d..043224c9172c07 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2203,7 +2203,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: { - // Target instruction is dependent on whether the inputs are signed or unsigned. // This information is lost when the type is converted from CorInfoType to var_type. // Ensure this is marked using GTF_UNSIGNED. @@ -2211,24 +2210,18 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); var_types argType = TYP_UNKNOWN; CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - CorInfoType argCoreInfoType = strip(info.compCompHnd->getArgType(sig, arg2, &argClass)); + + // Target instruction is dependent on whether the inputs are signed or unsigned. + // Use the input type for the base type. 
+ simdBaseJitType = strip(info.compCompHnd->getArgType(sig, arg2, &argClass)); assert(sig->numArgs == 2); - argType = JITtype2varType(argCoreInfoType); + argType = JITtype2varType(simdBaseJitType); op2 = getArgForHWIntrinsic(argType, argClass); argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); op1 = impPopStack().val; retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - - if (argCoreInfoType == CORINFO_TYPE_ULONG || argCoreInfoType == CORINFO_TYPE_UINT) - { - retNode->gtFlags |= GTF_UNSIGNED; - } - else - { - assert(argCoreInfoType == CORINFO_TYPE_LONG || argCoreInfoType == CORINFO_TYPE_INT); - } } break; diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index 9c8c80c46bec63..d93db8f0780cc6 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1304,21 +1304,15 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) case NI_Sve_CreateWhileLessThanMask16Bit: case NI_Sve_CreateWhileLessThanMask32Bit: case NI_Sve_CreateWhileLessThanMask64Bit: - // Emit size is the size of the scalar operands. - emitSize = emitActualTypeSize(intrin.op1->TypeGet()); - // Instruction is dependent on whether the inputs are signed or unsigned. - ins = ((node->gtFlags & GTF_UNSIGNED) != 0) ? INS_sve_whilelo : INS_sve_whilelt; - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); - break; - case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: // Emit size is the size of the scalar operands. emitSize = emitActualTypeSize(intrin.op1->TypeGet()); - // Instruction is dependent on whether the inputs are signed or unsigned. - ins = ((node->gtFlags & GTF_UNSIGNED) != 0) ? 
INS_sve_whilels : INS_sve_whilele; + // opt is based on the size of the returned vector + // ERROR: node->gtType is TYP_MASK. We need the type of the elements in the mask. + opt = emitter::optGetSveInsOpt(emitTypeSize(node->gtType)); GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index b2ba46b1432bf3..632b642e957956 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -27,14 +27,14 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, 
INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_LowMaskedOperation) From ddd7168a95882abac54415c76da1c0df740d4cfe Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Fri, 12 Apr 2024 16:36:26 +0100 Subject: [PATCH 3/6] Hardcode opt in codegen --- src/coreclr/jit/hwintrinsicarm64.cpp | 8 +++--- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 30 +++++++++++++++++---- 2 files changed, 29 insertions(+), 9 deletions(-) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 043224c9172c07..906a7dff6d41d5 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2206,10 +2206,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, // This information is lost when the type is converted from CorInfoType to var_type. // Ensure this is marked using GTF_UNSIGNED. 
- CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; + CORINFO_ARG_LIST_HANDLE arg1 = sig->args; + CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); + var_types argType = TYP_UNKNOWN; + CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; // Target instruction is dependent on whether the inputs are signed or unsigned. // Use the input type for the base type. diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index d93db8f0780cc6..fb9659ade88c91 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1301,18 +1301,38 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Sve_CreateWhileLessThanMask8Bit: - case NI_Sve_CreateWhileLessThanMask16Bit: - case NI_Sve_CreateWhileLessThanMask32Bit: - case NI_Sve_CreateWhileLessThanMask64Bit: case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: + // Emit size is the size of the scalar operands. + emitSize = emitActualTypeSize(intrin.op1->TypeGet()); + // opt is based on the size of the returned vector + opt = INS_OPTS_SCALABLE_B; + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + + case NI_Sve_CreateWhileLessThanMask16Bit: case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: + // Emit size is the size of the scalar operands. + emitSize = emitActualTypeSize(intrin.op1->TypeGet()); + // opt is based on the size of the returned vector + opt = INS_OPTS_SCALABLE_H; + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + + case NI_Sve_CreateWhileLessThanMask32Bit: case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: + // Emit size is the size of the scalar operands. 
+ emitSize = emitActualTypeSize(intrin.op1->TypeGet()); + // opt is based on the size of the returned vector + opt = INS_OPTS_SCALABLE_S; + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); + break; + + case NI_Sve_CreateWhileLessThanMask64Bit: case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: // Emit size is the size of the scalar operands. emitSize = emitActualTypeSize(intrin.op1->TypeGet()); // opt is based on the size of the returned vector - // ERROR: node->gtType is TYP_MASK. We need the type of the elements in the mask. - opt = emitter::optGetSveInsOpt(emitTypeSize(node->gtType)); + opt = INS_OPTS_SCALABLE_D; GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; From a0b56870cd387375927c4d078d903c24b70466ce Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 15 Apr 2024 12:00:23 +0100 Subject: [PATCH 4/6] Fix gtNewSimdConvertMaskToVectorNode types --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/hwintrinsic.cpp | 2 +- src/coreclr/jit/hwintrinsicarm64.cpp | 20 ++++++++++++-------- 3 files changed, 14 insertions(+), 10 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index ba3bac843dddd5..a51d74e460ed0a 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3474,7 +3474,7 @@ class Compiler #if defined(TARGET_ARM64) GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize); - GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type); + GenTree* gtNewSimdConvertMaskToVectorNode(var_types type, GenTreeHWIntrinsic* node, CorInfoType simdBaseJitType, unsigned simdSize); #endif //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 53970ef4a7460b..5c0fd015c002f3 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -1607,7 
+1607,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); assert(nodeRetType == TYP_MASK); - retNode = gtNewSimdConvertMaskToVectorNode(retNode->AsHWIntrinsic(), retType); + retNode = gtNewSimdConvertMaskToVectorNode(retType, retNode->AsHWIntrinsic(), simdBaseJitType, simdSize); } #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 906a7dff6d41d5..9ccfe07a563b73 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2235,12 +2235,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic vector node to a mask +// gtNewSimdConvertVectorToMaskNode: Convert a HW instrinsic vector node to a mask // // Arguments: // node -- The node to convert -// simdBaseJitType -- the base jit type of the converted node -// simdSize -- the simd size of the converted node +// simdBaseJitType -- The base jit type of the converted node +// simdSize -- The simd size of the converted node // // Return Value: // The node converted to the a mask type @@ -2262,19 +2262,23 @@ GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type, // gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic mask node to a vector // // Arguments: -// node -- The node to convert -// type -- The type of the node to convert to +// node -- The node to convert +// type -- The type of the node to convert to +// simdBaseJitType -- The base jit type of node to convert to +// simdSize -- The simd size of the node to convert to // // Return Value: // The node converted to the given type // -GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type) 
+GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(var_types type, + GenTreeHWIntrinsic* node, + CorInfoType simdBaseJitType, + unsigned simdSize) { assert(varTypeIsMask(node)); assert(varTypeIsSIMD(type)); - return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(), - node->GetSimdSize()); + return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, simdBaseJitType, simdSize); } #endif // FEATURE_HW_INTRINSICS From 941c30611fc32d23a2a8de26c3dffb665b699904 Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Mon, 15 Apr 2024 09:31:01 +0100 Subject: [PATCH 5/6] Use HW_Flag_BaseTypeFromFirstArg --- src/coreclr/jit/hwintrinsic.cpp | 60 ++++++++++++++--------- src/coreclr/jit/hwintrinsicarm64.cpp | 31 ------------ src/coreclr/jit/hwintrinsiclistarm64sve.h | 16 +++--- 3 files changed, 45 insertions(+), 62 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 5c0fd015c002f3..2d0e07eb526093 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -697,7 +697,20 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI } CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(typeHnd, &simdSize); + +#if defined(TARGET_ARM64) + if (simdBaseJitType == CORINFO_TYPE_UNDEF) + { + assert(simdSize == 0); // the argument is not a vector + } + else + { + assert(simdSize > 0); + } +#else assert((simdSize > 0) && (simdBaseJitType != CORINFO_TYPE_UNDEF)); +#endif + return simdSize; } @@ -1062,49 +1075,50 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, int numArgs = sig->numArgs; var_types retType = genActualType(JITtype2varType(sig->retType)); CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; + CorInfoType simdRetJitType = CORINFO_TYPE_UNDEF; GenTree* retNode = nullptr; + unsigned int simdRetSize = 0; if (retType == TYP_STRUCT) { - unsigned int sizeBytes; - simdBaseJitType = 
getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes); + simdRetJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdRetSize); if (HWIntrinsicInfo::IsMultiReg(intrinsic)) { - assert(sizeBytes == 0); + assert(simdRetSize == 0); } #ifdef TARGET_ARM64 else if ((intrinsic == NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar)) { - CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF; - var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType); + CorInfoType pSimdRetJitType = CORINFO_TYPE_UNDEF; + var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdRetJitType); if (retFieldType == TYP_STRUCT) { CORINFO_CLASS_HANDLE structType; - unsigned int sizeBytes = 0; + unsigned int simdRetSize = 0; // LoadAndInsertScalar that returns 2,3 or 4 vectors - assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF); + assert(pSimdRetJitType == CORINFO_TYPE_UNDEF); unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass); assert(fieldCount > 1); CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0); CorInfoType fieldType = info.compCompHnd->getFieldType(fieldHandle, &structType); - simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes); + simdRetJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &simdRetSize); switch (fieldCount) { case 2: - intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2 - : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2; + intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2 + : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2; break; case 3: - intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3 - : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3; + intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3 + : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3; break; case 4: - intrinsic = sizeBytes == 8 ? 
NI_AdvSimd_LoadAndInsertScalarVector64x4 - : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4; + intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4 + : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4; break; default: assert("unsupported"); @@ -1113,26 +1127,26 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, else { assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16)); - assert(isSupportedBaseType(intrinsic, simdBaseJitType)); - retType = getSIMDTypeForSize(sizeBytes); + assert(isSupportedBaseType(intrinsic, simdRetJitType)); + retType = getSIMDTypeForSize(simdRetSize); } } #endif else { // We want to return early here for cases where retType was TYP_STRUCT as per method signature and - // rather than deferring the decision after getting the simdBaseJitType of arg. - if (!isSupportedBaseType(intrinsic, simdBaseJitType)) + // rather than deferring the decision after getting the simdRetJitType of arg. + if (!isSupportedBaseType(intrinsic, simdRetJitType)) { return nullptr; } - assert(sizeBytes != 0); - retType = getSIMDTypeForSize(sizeBytes); + assert(simdRetSize != 0); + retType = getSIMDTypeForSize(simdRetSize); } } - simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, clsHnd, sig, simdBaseJitType); + simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, clsHnd, sig, simdRetJitType); if (simdBaseJitType == CORINFO_TYPE_UNDEF) { @@ -1381,7 +1395,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) - if ((simdSize != 8) && (simdSize != 16)) + if ((simdSize != 8) && (simdSize != 16) && (simdSize != 0)) #elif defined(TARGET_XARCH) if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64)) #endif // TARGET_* @@ -1607,7 +1621,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. 
assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); assert(nodeRetType == TYP_MASK); - retNode = gtNewSimdConvertMaskToVectorNode(retType, retNode->AsHWIntrinsic(), simdBaseJitType, simdSize); + retNode = gtNewSimdConvertMaskToVectorNode(retType, retNode->AsHWIntrinsic(), simdRetJitType, simdRetSize); } #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index 9ccfe07a563b73..dfb10d28b1da46 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2194,37 +2194,6 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - case NI_Sve_CreateWhileLessThanMask8Bit: - case NI_Sve_CreateWhileLessThanMask16Bit: - case NI_Sve_CreateWhileLessThanMask32Bit: - case NI_Sve_CreateWhileLessThanMask64Bit: - case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: - case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: - case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: - case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: - { - // This information is lost when the type is converted from CorInfoType to var_type. - // Ensure this is marked using GTF_UNSIGNED. - - CORINFO_ARG_LIST_HANDLE arg1 = sig->args; - CORINFO_ARG_LIST_HANDLE arg2 = info.compCompHnd->getArgNext(arg1); - var_types argType = TYP_UNKNOWN; - CORINFO_CLASS_HANDLE argClass = NO_CLASS_HANDLE; - - // Target instruction is dependent on whether the inputs are signed or unsigned. - // Use the input type for the base type. 
- simdBaseJitType = strip(info.compCompHnd->getArgType(sig, arg2, &argClass)); - - assert(sig->numArgs == 2); - argType = JITtype2varType(simdBaseJitType); - op2 = getArgForHWIntrinsic(argType, argClass); - argType = JITtype2varType(strip(info.compCompHnd->getArgType(sig, arg1, &argClass))); - op1 = impPopStack().val; - - retNode = gtNewSimdHWIntrinsicNode(retType, op1, op2, intrinsic, simdBaseJitType, simdSize); - } - break; - default: { return nullptr; diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index 632b642e957956..db2da0cab7dca3 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -27,14 +27,14 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) 
-HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) 
-HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_LowMaskedOperation) From 1bd25c4a8300f45fc42144408a3d23cbbe8403bf Mon Sep 17 00:00:00 2001 From: Alan Hayward Date: Wed, 24 Apr 2024 13:51:10 +0100 Subject: [PATCH 6/6] Set base type to return type and auxiliary type to input type --- src/coreclr/jit/compiler.h | 2 +- src/coreclr/jit/hwintrinsic.cpp | 71 ++++++++++----------- src/coreclr/jit/hwintrinsicarm64.cpp | 20 +++--- src/coreclr/jit/hwintrinsiccodegenarm64.cpp | 50 +++++++-------- src/coreclr/jit/hwintrinsiclistarm64sve.h | 17 ++--- 5 files changed, 76 insertions(+), 84 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 
e0fa6615e027d8..8f15dbe77446ec 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -3473,7 +3473,7 @@ class Compiler #if defined(TARGET_ARM64) GenTree* gtNewSimdConvertVectorToMaskNode(var_types type, GenTree* node, CorInfoType simdBaseJitType, unsigned simdSize); - GenTree* gtNewSimdConvertMaskToVectorNode(var_types type, GenTreeHWIntrinsic* node, CorInfoType simdBaseJitType, unsigned simdSize); + GenTree* gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type); #endif //------------------------------------------------------------------------ diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 2d0e07eb526093..8173875f5bda2e 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -697,20 +697,7 @@ unsigned HWIntrinsicInfo::lookupSimdSize(Compiler* comp, NamedIntrinsic id, CORI } CorInfoType simdBaseJitType = comp->getBaseJitTypeAndSizeOfSIMDType(typeHnd, &simdSize); - -#if defined(TARGET_ARM64) - if (simdBaseJitType == CORINFO_TYPE_UNDEF) - { - assert(simdSize == 0); // the argument is not a vector - } - else - { - assert(simdSize > 0); - } -#else assert((simdSize > 0) && (simdBaseJitType != CORINFO_TYPE_UNDEF)); -#endif - return simdSize; } @@ -1075,50 +1062,49 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, int numArgs = sig->numArgs; var_types retType = genActualType(JITtype2varType(sig->retType)); CorInfoType simdBaseJitType = CORINFO_TYPE_UNDEF; - CorInfoType simdRetJitType = CORINFO_TYPE_UNDEF; GenTree* retNode = nullptr; - unsigned int simdRetSize = 0; if (retType == TYP_STRUCT) { - simdRetJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &simdRetSize); + unsigned int sizeBytes; + simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(sig->retTypeSigClass, &sizeBytes); if (HWIntrinsicInfo::IsMultiReg(intrinsic)) { - assert(simdRetSize == 0); + assert(sizeBytes == 0); } #ifdef TARGET_ARM64 else if ((intrinsic == 
NI_AdvSimd_LoadAndInsertScalar) || (intrinsic == NI_AdvSimd_Arm64_LoadAndInsertScalar)) { - CorInfoType pSimdRetJitType = CORINFO_TYPE_UNDEF; - var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdRetJitType); + CorInfoType pSimdBaseJitType = CORINFO_TYPE_UNDEF; + var_types retFieldType = impNormStructType(sig->retTypeSigClass, &pSimdBaseJitType); if (retFieldType == TYP_STRUCT) { CORINFO_CLASS_HANDLE structType; - unsigned int simdRetSize = 0; + unsigned int sizeBytes = 0; // LoadAndInsertScalar that returns 2,3 or 4 vectors - assert(pSimdRetJitType == CORINFO_TYPE_UNDEF); + assert(pSimdBaseJitType == CORINFO_TYPE_UNDEF); unsigned fieldCount = info.compCompHnd->getClassNumInstanceFields(sig->retTypeSigClass); assert(fieldCount > 1); CORINFO_FIELD_HANDLE fieldHandle = info.compCompHnd->getFieldInClass(sig->retTypeClass, 0); CorInfoType fieldType = info.compCompHnd->getFieldType(fieldHandle, &structType); - simdRetJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &simdRetSize); + simdBaseJitType = getBaseJitTypeAndSizeOfSIMDType(structType, &sizeBytes); switch (fieldCount) { case 2: - intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2 - : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2; + intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x2 + : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x2; break; case 3: - intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3 - : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3; + intrinsic = sizeBytes == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x3 + : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x3; break; case 4: - intrinsic = simdRetSize == 8 ? NI_AdvSimd_LoadAndInsertScalarVector64x4 - : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4; + intrinsic = sizeBytes == 8 ? 
NI_AdvSimd_LoadAndInsertScalarVector64x4 + : NI_AdvSimd_Arm64_LoadAndInsertScalarVector128x4; break; default: assert("unsupported"); @@ -1127,26 +1113,26 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, else { assert((retFieldType == TYP_SIMD8) || (retFieldType == TYP_SIMD16)); - assert(isSupportedBaseType(intrinsic, simdRetJitType)); - retType = getSIMDTypeForSize(simdRetSize); + assert(isSupportedBaseType(intrinsic, simdBaseJitType)); + retType = getSIMDTypeForSize(sizeBytes); } } #endif else { // We want to return early here for cases where retType was TYP_STRUCT as per method signature and - // rather than deferring the decision after getting the simdRetJitType of arg. - if (!isSupportedBaseType(intrinsic, simdRetJitType)) + // rather than deferring the decision after getting the simdBaseJitType of arg. + if (!isSupportedBaseType(intrinsic, simdBaseJitType)) { return nullptr; } - assert(simdRetSize != 0); - retType = getSIMDTypeForSize(simdRetSize); + assert(sizeBytes != 0); + retType = getSIMDTypeForSize(sizeBytes); } } - simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, clsHnd, sig, simdRetJitType); + simdBaseJitType = getBaseJitTypeFromArgIfNeeded(intrinsic, clsHnd, sig, simdBaseJitType); if (simdBaseJitType == CORINFO_TYPE_UNDEF) { @@ -1395,7 +1381,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } #if defined(TARGET_ARM64) - if ((simdSize != 8) && (simdSize != 16) && (simdSize != 0)) + if ((simdSize != 8) && (simdSize != 16)) #elif defined(TARGET_XARCH) if ((simdSize != 16) && (simdSize != 32) && (simdSize != 64)) #endif // TARGET_* @@ -1529,6 +1515,17 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, } break; + case NI_Sve_CreateWhileLessThanMask8Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: + case NI_Sve_CreateWhileLessThanMask16Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: + case NI_Sve_CreateWhileLessThanMask32Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: + case 
NI_Sve_CreateWhileLessThanMask64Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: + retNode->AsHWIntrinsic()->SetAuxiliaryJitType(sigReader.op1JitType); + break; + default: break; } @@ -1621,7 +1618,7 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic, // HWInstrinsic returns a mask, but all returns must be vectors, so convert mask to vector. assert(HWIntrinsicInfo::ReturnsPerElementMask(intrinsic)); assert(nodeRetType == TYP_MASK); - retNode = gtNewSimdConvertMaskToVectorNode(retType, retNode->AsHWIntrinsic(), simdRetJitType, simdRetSize); + retNode = gtNewSimdConvertMaskToVectorNode(retNode->AsHWIntrinsic(), retType); } #endif // defined(TARGET_ARM64) diff --git a/src/coreclr/jit/hwintrinsicarm64.cpp b/src/coreclr/jit/hwintrinsicarm64.cpp index dfb10d28b1da46..8e3288f75d7090 100644 --- a/src/coreclr/jit/hwintrinsicarm64.cpp +++ b/src/coreclr/jit/hwintrinsicarm64.cpp @@ -2204,12 +2204,12 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } //------------------------------------------------------------------------ -// gtNewSimdConvertVectorToMaskNode: Convert a HW instrinsic vector node to a mask +// gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic vector node to a mask // // Arguments: // node -- The node to convert -// simdBaseJitType -- The base jit type of the converted node -// simdSize -- The simd size of the converted node +// simdBaseJitType -- the base jit type of the converted node +// simdSize -- the simd size of the converted node // // Return Value: // The node converted to the a mask type @@ -2231,23 +2231,19 @@ GenTree* Compiler::gtNewSimdConvertVectorToMaskNode(var_types type, // gtNewSimdConvertMaskToVectorNode: Convert a HW instrinsic mask node to a vector // // Arguments: -// node -- The node to convert -// type -- The type of the node to convert to -// simdBaseJitType -- The base jit type of node to convert to -// simdSize -- The simd size of the node to convert to +// node -- The node to convert +// 
type -- The type of the node to convert to // // Return Value: // The node converted to the given type // -GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(var_types type, - GenTreeHWIntrinsic* node, - CorInfoType simdBaseJitType, - unsigned simdSize) +GenTree* Compiler::gtNewSimdConvertMaskToVectorNode(GenTreeHWIntrinsic* node, var_types type) { assert(varTypeIsMask(node)); assert(varTypeIsSIMD(type)); - return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, simdBaseJitType, simdSize); + return gtNewSimdHWIntrinsicNode(type, node, NI_Sve_ConvertMaskToVector, node->GetSimdBaseJitType(), + node->GetSimdSize()); } #endif // FEATURE_HW_INTRINSICS diff --git a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp index fb9659ade88c91..318aaf058316aa 100644 --- a/src/coreclr/jit/hwintrinsiccodegenarm64.cpp +++ b/src/coreclr/jit/hwintrinsiccodegenarm64.cpp @@ -1301,40 +1301,38 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node) break; case NI_Sve_CreateWhileLessThanMask8Bit: - case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: - // Emit size is the size of the scalar operands. - emitSize = emitActualTypeSize(intrin.op1->TypeGet()); - // opt is based on the size of the returned vector - opt = INS_OPTS_SCALABLE_B; - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); - break; - case NI_Sve_CreateWhileLessThanMask16Bit: - case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: - // Emit size is the size of the scalar operands. - emitSize = emitActualTypeSize(intrin.op1->TypeGet()); - // opt is based on the size of the returned vector - opt = INS_OPTS_SCALABLE_H; - GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); - break; - case NI_Sve_CreateWhileLessThanMask32Bit: - case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: - // Emit size is the size of the scalar operands. 
- emitSize = emitActualTypeSize(intrin.op1->TypeGet()); - // opt is based on the size of the returned vector - opt = INS_OPTS_SCALABLE_S; + case NI_Sve_CreateWhileLessThanMask64Bit: + { + // Emit size and instruction are based on the scalar operands. + var_types auxType = node->GetAuxiliaryType(); + emitSize = emitActualTypeSize(auxType); + if (varTypeIsUnsigned(auxType)) + { + ins = INS_sve_whilelo; + } + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; + } - case NI_Sve_CreateWhileLessThanMask64Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask8Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask16Bit: + case NI_Sve_CreateWhileLessThanOrEqualMask32Bit: case NI_Sve_CreateWhileLessThanOrEqualMask64Bit: - // Emit size is the size of the scalar operands. - emitSize = emitActualTypeSize(intrin.op1->TypeGet()); - // opt is based on the size of the returned vector - opt = INS_OPTS_SCALABLE_D; + { + // Emit size and instruction are based on the scalar operands. + var_types auxType = node->GetAuxiliaryType(); + emitSize = emitActualTypeSize(auxType); + if (varTypeIsUnsigned(auxType)) + { + ins = INS_sve_whilels; + } + GetEmitter()->emitIns_R_R_R(ins, emitSize, targetReg, op1Reg, op2Reg, opt); break; + } default: unreached(); diff --git a/src/coreclr/jit/hwintrinsiclistarm64sve.h b/src/coreclr/jit/hwintrinsiclistarm64sve.h index db2da0cab7dca3..c054c90cca9afd 100644 --- a/src/coreclr/jit/hwintrinsiclistarm64sve.h +++ b/src/coreclr/jit/hwintrinsiclistarm64sve.h @@ -27,14 +27,15 @@ HARDWARE_INTRINSIC(Sve, CreateTrueMaskSingle, HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt16, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt32, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, 
INS_sve_ptrue, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, CreateTrueMaskUInt64, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_ptrue, INS_invalid, INS_invalid}, HW_Category_EnumPattern, HW_Flag_Scalable|HW_Flag_HasImmediateOperand|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_sve_whilelo, INS_sve_whilelt, INS_sve_whilelo, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, 
INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) -HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_sve_whilels, INS_sve_whilele, INS_sve_whilels, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) + +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid}, HW_Category_SIMD, 
HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilelt, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask16Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask32Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask64Bit, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) +HARDWARE_INTRINSIC(Sve, CreateWhileLessThanOrEqualMask8Bit, -1, 2, false, {INS_invalid, INS_sve_whilele, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMD, HW_Flag_Scalable|HW_Flag_SpecialCodeGen|HW_Flag_ReturnsPerElementMask) HARDWARE_INTRINSIC(Sve, LoadVector, -1, 2, true, {INS_sve_ld1b, INS_sve_ld1b, INS_sve_ld1h, INS_sve_ld1h, INS_sve_ld1w, INS_sve_ld1w, INS_sve_ld1d, INS_sve_ld1d, INS_sve_ld1w, INS_sve_ld1d}, HW_Category_MemoryLoad, HW_Flag_Scalable|HW_Flag_LowMaskedOperation)