diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 9a6749de5393ea..6f2aeef7d61b88 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -357,12 +357,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitRegisterFP, W("JitRegisterFP"), 3, "Control RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitELTHookEnabled, W("JitELTHookEnabled"), 0, "On ARM, setting this will emit Enter/Leave/TailCall callbacks") RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitMemStats, W("JitMemStats"), 0, "Display JIT memory usage statistics") RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100, "Max # of MapSelect's considered for a particular top-level invocation.") -#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64) -#define EXTERNAL_FeatureSIMD_Default 1 -#else // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)) -#define EXTERNAL_FeatureSIMD_Default 0 -#endif // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)) -RETAIL_CONFIG_DWORD_INFO(INTERNAL_SIMD16ByteOnly, W("SIMD16ByteOnly"), 0, "Limit maximum SIMD vector length to 16 bytes (used by x64_arm64_altjit)") RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynamicMethodDebugInfo"), 0, "Specifies whether debug info should be generated and tracked for dynamic methods") #ifdef FEATURE_MULTICOREJIT @@ -745,15 +739,17 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame" #endif #endif +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum width, in bits, that Vector is allowed to be. A value less than 128 is treated as the system default.") + // // Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h // #if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) //TODO: should implement LoongArch64's features. //TODO-RISCV64-CQ: should implement RISCV64's features. 
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled") #else -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled") #endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64) #if defined(TARGET_AMD64) || defined(TARGET_X86) diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h index d19a6ad75b9c94..5aecf46bf3928c 100644 --- a/src/coreclr/inc/corinfoinstructionset.h +++ b/src/coreclr/inc/corinfoinstructionset.h @@ -14,7 +14,7 @@ enum CORINFO_InstructionSet { InstructionSet_ILLEGAL = 0, - InstructionSet_NONE = 63, + InstructionSet_NONE = 127, #ifdef TARGET_ARM64 InstructionSet_ArmBase=1, InstructionSet_AdvSimd=2, @@ -29,14 +29,15 @@ enum CORINFO_InstructionSet InstructionSet_Vector128=11, InstructionSet_Dczva=12, InstructionSet_Rcpc=13, - InstructionSet_ArmBase_Arm64=14, - InstructionSet_AdvSimd_Arm64=15, - InstructionSet_Aes_Arm64=16, - InstructionSet_Crc32_Arm64=17, - InstructionSet_Dp_Arm64=18, - InstructionSet_Rdm_Arm64=19, - InstructionSet_Sha1_Arm64=20, - InstructionSet_Sha256_Arm64=21, + InstructionSet_VectorT128=14, + InstructionSet_ArmBase_Arm64=15, + InstructionSet_AdvSimd_Arm64=16, + InstructionSet_Aes_Arm64=17, + InstructionSet_Crc32_Arm64=18, + InstructionSet_Dp_Arm64=19, + InstructionSet_Rdm_Arm64=20, + InstructionSet_Sha1_Arm64=21, + InstructionSet_Sha256_Arm64=22, #endif // TARGET_ARM64 #ifdef TARGET_AMD64 InstructionSet_X86Base=1, @@ -71,35 +72,38 @@ enum CORINFO_InstructionSet InstructionSet_AVX512DQ_VL=30, InstructionSet_AVX512VBMI=31, InstructionSet_AVX512VBMI_VL=32, - 
InstructionSet_X86Base_X64=33, - InstructionSet_SSE_X64=34, - InstructionSet_SSE2_X64=35, - InstructionSet_SSE3_X64=36, - InstructionSet_SSSE3_X64=37, - InstructionSet_SSE41_X64=38, - InstructionSet_SSE42_X64=39, - InstructionSet_AVX_X64=40, - InstructionSet_AVX2_X64=41, - InstructionSet_AES_X64=42, - InstructionSet_BMI1_X64=43, - InstructionSet_BMI2_X64=44, - InstructionSet_FMA_X64=45, - InstructionSet_LZCNT_X64=46, - InstructionSet_PCLMULQDQ_X64=47, - InstructionSet_POPCNT_X64=48, - InstructionSet_AVXVNNI_X64=49, - InstructionSet_MOVBE_X64=50, - InstructionSet_X86Serialize_X64=51, - InstructionSet_AVX512F_X64=52, - InstructionSet_AVX512F_VL_X64=53, - InstructionSet_AVX512BW_X64=54, - InstructionSet_AVX512BW_VL_X64=55, - InstructionSet_AVX512CD_X64=56, - InstructionSet_AVX512CD_VL_X64=57, - InstructionSet_AVX512DQ_X64=58, - InstructionSet_AVX512DQ_VL_X64=59, - InstructionSet_AVX512VBMI_X64=60, - InstructionSet_AVX512VBMI_VL_X64=61, + InstructionSet_VectorT128=33, + InstructionSet_VectorT256=34, + InstructionSet_VectorT512=35, + InstructionSet_X86Base_X64=36, + InstructionSet_SSE_X64=37, + InstructionSet_SSE2_X64=38, + InstructionSet_SSE3_X64=39, + InstructionSet_SSSE3_X64=40, + InstructionSet_SSE41_X64=41, + InstructionSet_SSE42_X64=42, + InstructionSet_AVX_X64=43, + InstructionSet_AVX2_X64=44, + InstructionSet_AES_X64=45, + InstructionSet_BMI1_X64=46, + InstructionSet_BMI2_X64=47, + InstructionSet_FMA_X64=48, + InstructionSet_LZCNT_X64=49, + InstructionSet_PCLMULQDQ_X64=50, + InstructionSet_POPCNT_X64=51, + InstructionSet_AVXVNNI_X64=52, + InstructionSet_MOVBE_X64=53, + InstructionSet_X86Serialize_X64=54, + InstructionSet_AVX512F_X64=55, + InstructionSet_AVX512F_VL_X64=56, + InstructionSet_AVX512BW_X64=57, + InstructionSet_AVX512BW_VL_X64=58, + InstructionSet_AVX512CD_X64=59, + InstructionSet_AVX512CD_VL_X64=60, + InstructionSet_AVX512DQ_X64=61, + InstructionSet_AVX512DQ_VL_X64=62, + InstructionSet_AVX512VBMI_X64=63, + InstructionSet_AVX512VBMI_VL_X64=64, #endif 
// TARGET_AMD64 #ifdef TARGET_X86 InstructionSet_X86Base=1, @@ -134,35 +138,38 @@ enum CORINFO_InstructionSet InstructionSet_AVX512DQ_VL=30, InstructionSet_AVX512VBMI=31, InstructionSet_AVX512VBMI_VL=32, - InstructionSet_X86Base_X64=33, - InstructionSet_SSE_X64=34, - InstructionSet_SSE2_X64=35, - InstructionSet_SSE3_X64=36, - InstructionSet_SSSE3_X64=37, - InstructionSet_SSE41_X64=38, - InstructionSet_SSE42_X64=39, - InstructionSet_AVX_X64=40, - InstructionSet_AVX2_X64=41, - InstructionSet_AES_X64=42, - InstructionSet_BMI1_X64=43, - InstructionSet_BMI2_X64=44, - InstructionSet_FMA_X64=45, - InstructionSet_LZCNT_X64=46, - InstructionSet_PCLMULQDQ_X64=47, - InstructionSet_POPCNT_X64=48, - InstructionSet_AVXVNNI_X64=49, - InstructionSet_MOVBE_X64=50, - InstructionSet_X86Serialize_X64=51, - InstructionSet_AVX512F_X64=52, - InstructionSet_AVX512F_VL_X64=53, - InstructionSet_AVX512BW_X64=54, - InstructionSet_AVX512BW_VL_X64=55, - InstructionSet_AVX512CD_X64=56, - InstructionSet_AVX512CD_VL_X64=57, - InstructionSet_AVX512DQ_X64=58, - InstructionSet_AVX512DQ_VL_X64=59, - InstructionSet_AVX512VBMI_X64=60, - InstructionSet_AVX512VBMI_VL_X64=61, + InstructionSet_VectorT128=33, + InstructionSet_VectorT256=34, + InstructionSet_VectorT512=35, + InstructionSet_X86Base_X64=36, + InstructionSet_SSE_X64=37, + InstructionSet_SSE2_X64=38, + InstructionSet_SSE3_X64=39, + InstructionSet_SSSE3_X64=40, + InstructionSet_SSE41_X64=41, + InstructionSet_SSE42_X64=42, + InstructionSet_AVX_X64=43, + InstructionSet_AVX2_X64=44, + InstructionSet_AES_X64=45, + InstructionSet_BMI1_X64=46, + InstructionSet_BMI2_X64=47, + InstructionSet_FMA_X64=48, + InstructionSet_LZCNT_X64=49, + InstructionSet_PCLMULQDQ_X64=50, + InstructionSet_POPCNT_X64=51, + InstructionSet_AVXVNNI_X64=52, + InstructionSet_MOVBE_X64=53, + InstructionSet_X86Serialize_X64=54, + InstructionSet_AVX512F_X64=55, + InstructionSet_AVX512F_VL_X64=56, + InstructionSet_AVX512BW_X64=57, + InstructionSet_AVX512BW_VL_X64=58, + 
InstructionSet_AVX512CD_X64=59, + InstructionSet_AVX512CD_VL_X64=60, + InstructionSet_AVX512DQ_X64=61, + InstructionSet_AVX512DQ_VL_X64=62, + InstructionSet_AVX512VBMI_X64=63, + InstructionSet_AVX512VBMI_VL_X64=64, #endif // TARGET_X86 }; @@ -170,7 +177,7 @@ enum CORINFO_InstructionSet struct CORINFO_InstructionSetFlags { private: - static const int32_t FlagsFieldCount = 1; + static const int32_t FlagsFieldCount = 2; static const int32_t BitsPerFlagsField = sizeof(uint64_t) * 8; uint64_t _flags[FlagsFieldCount] = { }; @@ -404,6 +411,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_Vector64); if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd)) resultflags.RemoveInstructionSet(InstructionSet_Vector128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT128); #endif // TARGET_ARM64 #ifdef TARGET_AMD64 if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64)) @@ -594,6 +603,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && 
!resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT512); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) @@ -674,6 +689,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F)) + resultflags.RemoveInstructionSet(InstructionSet_VectorT512); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL)) resultflags.RemoveInstructionSet(InstructionSet_AVX512F); if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL)) @@ -738,6 +759,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "Dczva"; case InstructionSet_Rcpc : return "Rcpc"; + case InstructionSet_VectorT128 : + return "VectorT128"; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case InstructionSet_X86Base : @@ -862,6 +885,12 @@ inline const char 
*InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX512VBMI_VL"; case InstructionSet_AVX512VBMI_VL_X64 : return "AVX512VBMI_VL_X64"; + case InstructionSet_VectorT128 : + return "VectorT128"; + case InstructionSet_VectorT256 : + return "VectorT256"; + case InstructionSet_VectorT512 : + return "VectorT512"; #endif // TARGET_AMD64 #ifdef TARGET_X86 case InstructionSet_X86Base : @@ -928,6 +957,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet) return "AVX512VBMI"; case InstructionSet_AVX512VBMI_VL : return "AVX512VBMI_VL"; + case InstructionSet_VectorT128 : + return "VectorT128"; + case InstructionSet_VectorT256 : + return "VectorT256"; + case InstructionSet_VectorT512 : + return "VectorT512"; #endif // TARGET_X86 default: @@ -958,6 +993,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Sha256: return InstructionSet_Sha256; case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics; case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc; + case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; #endif // TARGET_ARM64 #ifdef TARGET_AMD64 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -989,6 +1025,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL; + case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; + case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; + case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; #endif // TARGET_AMD64 #ifdef TARGET_X86 case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base; @@ -1020,6 +1059,9 @@ inline CORINFO_InstructionSet 
InstructionSetFromR2RInstructionSet(ReadyToRunInst case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL; case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI; case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL; + case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128; + case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256; + case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512; #endif // TARGET_X86 default: diff --git a/src/coreclr/inc/jiteeversionguid.h b/src/coreclr/inc/jiteeversionguid.h index 9bb39b00de8d7d..ebcf4919cb6afb 100644 --- a/src/coreclr/inc/jiteeversionguid.h +++ b/src/coreclr/inc/jiteeversionguid.h @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID; #define GUID_DEFINED #endif // !GUID_DEFINED -constexpr GUID JITEEVersionIdentifier = { /* d4414be1-70e4-46ac-8866-ca3a6c2f8422 */ - 0xd4414be1, - 0x70e4, - 0x46ac, - {0x88, 0x66, 0xca, 0x3a, 0x6c, 0x2f, 0x84, 0x22} +constexpr GUID JITEEVersionIdentifier = { /* fda2f9dd-6b3e-4ecd-a7b8-79e5edf1f072 */ + 0xfda2f9dd, + 0x6b3e, + 0x4ecd, + {0xa7, 0xb8, 0x79, 0xe5, 0xed, 0xf1, 0xf0, 0x72} }; ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/src/coreclr/inc/readytoruninstructionset.h b/src/coreclr/inc/readytoruninstructionset.h index faf4a5028cc1c9..0a9a78e03f6c15 100644 --- a/src/coreclr/inc/readytoruninstructionset.h +++ b/src/coreclr/inc/readytoruninstructionset.h @@ -47,6 +47,9 @@ enum ReadyToRunInstructionSet READYTORUN_INSTRUCTION_Avx512DQ_VL=36, READYTORUN_INSTRUCTION_Avx512Vbmi=37, READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38, + READYTORUN_INSTRUCTION_VectorT128=39, + READYTORUN_INSTRUCTION_VectorT256=40, + READYTORUN_INSTRUCTION_VectorT512=41, }; diff --git a/src/coreclr/jit/codegenxarch.cpp b/src/coreclr/jit/codegenxarch.cpp index 3d6291e7c11c36..6d7a973ad07520 100644 --- a/src/coreclr/jit/codegenxarch.cpp +++ 
b/src/coreclr/jit/codegenxarch.cpp @@ -3401,15 +3401,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node) assert(srcOffset < (INT32_MAX - static_cast(size))); assert(dstOffset < (INT32_MAX - static_cast(size))); - if (size >= XMM_REGSIZE_BYTES) + // Get the largest SIMD register available if the size is large enough + unsigned regSize = compiler->roundDownSIMDSize(size); + + if ((size >= regSize) && (regSize > 0)) { regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT); instruction simdMov = simdUnalignedMovIns(); - // Get the largest SIMD register available if the size is large enough - unsigned regSize = compiler->roundDownSIMDSize(size); - auto emitSimdMovs = [&]() { if (srcLclNum != BAD_VAR_NUM) { diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 6ba5e070f19254..65379c6c8240fb 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -8662,17 +8662,31 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX CLANG_FORMAT_COMMENT_ANCHOR; #if defined(TARGET_XARCH) - if (compExactlyDependsOn(InstructionSet_AVX2)) + // TODO-XArch: Add support for 512-bit Vector + assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT512)); + + if (compExactlyDependsOn(InstructionSet_VectorT256)) { - // TODO-XArch-AVX512 : Return ZMM_REGSIZE_BYTES once Vector supports AVX512. 
+ assert(!compIsaSupportedDebugOnly(InstructionSet_VectorT128)); return YMM_REGSIZE_BYTES; } - else + else if (compExactlyDependsOn(InstructionSet_VectorT128)) { return XMM_REGSIZE_BYTES; } + else + { + return 0; + } #elif defined(TARGET_ARM64) - return FP_REGSIZE_BYTES; + if (compExactlyDependsOn(InstructionSet_VectorT128)) + { + return FP_REGSIZE_BYTES; + } + else + { + return 0; + } #else assert(!"getVectorTByteLength() unimplemented on target arch"); unreached(); @@ -8691,23 +8705,33 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX uint32_t getMaxVectorByteLength() const { #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH) - if (compOpportunisticallyDependsOn(InstructionSet_AVX)) + if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) { - if (compOpportunisticallyDependsOn(InstructionSet_AVX512F)) - { - return ZMM_REGSIZE_BYTES; - } - else - { - return YMM_REGSIZE_BYTES; - } + return ZMM_REGSIZE_BYTES; } - else + else if (compOpportunisticallyDependsOn(InstructionSet_AVX)) + { + return YMM_REGSIZE_BYTES; + } + else if (compOpportunisticallyDependsOn(InstructionSet_SSE)) { return XMM_REGSIZE_BYTES; } + else + { + assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableSSE() == 0)); + return 0; + } #elif defined(TARGET_ARM64) - return FP_REGSIZE_BYTES; + if (compOpportunisticallyDependsOn(InstructionSet_AdvSimd)) + { + return FP_REGSIZE_BYTES; + } + else + { + assert((JitConfig.EnableHWIntrinsic() == 0) || (JitConfig.EnableArm64AdvSimd() == 0)); + return 0; + } #else assert(!"getMaxVectorByteLength() unimplemented on target arch"); unreached(); diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 379509a17180e6..9c38e454126ad8 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -783,10 +783,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, return impSpecialIntrinsic(NI_Vector128_ToVector256, clsHnd, 
method, sig, simdBaseJitType, retType, simdSize); } - else + else if (vectorTByteLength == XMM_REGSIZE_BYTES) { - assert(vectorTByteLength == XMM_REGSIZE_BYTES); - // We fold away the cast here, as it only exists to satisfy // the type system. It is safe to do this here since the retNode type // and the signature return type are both the same TYP_SIMD. @@ -795,6 +793,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, SetOpLclRelatedToSIMDIntrinsic(retNode); assert(retNode->gtType == getSIMDTypeForSize(getSIMDTypeSizeInBytes(sig->retTypeSigClass))); } + else + { + assert(vectorTByteLength == 0); + } break; } @@ -919,10 +921,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, break; } - else + else if (vectorTByteLength == XMM_REGSIZE_BYTES) { - assert(vectorTByteLength == XMM_REGSIZE_BYTES); - if (compExactlyDependsOn(InstructionSet_AVX)) { // We support Vector256 but Vector is only 16-bytes, so we should @@ -941,6 +941,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } } + else + { + assert(vectorTByteLength == 0); + } break; } @@ -969,10 +973,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } break; } - else + else if (vectorTByteLength == XMM_REGSIZE_BYTES) { - assert(vectorTByteLength == XMM_REGSIZE_BYTES); - if (compExactlyDependsOn(InstructionSet_AVX512F)) { // We support Vector512 but Vector is only 16-bytes, so we should @@ -991,6 +993,10 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, } } } + else + { + assert(vectorTByteLength == 0); + } break; } diff --git a/src/coreclr/jit/lclvars.cpp b/src/coreclr/jit/lclvars.cpp index ff91e429c321eb..80d9493daca427 100644 --- a/src/coreclr/jit/lclvars.cpp +++ b/src/coreclr/jit/lclvars.cpp @@ -1757,7 +1757,8 @@ bool Compiler::StructPromotionHelper::CanPromoteStructType(CORINFO_CLASS_HANDLE #if defined(FEATURE_SIMD) // getMaxVectorByteLength() represents the size of the largest primitive type that we can struct promote. 
- const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * compiler->getMaxVectorByteLength(); + const unsigned maxSize = + MAX_NumOfFieldsInPromotableStruct * max(compiler->getMaxVectorByteLength(), sizeof(double)); #else // !FEATURE_SIMD // sizeof(double) represents the size of the largest primitive type that we can struct promote. const unsigned maxSize = MAX_NumOfFieldsInPromotableStruct * sizeof(double); diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp index 765de402b90e23..08ae13c6cff2f3 100644 --- a/src/coreclr/jit/lsraxarch.cpp +++ b/src/coreclr/jit/lsraxarch.cpp @@ -1166,7 +1166,7 @@ int LinearScan::BuildCall(GenTreeCall* call) // The return value will be on the X87 stack, and we will need to move it. dstCandidates = allRegs(registerType); #else // !TARGET_X86 - dstCandidates = RBM_FLOATRET; + dstCandidates = RBM_FLOATRET; #endif // !TARGET_X86 } else @@ -1378,12 +1378,10 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) { case GenTreeBlk::BlkOpKindUnroll: { -#ifdef TARGET_AMD64 - const bool canUse16BytesSimdMov = !blkNode->IsOnHeapAndContainsReferences(); - const bool willUseSimdMov = canUse16BytesSimdMov && (size >= 16); -#else - const bool willUseSimdMov = (size >= 16); -#endif + const bool canUse16BytesSimdMov = + !blkNode->IsOnHeapAndContainsReferences() && compiler->IsBaselineSimdIsaSupported(); + const bool willUseSimdMov = canUse16BytesSimdMov && (size >= XMM_REGSIZE_BYTES); + if (willUseSimdMov) { buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); @@ -1440,8 +1438,26 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) break; case GenTreeBlk::BlkOpKindUnroll: - if ((size % XMM_REGSIZE_BYTES) != 0) + { + unsigned regSize = compiler->roundDownSIMDSize(size); + unsigned remainder = size; + + if ((size >= regSize) && (regSize > 0)) + { + // We need a float temporary if we're doing SIMD operations + + buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); + 
SetContainsAVXFlags(size); + + remainder %= regSize; + } + + if ((remainder > 0) && ((regSize == 0) || (isPow2(remainder) && (remainder <= REGSIZE_BYTES)))) { + // We need an int temporary if we're not doing SIMD operations + // or if are but the remainder is a power of 2 and less than the + // size of a register + regMaskTP regMask = availableIntRegs; #ifdef TARGET_X86 if ((size & 1) != 0) @@ -1453,13 +1469,8 @@ int LinearScan::BuildBlockStore(GenTreeBlk* blkNode) #endif internalIntDef = buildInternalIntRegisterDefForNode(blkNode, regMask); } - - if (size >= XMM_REGSIZE_BYTES) - { - buildInternalFloatRegisterDefForNode(blkNode, internalFloatRegCandidates()); - SetContainsAVXFlags(size); - } break; + } case GenTreeBlk::BlkOpKindUnrollMemmove: { diff --git a/src/coreclr/jit/simd.cpp b/src/coreclr/jit/simd.cpp index 97b7d06e51ebd0..2e10faae2f0188 100644 --- a/src/coreclr/jit/simd.cpp +++ b/src/coreclr/jit/simd.cpp @@ -299,7 +299,10 @@ CorInfoType Compiler::getBaseJitTypeAndSizeOfSIMDType(CORINFO_CLASS_HANDLE typeH JITDUMP(" Found Vector<%s>\n", varTypeName(JitType2PreciseVarType(simdBaseJitType))); size = getVectorTByteLength(); - assert(size != 0); + if (size == 0) + { + return CORINFO_TYPE_UNDEF; + } break; } diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp index 0cb64b37428e37..7f71f1c62ecf8b 100644 --- a/src/coreclr/jit/simdashwintrinsic.cpp +++ b/src/coreclr/jit/simdashwintrinsic.cpp @@ -219,8 +219,14 @@ SimdAsHWIntrinsicClassId SimdAsHWIntrinsicInfo::lookupClassId(Compiler* comp, } #endif // TARGET_XARCH - assert(vectorTByteLength == 16); - return SimdAsHWIntrinsicClassId::VectorT128; + if (vectorTByteLength == 16) + { + return SimdAsHWIntrinsicClassId::VectorT128; + } + else + { + return SimdAsHWIntrinsicClassId::Unknown; + } } break; } diff --git a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets index 
e30405481f2ee1..e11fac5806808d 100644 --- a/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets +++ b/src/coreclr/nativeaot/BuildIntegration/Microsoft.NETCore.Native.targets @@ -229,6 +229,7 @@ The .NET Foundation licenses this file to you under the MIT license. + diff --git a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h index ad7d2e11ee69ac..41ec8dec9c3d02 100644 --- a/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h +++ b/src/coreclr/nativeaot/Runtime/IntrinsicConstants.h @@ -35,6 +35,9 @@ enum XArchIntrinsicConstants XArchIntrinsicConstants_Avx512Vbmi = 0x800000, XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000, XArchIntrinsicConstants_Serialize = 0x2000000, + XArchIntrinsicConstants_VectorT128 = 0x4000000, + XArchIntrinsicConstants_VectorT256 = 0x8000000, + XArchIntrinsicConstants_VectorT512 = 0x10000000, }; #endif //HOST_X86 || HOST_AMD64 @@ -50,6 +53,7 @@ enum ARM64IntrinsicConstants ARM64IntrinsicConstants_Sha256 = 0x0040, ARM64IntrinsicConstants_Atomics = 0x0080, ARM64IntrinsicConstants_Rcpc = 0x0100, + ARM64IntrinsicConstants_VectorT128 = 0x0200, }; // Bit position for the ARM64IntrinsicConstants_Atomics flags, to be used with tbz / tbnz instructions diff --git a/src/coreclr/nativeaot/Runtime/startup.cpp b/src/coreclr/nativeaot/Runtime/startup.cpp index 7ce983600df46d..58740157c49609 100644 --- a/src/coreclr/nativeaot/Runtime/startup.cpp +++ b/src/coreclr/nativeaot/Runtime/startup.cpp @@ -202,6 +202,8 @@ bool DetectCPUFeatures() if ((cpuidInfo[CPUID_EDX] & requiredBaselineEdxFlags) == requiredBaselineEdxFlags) { + g_cpuFeatures |= XArchIntrinsicConstants_VectorT128; + if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI { g_cpuFeatures |= XArchIntrinsicConstants_Aes; @@ -259,12 +261,14 @@ bool DetectCPUFeatures() if ((cpuidInfo[CPUID_EBX] & (1 << 5)) != 0) // AVX2 { g_cpuFeatures |= XArchIntrinsicConstants_Avx2; + g_cpuFeatures |= XArchIntrinsicConstants_VectorT256; if 
(PalIsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { g_cpuFeatures |= XArchIntrinsicConstants_Avx512f; + g_cpuFeatures |= XArchIntrinsicConstants_VectorT512; bool isAVX512_VLSupported = false; if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL diff --git a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp index a856be48f4ab8c..69ee3da64a36d1 100644 --- a/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp +++ b/src/coreclr/nativeaot/Runtime/unix/PalRedhawkUnix.cpp @@ -1446,7 +1446,7 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) #endif #ifdef HWCAP_ASIMD if (hwCap & HWCAP_ASIMD) - *flags |= ARM64IntrinsicConstants_AdvSimd; + *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; #endif #ifdef HWCAP_ASIMDRDM if (hwCap & HWCAP_ASIMDRDM) @@ -1545,7 +1545,7 @@ REDHAWK_PALEXPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) // Every ARM64 CPU should support SIMD and FP // If the OS have no function to query for CPU capabilities we set just these - *flags |= ARM64IntrinsicConstants_AdvSimd; + *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; #endif // HAVE_AUXV_HWCAP_H } #endif diff --git a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp index c7b1f3e313fa39..0c591bd8a89860 100644 --- a/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp +++ b/src/coreclr/nativeaot/Runtime/windows/PalRedhawkMinWin.cpp @@ -796,7 +796,7 @@ REDHAWK_PALIMPORT void REDHAWK_PALAPI PAL_GetCpuCapabilityFlags(int* flags) #endif // FP and SIMD support are enabled by default - *flags |= ARM64IntrinsicConstants_AdvSimd; + *flags |= ARM64IntrinsicConstants_AdvSimd | ARM64IntrinsicConstants_VectorT128; if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) 
{ diff --git a/src/coreclr/nativeaot/docs/optimizing.md b/src/coreclr/nativeaot/docs/optimizing.md index 9f2c9ae70d12a8..9a90eb97c21286 100644 --- a/src/coreclr/nativeaot/docs/optimizing.md +++ b/src/coreclr/nativeaot/docs/optimizing.md @@ -40,4 +40,5 @@ Since `PublishTrimmed` is implied to be true with Native AOT, some framework fea * `Speed`: when generating optimized code, favor code execution speed. * `Size`: when generating optimized code, favor smaller code size. * ``: By default, the compiler targets the minimum instruction set supported by the target OS and architecture. This option allows targeting newer instruction sets for better performance. The native binary will require the instruction sets to be supported by the hardware in order to run. For example, `avx2,bmi2,fma,pclmul,popcnt,aes` will produce binary that takes advantage of instruction sets that are typically present on current Intel and AMD processors. Run `ilc --help` for the full list of available instruction sets. `ilc` can be executed from the NativeAOT package in your local nuget cache e.g. `%USERPROFILE%\.nuget\packages\runtime.win-x64.microsoft.dotnet.ilcompiler\8.0.0-...\tools\ilc.exe` on Windows or `~/.nuget/packages/runtime.linux-arm64.microsoft.dotnet.ilcompiler/8.0.0-.../tools/ilc` on Linux. +* ``: By default, the compiler targets a `Vector` size of `16` or `32` bytes, depending on the underlying instruction sets supported. This option allows specifying a different maximum bit width. For example, by default on x64 hardware `Vector` will be 16 bytes. However, if `AVX2` is targeted then `Vector` will automatically grow to be 32 bytes instead; setting `128` would keep the size as 16 bytes. Alternatively, even if `AVX512F` is targeted, then by default `Vector` will not grow larger than 32 bytes; setting `512` would allow it to grow to 64 bytes.
diff --git a/src/coreclr/pal/src/misc/jitsupport.cpp b/src/coreclr/pal/src/misc/jitsupport.cpp index f7ca5c36e71217..30426290043514 100644 --- a/src/coreclr/pal/src/misc/jitsupport.cpp +++ b/src/coreclr/pal/src/misc/jitsupport.cpp @@ -235,7 +235,10 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags) #endif #ifdef HWCAP_ASIMD if (hwCap & HWCAP_ASIMD) + { flags->Set(InstructionSet_AdvSimd); + flags->Set(InstructionSet_VectorT128); + } #endif #ifdef HWCAP_ASIMDRDM if (hwCap & HWCAP_ASIMDRDM) @@ -292,6 +295,7 @@ PAL_GetJitCpuCapabilityFlags(CORJIT_FLAGS *flags) // Set baseline flags if OS has not exposed mechanism for us to determine CPU capabilities flags->Set(InstructionSet_ArmBase); flags->Set(InstructionSet_AdvSimd); + flags->Set(InstructionSet_VectorT128); // flags->Set(CORJIT_FLAGS::CORJIT_FLAG_HAS_ARM64_FP); #endif // HAVE_AUXV_HWCAP_H } diff --git a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs index 6d124bda4673c6..46528387fec8ca 100644 --- a/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs +++ b/src/coreclr/tools/Common/Compiler/InstructionSetSupport.cs @@ -96,18 +96,37 @@ public SimdVectorLength GetVectorTSimdVector() { if ((_targetArchitecture == TargetArchitecture.X64) || (_targetArchitecture == TargetArchitecture.X86)) { - Debug.Assert(InstructionSet.X64_AVX2 == InstructionSet.X86_AVX2); - Debug.Assert(InstructionSet.X64_SSE2 == InstructionSet.X86_SSE2); - if (IsInstructionSetSupported(InstructionSet.X86_AVX2)) + Debug.Assert(InstructionSet.X64_VectorT128 == InstructionSet.X86_VectorT128); + Debug.Assert(InstructionSet.X64_VectorT256 == InstructionSet.X86_VectorT256); + Debug.Assert(InstructionSet.X64_VectorT512 == InstructionSet.X86_VectorT512); + + // TODO-XArch: Add support for 512-bit Vector + Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT512)); + + if (IsInstructionSetSupported(InstructionSet.X64_VectorT256)) + { + 
Debug.Assert(!IsInstructionSetSupported(InstructionSet.X64_VectorT128)); return SimdVectorLength.Vector256Bit; - else if (IsInstructionSetExplicitlyUnsupported(InstructionSet.X86_AVX2) && IsInstructionSetSupported(InstructionSet.X64_SSE2)) + } + else if (IsInstructionSetSupported(InstructionSet.X64_VectorT128)) + { return SimdVectorLength.Vector128Bit; + } else + { return SimdVectorLength.None; + } } else if (_targetArchitecture == TargetArchitecture.ARM64) { - return SimdVectorLength.Vector128Bit; + if (IsInstructionSetSupported(InstructionSet.ARM64_VectorT128)) + { + return SimdVectorLength.Vector128Bit; + } + else + { + return SimdVectorLength.None; + } } else if (_targetArchitecture == TargetArchitecture.ARM) { @@ -183,15 +202,24 @@ public static InstructionSetFlags GetNonSpecifiableInstructionSetsForArch(Target return s_nonSpecifiableInstructionSets[architecture]; } - private readonly SortedSet _supportedInstructionSets = new SortedSet(); - private readonly SortedSet _unsupportedInstructionSets = new SortedSet(); + private readonly SortedSet _supportedInstructionSets; + private readonly SortedSet _unsupportedInstructionSets; private readonly TargetArchitecture _architecture; public InstructionSetSupportBuilder(TargetArchitecture architecture) { + _supportedInstructionSets = new SortedSet(); + _unsupportedInstructionSets = new SortedSet(); _architecture = architecture; } + public InstructionSetSupportBuilder(InstructionSetSupportBuilder other) + { + _supportedInstructionSets = new SortedSet(other._supportedInstructionSets); + _unsupportedInstructionSets = new SortedSet(other._unsupportedInstructionSets); + _architecture = other._architecture; + } + /// /// Add a supported instruction set to the specified list. 
/// @@ -245,9 +273,10 @@ public bool RemoveInstructionSetSupport(string instructionSet) /// Seal modifications to instruction set support /// /// returns "false" if instruction set isn't valid on this architecture - public bool ComputeInstructionSetFlags(out InstructionSetFlags supportedInstructionSets, - out InstructionSetFlags unsupportedInstructionSets, - Action invalidInstructionSetImplication) + public bool ComputeInstructionSetFlags(int maxVectorTBitWidth, + out InstructionSetFlags supportedInstructionSets, + out InstructionSetFlags unsupportedInstructionSets, + Action invalidInstructionSetImplication) { supportedInstructionSets = new InstructionSetFlags(); unsupportedInstructionSets = new InstructionSetFlags(); @@ -288,6 +317,51 @@ public bool ComputeInstructionSetFlags(out InstructionSetFlags supportedInstruct } } + switch (_architecture) + { + case TargetArchitecture.X64: + case TargetArchitecture.X86: + { + Debug.Assert(InstructionSet.X86_SSE2 == InstructionSet.X64_SSE2); + Debug.Assert(InstructionSet.X86_AVX2 == InstructionSet.X64_AVX2); + Debug.Assert(InstructionSet.X86_AVX512F == InstructionSet.X64_AVX512F); + + Debug.Assert(InstructionSet.X86_VectorT128 == InstructionSet.X64_VectorT128); + Debug.Assert(InstructionSet.X86_VectorT256 == InstructionSet.X64_VectorT256); + Debug.Assert(InstructionSet.X86_VectorT512 == InstructionSet.X64_VectorT512); + + // We only want one size supported for Vector and we want the other sizes explicitly + // unsupported to ensure we throw away the given methods if runtime picks a larger size + + Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.X86_SSE2)); + Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128)); + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); + + if (supportedInstructionSets.HasInstructionSet(InstructionSet.X86_AVX2)) + { + if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) + { + 
supportedInstructionSets.RemoveInstructionSet(InstructionSet.X86_VectorT128); + supportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT256); + + unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT128); + unsupportedInstructionSets.AddInstructionSet(InstructionSet.X86_VectorT512); + } + + // TODO-XArch: Add support for 512-bit Vector + } + break; + } + + case TargetArchitecture.ARM64: + { + Debug.Assert(supportedInstructionSets.HasInstructionSet(InstructionSet.ARM64_AdvSimd)); + Debug.Assert((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 128)); + supportedInstructionSets.AddInstructionSet(InstructionSet.ARM64_VectorT128); + break; + } + } + return true; } } diff --git a/src/coreclr/tools/Common/InstructionSetHelpers.cs b/src/coreclr/tools/Common/InstructionSetHelpers.cs index 93b04a1f7dbdd1..c969927e1531f7 100644 --- a/src/coreclr/tools/Common/InstructionSetHelpers.cs +++ b/src/coreclr/tools/Common/InstructionSetHelpers.cs @@ -11,7 +11,7 @@ namespace System.CommandLine { internal static partial class Helpers { - public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, TargetArchitecture targetArchitecture, TargetOS targetOS, + public static InstructionSetSupport ConfigureInstructionSetSupport(string instructionSet, int maxVectorTBitWidth, TargetArchitecture targetArchitecture, TargetOS targetOS, string mustNotBeMessage, string invalidImplicationMessage) { InstructionSetSupportBuilder instructionSetSupportBuilder = new(targetArchitecture); @@ -74,11 +74,16 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru } } - instructionSetSupportBuilder.ComputeInstructionSetFlags(out var supportedInstructionSet, out var unsupportedInstructionSet, + instructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var supportedInstructionSet, out var unsupportedInstructionSet, (string specifiedInstructionSet, string impliedInstructionSet) => throw new 
CommandLineException(string.Format(invalidImplicationMessage, specifiedInstructionSet, impliedInstructionSet))); - InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(targetArchitecture); + // Due to expansion by implication, the optimistic set is most often a pure superset of the supported set + // + // However, there are some gaps in cases like Arm64 neon where none of the optimistic sets imply it. Likewise, + // the optimistic set would be missing the explicitly unsupported sets. So we effectively clone the list and + // tack on the additional optimistic bits after. This ensures the optimistic set remains an accurate superset + InstructionSetSupportBuilder optimisticInstructionSetSupportBuilder = new InstructionSetSupportBuilder(instructionSetSupportBuilder); // Optimistically assume some instruction sets are present. if (targetArchitecture == TargetArchitecture.X86 || targetArchitecture == TargetArchitecture.X64) @@ -112,10 +117,6 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("bmi2"); } - if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX2)) - { - } - Debug.Assert(InstructionSet.X64_AVX512F == InstructionSet.X86_AVX512F); if (supportedInstructionSet.HasInstructionSet(InstructionSet.X64_AVX512F)) { @@ -143,7 +144,7 @@ public static InstructionSetSupport ConfigureInstructionSetSupport(string instru optimisticInstructionSetSupportBuilder.AddSupportedInstructionSet("rcpc"); } - optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(out var optimisticInstructionSet, out _, + optimisticInstructionSetSupportBuilder.ComputeInstructionSetFlags(maxVectorTBitWidth, out var optimisticInstructionSet, out _, (string specifiedInstructionSet, string impliedInstructionSet) => throw new NotSupportedException()); optimisticInstructionSet.Remove(unsupportedInstructionSet); 
optimisticInstructionSet.Add(supportedInstructionSet); diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs index 0068f0be007764..32b60ecbcda7da 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs @@ -50,6 +50,9 @@ public enum ReadyToRunInstructionSet Avx512DQ_VL=36, Avx512Vbmi=37, Avx512Vbmi_VL=38, + VectorT128=39, + VectorT256=40, + VectorT512=41, } } diff --git a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs index b387eedd89f2b7..f593808be32989 100644 --- a/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs +++ b/src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs @@ -44,6 +44,7 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.ARM64_Vector128: return null; case InstructionSet.ARM64_Dczva: return null; case InstructionSet.ARM64_Rcpc: return ReadyToRunInstructionSet.Rcpc; + case InstructionSet.ARM64_VectorT128: return ReadyToRunInstructionSet.VectorT128; default: throw new Exception("Unknown instruction set"); } @@ -114,6 +115,9 @@ public static class ReadyToRunInstructionSetHelper case InstructionSet.X64_AVX512VBMI_X64: return ReadyToRunInstructionSet.Avx512Vbmi; case InstructionSet.X64_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X64_AVX512VBMI_VL_X64: return ReadyToRunInstructionSet.Avx512Vbmi_VL; + case InstructionSet.X64_VectorT128: return ReadyToRunInstructionSet.VectorT128; + case InstructionSet.X64_VectorT256: return ReadyToRunInstructionSet.VectorT256; + case InstructionSet.X64_VectorT512: return ReadyToRunInstructionSet.VectorT512; default: throw new Exception("Unknown instruction set"); } @@ -184,6 +188,9 @@ public static class 
ReadyToRunInstructionSetHelper case InstructionSet.X86_AVX512VBMI_X64: return null; case InstructionSet.X86_AVX512VBMI_VL: return ReadyToRunInstructionSet.Avx512Vbmi_VL; case InstructionSet.X86_AVX512VBMI_VL_X64: return null; + case InstructionSet.X86_VectorT128: return ReadyToRunInstructionSet.VectorT128; + case InstructionSet.X86_VectorT256: return ReadyToRunInstructionSet.VectorT256; + case InstructionSet.X86_VectorT512: return ReadyToRunInstructionSet.VectorT512; default: throw new Exception("Unknown instruction set"); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs index 1557059b76ab67..f26abd68262391 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs @@ -15,7 +15,7 @@ namespace Internal.JitInterface public enum InstructionSet { ILLEGAL = 0, - NONE = 63, + NONE = 127, ARM64_ArmBase = InstructionSet_ARM64.ArmBase, ARM64_AdvSimd = InstructionSet_ARM64.AdvSimd, ARM64_Aes = InstructionSet_ARM64.Aes, @@ -29,6 +29,7 @@ public enum InstructionSet ARM64_Vector128 = InstructionSet_ARM64.Vector128, ARM64_Dczva = InstructionSet_ARM64.Dczva, ARM64_Rcpc = InstructionSet_ARM64.Rcpc, + ARM64_VectorT128 = InstructionSet_ARM64.VectorT128, ARM64_ArmBase_Arm64 = InstructionSet_ARM64.ArmBase_Arm64, ARM64_AdvSimd_Arm64 = InstructionSet_ARM64.AdvSimd_Arm64, ARM64_Aes_Arm64 = InstructionSet_ARM64.Aes_Arm64, @@ -69,6 +70,9 @@ public enum InstructionSet X64_AVX512DQ_VL = InstructionSet_X64.AVX512DQ_VL, X64_AVX512VBMI = InstructionSet_X64.AVX512VBMI, X64_AVX512VBMI_VL = InstructionSet_X64.AVX512VBMI_VL, + X64_VectorT128 = InstructionSet_X64.VectorT128, + X64_VectorT256 = InstructionSet_X64.VectorT256, + X64_VectorT512 = InstructionSet_X64.VectorT512, X64_X86Base_X64 = InstructionSet_X64.X86Base_X64, X64_SSE_X64 = InstructionSet_X64.SSE_X64, X64_SSE2_X64 = InstructionSet_X64.SSE2_X64, @@ -130,6 +134,9 
@@ public enum InstructionSet X86_AVX512DQ_VL = InstructionSet_X86.AVX512DQ_VL, X86_AVX512VBMI = InstructionSet_X86.AVX512VBMI, X86_AVX512VBMI_VL = InstructionSet_X86.AVX512VBMI_VL, + X86_VectorT128 = InstructionSet_X86.VectorT128, + X86_VectorT256 = InstructionSet_X86.VectorT256, + X86_VectorT512 = InstructionSet_X86.VectorT512, X86_X86Base_X64 = InstructionSet_X86.X86Base_X64, X86_SSE_X64 = InstructionSet_X86.SSE_X64, X86_SSE2_X64 = InstructionSet_X86.SSE2_X64, @@ -177,14 +184,15 @@ public enum InstructionSet_ARM64 Vector128 = 11, Dczva = 12, Rcpc = 13, - ArmBase_Arm64 = 14, - AdvSimd_Arm64 = 15, - Aes_Arm64 = 16, - Crc32_Arm64 = 17, - Dp_Arm64 = 18, - Rdm_Arm64 = 19, - Sha1_Arm64 = 20, - Sha256_Arm64 = 21, + VectorT128 = 14, + ArmBase_Arm64 = 15, + AdvSimd_Arm64 = 16, + Aes_Arm64 = 17, + Crc32_Arm64 = 18, + Dp_Arm64 = 19, + Rdm_Arm64 = 20, + Sha1_Arm64 = 21, + Sha256_Arm64 = 22, } public enum InstructionSet_X64 @@ -223,35 +231,38 @@ public enum InstructionSet_X64 AVX512DQ_VL = 30, AVX512VBMI = 31, AVX512VBMI_VL = 32, - X86Base_X64 = 33, - SSE_X64 = 34, - SSE2_X64 = 35, - SSE3_X64 = 36, - SSSE3_X64 = 37, - SSE41_X64 = 38, - SSE42_X64 = 39, - AVX_X64 = 40, - AVX2_X64 = 41, - AES_X64 = 42, - BMI1_X64 = 43, - BMI2_X64 = 44, - FMA_X64 = 45, - LZCNT_X64 = 46, - PCLMULQDQ_X64 = 47, - POPCNT_X64 = 48, - AVXVNNI_X64 = 49, - MOVBE_X64 = 50, - X86Serialize_X64 = 51, - AVX512F_X64 = 52, - AVX512F_VL_X64 = 53, - AVX512BW_X64 = 54, - AVX512BW_VL_X64 = 55, - AVX512CD_X64 = 56, - AVX512CD_VL_X64 = 57, - AVX512DQ_X64 = 58, - AVX512DQ_VL_X64 = 59, - AVX512VBMI_X64 = 60, - AVX512VBMI_VL_X64 = 61, + VectorT128 = 33, + VectorT256 = 34, + VectorT512 = 35, + X86Base_X64 = 36, + SSE_X64 = 37, + SSE2_X64 = 38, + SSE3_X64 = 39, + SSSE3_X64 = 40, + SSE41_X64 = 41, + SSE42_X64 = 42, + AVX_X64 = 43, + AVX2_X64 = 44, + AES_X64 = 45, + BMI1_X64 = 46, + BMI2_X64 = 47, + FMA_X64 = 48, + LZCNT_X64 = 49, + PCLMULQDQ_X64 = 50, + POPCNT_X64 = 51, + AVXVNNI_X64 = 52, + MOVBE_X64 = 53, + 
X86Serialize_X64 = 54, + AVX512F_X64 = 55, + AVX512F_VL_X64 = 56, + AVX512BW_X64 = 57, + AVX512BW_VL_X64 = 58, + AVX512CD_X64 = 59, + AVX512CD_VL_X64 = 60, + AVX512DQ_X64 = 61, + AVX512DQ_VL_X64 = 62, + AVX512VBMI_X64 = 63, + AVX512VBMI_VL_X64 = 64, } public enum InstructionSet_X86 @@ -290,40 +301,43 @@ public enum InstructionSet_X86 AVX512DQ_VL = 30, AVX512VBMI = 31, AVX512VBMI_VL = 32, - X86Base_X64 = 33, - SSE_X64 = 34, - SSE2_X64 = 35, - SSE3_X64 = 36, - SSSE3_X64 = 37, - SSE41_X64 = 38, - SSE42_X64 = 39, - AVX_X64 = 40, - AVX2_X64 = 41, - AES_X64 = 42, - BMI1_X64 = 43, - BMI2_X64 = 44, - FMA_X64 = 45, - LZCNT_X64 = 46, - PCLMULQDQ_X64 = 47, - POPCNT_X64 = 48, - AVXVNNI_X64 = 49, - MOVBE_X64 = 50, - X86Serialize_X64 = 51, - AVX512F_X64 = 52, - AVX512F_VL_X64 = 53, - AVX512BW_X64 = 54, - AVX512BW_VL_X64 = 55, - AVX512CD_X64 = 56, - AVX512CD_VL_X64 = 57, - AVX512DQ_X64 = 58, - AVX512DQ_VL_X64 = 59, - AVX512VBMI_X64 = 60, - AVX512VBMI_VL_X64 = 61, + VectorT128 = 33, + VectorT256 = 34, + VectorT512 = 35, + X86Base_X64 = 36, + SSE_X64 = 37, + SSE2_X64 = 38, + SSE3_X64 = 39, + SSSE3_X64 = 40, + SSE41_X64 = 41, + SSE42_X64 = 42, + AVX_X64 = 43, + AVX2_X64 = 44, + AES_X64 = 45, + BMI1_X64 = 46, + BMI2_X64 = 47, + FMA_X64 = 48, + LZCNT_X64 = 49, + PCLMULQDQ_X64 = 50, + POPCNT_X64 = 51, + AVXVNNI_X64 = 52, + MOVBE_X64 = 53, + X86Serialize_X64 = 54, + AVX512F_X64 = 55, + AVX512F_VL_X64 = 56, + AVX512BW_X64 = 57, + AVX512BW_VL_X64 = 58, + AVX512CD_X64 = 59, + AVX512CD_VL_X64 = 60, + AVX512DQ_X64 = 61, + AVX512DQ_VL_X64 = 62, + AVX512VBMI_X64 = 63, + AVX512VBMI_VL_X64 = 64, } public unsafe struct InstructionSetFlags : IEnumerable { - private const int FlagsFieldCount = 1; + private const int FlagsFieldCount = 2; private const int BitsPerFlagsField = 64; private fixed ulong _flags[FlagsFieldCount]; public IEnumerable ARM64Flags => this.Select((x) => (InstructionSet_ARM64)x); @@ -527,6 +541,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target 
resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd); if (resultflags.HasInstructionSet(InstructionSet.ARM64_Vector128)) resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd); + if (resultflags.HasInstructionSet(InstructionSet.ARM64_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.ARM64_AdvSimd); break; case TargetArchitecture.X64: @@ -718,6 +734,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.X64_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512)) + resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) @@ -799,6 +821,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128)) + resultflags.AddInstructionSet(InstructionSet.X86_SSE2); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX2); + if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512)) + resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) 
resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) @@ -862,6 +890,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.ARM64_Vector64); if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd)) resultflags.AddInstructionSet(InstructionSet.ARM64_Vector128); + if (resultflags.HasInstructionSet(InstructionSet.ARM64_AdvSimd)) + resultflags.AddInstructionSet(InstructionSet.ARM64_VectorT128); break; case TargetArchitecture.X64: @@ -995,6 +1025,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X64_VectorT512); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X64_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL)) @@ -1076,6 +1112,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL); + if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT128); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2)) + 
resultflags.AddInstructionSet(InstructionSet.X86_VectorT256); + if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F)) + resultflags.AddInstructionSet(InstructionSet.X86_VectorT512); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL)) resultflags.AddInstructionSet(InstructionSet.X86_AVX512F); if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL)) @@ -1152,6 +1194,7 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("Vector128", "", InstructionSet.ARM64_Vector128, false); yield return new InstructionSetInfo("Dczva", "", InstructionSet.ARM64_Dczva, false); yield return new InstructionSetInfo("rcpc", "", InstructionSet.ARM64_Rcpc, true); + yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.ARM64_VectorT128, true); break; case TargetArchitecture.X64: @@ -1187,6 +1230,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X64_AVX512DQ_VL, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X64_AVX512VBMI, true); yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", InstructionSet.X64_AVX512VBMI_VL, true); + yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X64_VectorT128, true); + yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X64_VectorT256, true); + yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X64_VectorT512, true); break; case TargetArchitecture.X86: @@ -1222,6 +1268,9 @@ public static IEnumerable ArchitectureToValidInstructionSets yield return new InstructionSetInfo("avx512dq_vl", "Avx512DQ_VL", InstructionSet.X86_AVX512DQ_VL, true); yield return new InstructionSetInfo("avx512vbmi", "Avx512Vbmi", InstructionSet.X86_AVX512VBMI, true); yield return new InstructionSetInfo("avx512vbmi_vl", "Avx512Vbmi_VL", 
InstructionSet.X86_AVX512VBMI_VL, true); + yield return new InstructionSetInfo("vectort128", "VectorT128", InstructionSet.X86_VectorT128, true); + yield return new InstructionSetInfo("vectort256", "VectorT256", InstructionSet.X86_VectorT256, true); + yield return new InstructionSetInfo("vectort512", "VectorT512", InstructionSet.X86_VectorT512, true); break; } } @@ -1496,6 +1545,9 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.ARM64_Sha256; } + case "VectorT128": + { return InstructionSet.ARM64_VectorT128; } + } break; @@ -1662,6 +1714,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X64_AVX512VBMI; } + case "VectorT128": + { return InstructionSet.X64_VectorT128; } + + case "VectorT256": + { return InstructionSet.X64_VectorT256; } + + case "VectorT512": + { return InstructionSet.X64_VectorT512; } + } break; @@ -1756,6 +1817,15 @@ public static InstructionSet LookupPlatformIntrinsicInstructionSet(TargetArchite else { return InstructionSet.X86_AVX512VBMI; } + case "VectorT128": + { return InstructionSet.X86_VectorT128; } + + case "VectorT256": + { return InstructionSet.X86_VectorT256; } + + case "VectorT512": + { return InstructionSet.X86_VectorT512; } + } break; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt index 1b8b2c1ce973a9..3c669e1ea95fea 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt @@ -57,7 +57,9 @@ instructionset ,X86 ,Avx512DQ , ,35 ,AVX512DQ instructionset ,X86 ,Avx512DQ_VL , ,36 ,AVX512DQ_VL ,avx512dq_vl instructionset ,X86 ,Avx512Vbmi , ,37 ,AVX512VBMI ,avx512vbmi instructionset ,X86 ,Avx512Vbmi_VL , ,38 ,AVX512VBMI_VL ,avx512vbmi_vl - +instructionset ,X86 ,VectorT128 , ,39 
,VectorT128 ,vectort128 +instructionset ,X86 ,VectorT256 , ,40 ,VectorT256 ,vectort256 +instructionset ,X86 ,VectorT512 , ,41 ,VectorT512 ,vectort512 instructionset64bit,X86 ,X86Base instructionset64bit,X86 ,SSE @@ -129,6 +131,9 @@ implication ,X86 ,AVX512DQ_VL ,AVX512F_VL implication ,X86 ,AVX512VBMI ,AVX512BW implication ,X86 ,AVX512VBMI_VL ,AVX512VBMI implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL +implication ,X86 ,VectorT128 ,SSE2 +implication ,X86 ,VectorT256 ,AVX2 +implication ,X86 ,VectorT512 ,AVX512F ; While the AVX-512 ISAs can be individually lit-up, they really ; need F, BW, CD, DQ, and VL to be fully functional without adding @@ -148,19 +153,20 @@ copyinstructionsets,X86 ,X64 ; Definition of Arm64 instruction sets definearch ,ARM64 ,64Bit ,Arm64, Arm64 -instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base -instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon -instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes -instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc -instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod -instructionset ,ARM64 ,Rdm , ,24 ,Rdm ,rdma -instructionset ,ARM64 ,Sha1 , ,19 ,Sha1 ,sha1 -instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2 -instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse -instructionset ,ARM64 , , , ,Vector64 , -instructionset ,ARM64 , , , ,Vector128, -instructionset ,ARM64 , , , ,Dczva , -instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc +instructionset ,ARM64 ,ArmBase , ,16 ,ArmBase ,base +instructionset ,ARM64 ,AdvSimd , ,17 ,AdvSimd ,neon +instructionset ,ARM64 ,Aes , ,9 ,Aes ,aes +instructionset ,ARM64 ,Crc32 , ,18 ,Crc32 ,crc +instructionset ,ARM64 ,Dp , ,23 ,Dp ,dotprod +instructionset ,ARM64 ,Rdm , ,24 ,Rdm ,rdma +instructionset ,ARM64 ,Sha1 , ,19 ,Sha1 ,sha1 +instructionset ,ARM64 ,Sha256 , ,20 ,Sha256 ,sha2 +instructionset ,ARM64 , ,Atomics ,21 ,Atomics ,lse +instructionset ,ARM64 , , , ,Vector64 , +instructionset ,ARM64 , , , ,Vector128 , +instructionset ,ARM64 , , , ,Dczva , +instructionset ,ARM64 , ,Rcpc ,26 ,Rcpc ,rcpc 
+instructionset ,ARM64 ,VectorT128 , ,39 ,VectorT128 ,vectort128 instructionset64bit,ARM64 ,ArmBase instructionset64bit,ARM64 ,AdvSimd @@ -174,16 +180,16 @@ instructionset64bit,ARM64 ,Sha256 vectorinstructionset,ARM64,Vector64 vectorinstructionset,ARM64,Vector128 -implication ,ARM64 ,AdvSimd ,ArmBase -implication ,ARM64 ,Aes ,ArmBase -implication ,ARM64 ,Crc32 ,ArmBase -implication ,ARM64 ,Dp ,AdvSimd -implication ,ARM64 ,Rdm ,AdvSimd -implication ,ARM64 ,Sha1 ,ArmBase -implication ,ARM64 ,Sha256 ,ArmBase -implication ,ARM64 ,Vector64 ,AdvSimd -implication ,ARM64 ,Vector128 ,AdvSimd - +implication ,ARM64 ,AdvSimd ,ArmBase +implication ,ARM64 ,Aes ,ArmBase +implication ,ARM64 ,Crc32 ,ArmBase +implication ,ARM64 ,Dp ,AdvSimd +implication ,ARM64 ,Rdm ,AdvSimd +implication ,ARM64 ,Sha1 ,ArmBase +implication ,ARM64 ,Sha256 ,ArmBase +implication ,ARM64 ,Vector64 ,AdvSimd +implication ,ARM64 ,Vector128 ,AdvSimd +implication ,ARM64 ,VectorT128 ,AdvSimd ; ,name and aliases ,archs ,lower baselines included by implication ; diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs index 4547b91e2fdc70..3ce3bf5a795dbd 100644 --- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs +++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetGenerator.cs @@ -95,7 +95,7 @@ public InstructionSetImplication(string architecture, InstructionSetImplication private Dictionary _64BitVariantArchitectureManagedNameSuffix = new Dictionary(); // This represents the number of flags fields we currently track - private const int FlagsFieldCount = 1; + private const int FlagsFieldCount = 2; private void ArchitectureEncountered(string arch) { diff --git a/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs b/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs index 99305804f53519..fb4cd6bf17157f 100644 --- 
a/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/DefType.FieldLayout.cs @@ -73,6 +73,11 @@ private static class FieldLayoutFlags /// True if the type transitively has an Int128 in it or is an Int128 /// public const int IsInt128OrHasInt128Fields = 0x800; + + /// + /// True if the type transitively has a Vector in it or is Vector + /// + public const int IsVectorTOrHasVectorTFields = 0x1000; } private sealed class StaticBlockInfo @@ -153,6 +158,21 @@ public virtual bool IsInt128OrHasInt128Fields } } + /// + /// Is a type Vector or transitively have any fields of a type Vector. + /// + public virtual bool IsVectorTOrHasVectorTFields + { + get + { + if (!_fieldLayoutFlags.HasFlags(FieldLayoutFlags.ComputedInstanceTypeLayout)) + { + ComputeInstanceLayout(InstanceLayoutKind.TypeAndFields); + } + return _fieldLayoutFlags.HasFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields); + } + } + /// /// The number of bytes required to hold a field of this type /// @@ -451,6 +471,10 @@ public void ComputeInstanceLayout(InstanceLayoutKind layoutKind) { _fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsInt128OrHasInt128Fields); } + if (computedLayout.IsVectorTOrHasVectorTFields) + { + _fieldLayoutFlags.AddFlags(FieldLayoutFlags.IsVectorTOrHasVectorTFields); + } if (computedLayout.Offsets != null) { diff --git a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs index 53388c915b85d8..31a46ec47f6416 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/FieldLayoutAlgorithm.cs @@ -84,6 +84,7 @@ public struct ComputedInstanceFieldLayout public bool LayoutAbiStable; // Is the layout stable such that it can safely be used in function calling conventions public bool IsAutoLayoutOrHasAutoLayoutFields; public bool IsInt128OrHasInt128Fields; + public bool 
IsVectorTOrHasVectorTFields; /// /// If Offsets is non-null, then all field based layout is complete. diff --git a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs index 0fc9064fb00a81..d56bfa19bbf210 100644 --- a/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/Common/TypeSystem/Common/MetadataFieldLayoutAlgorithm.cs @@ -110,6 +110,7 @@ out instanceByteSizeAndAlignment LayoutAbiStable = true, IsAutoLayoutOrHasAutoLayoutFields = false, IsInt128OrHasInt128Fields = false, + IsVectorTOrHasVectorTFields = false, }; if (numInstanceFields > 0) @@ -211,7 +212,7 @@ public override ComputedStaticFieldLayout ComputeStaticFieldLayout(DefType defTy } ref StaticsBlock block = ref GetStaticsBlockForField(ref result, field); - SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _); + SizeAndAlignment sizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout: false, context.Target.DefaultPackingSize, out bool _, out bool _, out bool _, out bool _); block.Size = LayoutInt.AlignUp(block.Size, sizeAndAlignment.Alignment, context.Target); result.Offsets[index] = new FieldAndOffset(field, block.Size); @@ -303,18 +304,27 @@ protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType ty int fieldOrdinal = 0; bool layoutAbiStable = true; bool hasAutoLayoutField = false; - bool hasInt128Field = type.BaseType == null ? 
false : type.BaseType.IsInt128OrHasInt128Fields; + bool hasInt128Field = false; + bool hasVectorTField = false; + + if (type.BaseType is not null) + { + hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields; + hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields; + } foreach (var fieldAndOffset in layoutMetadata.Offsets) { TypeDesc fieldType = fieldAndOffset.Field.FieldType; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField); if (!fieldLayoutAbiStable) layoutAbiStable = false; if (fieldHasAutoLayout) hasAutoLayoutField = true; if (fieldHasInt128Field) hasInt128Field = true; + if (fieldHasVectorTField) + hasVectorTField = true; largestAlignmentRequired = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequired); @@ -367,6 +377,7 @@ protected ComputedInstanceFieldLayout ComputeExplicitFieldLayout(MetadataType ty { IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField, IsInt128OrHasInt128Fields = hasInt128Field, + IsVectorTOrHasVectorTFields = hasVectorTField, }; computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; computedLayout.FieldSize = instanceSizeAndAlignment.Size; @@ -402,20 +413,29 @@ protected ComputedInstanceFieldLayout ComputeSequentialFieldLayout(MetadataType int packingSize = ComputePackingSize(type, layoutMetadata); bool layoutAbiStable = true; bool hasAutoLayoutField = false; - bool hasInt128Field = type.BaseType == null ? 
false : type.BaseType.IsInt128OrHasInt128Fields; + bool hasInt128Field = false; + bool hasVectorTField = false; + + if (type.BaseType is not null) + { + hasInt128Field = type.BaseType.IsInt128OrHasInt128Fields; + hasVectorTField = type.BaseType.IsVectorTOrHasVectorTFields; + } foreach (var field in type.GetFields()) { if (field.IsStatic) continue; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType.UnderlyingType, hasLayout: true, packingSize, out bool fieldLayoutAbiStable, out bool fieldHasAutoLayout, out bool fieldHasInt128Field, out bool fieldHasVectorTField); if (!fieldLayoutAbiStable) layoutAbiStable = false; if (fieldHasAutoLayout) hasAutoLayoutField = true; if (fieldHasInt128Field) hasInt128Field = true; + if (fieldHasVectorTField) + hasVectorTField = true; largestAlignmentRequirement = LayoutInt.Max(fieldSizeAndAlignment.Alignment, largestAlignmentRequirement); @@ -443,6 +463,7 @@ protected ComputedInstanceFieldLayout ComputeSequentialFieldLayout(MetadataType { IsAutoLayoutOrHasAutoLayoutFields = hasAutoLayoutField, IsInt128OrHasInt128Fields = hasInt128Field, + IsVectorTOrHasVectorTFields = hasVectorTField, }; computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; computedLayout.FieldSize = instanceSizeAndAlignment.Size; @@ -517,6 +538,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, int instanceGCPointerFieldsCount = 0; int[] instanceNonGCPointerFieldsCount = new int[maxLog2Size + 1]; bool hasInt128Field = false; + bool hasVectorTField = false; foreach (var field in type.GetFields()) { @@ -531,6 +553,8 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, instanceValueClassFieldCount++; if (((DefType)fieldType).IsInt128OrHasInt128Fields) 
hasInt128Field = true; + if (((DefType)fieldType).IsVectorTOrHasVectorTFields) + hasVectorTField = true; } else if (fieldType.IsGCPointer) { @@ -540,7 +564,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, { Debug.Assert(fieldType.IsPrimitive || fieldType.IsPointer || fieldType.IsFunctionPointer || fieldType.IsEnum || fieldType.IsByRef); - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _); instanceNonGCPointerFieldsCount[CalculateLog2(fieldSizeAndAlignment.Size.AsInt)]++; } } @@ -577,7 +601,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, TypeDesc fieldType = field.FieldType; - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(fieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _); if (!fieldLayoutAbiStable) layoutAbiStable = false; @@ -747,7 +771,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, for (int i = 0; i < instanceValueClassFieldsArr.Length; i++) { // Align the cumulative field offset to the indeterminate value - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(instanceValueClassFieldsArr[i].FieldType, hasLayout, packingSize, out bool fieldLayoutAbiStable, out bool _, out bool _, out bool _); if (!fieldLayoutAbiStable) layoutAbiStable = false; @@ -804,6 +828,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, { 
IsAutoLayoutOrHasAutoLayoutFields = true, IsInt128OrHasInt128Fields = hasInt128Field, + IsVectorTOrHasVectorTFields = hasVectorTField, }; computedLayout.FieldAlignment = instanceSizeAndAlignment.Alignment; computedLayout.FieldSize = instanceSizeAndAlignment.Size; @@ -817,7 +842,7 @@ protected ComputedInstanceFieldLayout ComputeAutoFieldLayout(MetadataType type, private static void PlaceInstanceField(FieldDesc field, bool hasLayout, int packingSize, FieldAndOffset[] offsets, ref LayoutInt instanceFieldPos, ref int fieldOrdinal, LayoutInt offsetBias) { - var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _); + var fieldSizeAndAlignment = ComputeFieldSizeAndAlignment(field.FieldType, hasLayout, packingSize, out bool _, out bool _, out bool _, out bool _); instanceFieldPos = AlignUpInstanceFieldOffset(instanceFieldPos, fieldSizeAndAlignment.Alignment, field.Context.Target); offsets[fieldOrdinal] = new FieldAndOffset(field, instanceFieldPos + offsetBias); @@ -877,12 +902,13 @@ public LayoutInt CalculateFieldBaseOffset(MetadataType type, bool requiresAlign8 return cumulativeInstanceFieldPos; } - private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field) + private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, bool hasLayout, int packingSize, out bool layoutAbiStable, out bool fieldTypeHasAutoLayout, out bool fieldTypeHasInt128Field, out bool fieldTypeHasVectorTField) { SizeAndAlignment result; layoutAbiStable = true; fieldTypeHasAutoLayout = true; fieldTypeHasInt128Field = false; + fieldTypeHasVectorTField = false; if (fieldType.IsDefType) { @@ -894,6 +920,7 @@ private static SizeAndAlignment ComputeFieldSizeAndAlignment(TypeDesc fieldType, layoutAbiStable = defType.LayoutAbiStable; fieldTypeHasAutoLayout = 
defType.IsAutoLayoutOrHasAutoLayoutFields; fieldTypeHasInt128Field = defType.IsInt128OrHasInt128Fields; + fieldTypeHasVectorTField = defType.IsVectorTOrHasVectorTFields; } else { diff --git a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs index cb80143d94250d..3aa6ca2db11a16 100644 --- a/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs +++ b/src/coreclr/tools/Common/TypeSystem/Interop/IL/MarshalHelpers.cs @@ -422,6 +422,12 @@ internal static MarshallerKind GetMarshallerKind( return MarshallerKind.Invalid; } + if (!isField && ((DefType)type).IsVectorTOrHasVectorTFields) + { + // Vector types or structs that contain them cannot be passed by value + return MarshallerKind.Invalid; + } + if (MarshalUtils.IsBlittableType(type)) { if (nativeType != NativeTypeKind.Default && nativeType != NativeTypeKind.Struct) diff --git a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs index b7e4ee14e57ac5..cd7a616c0698b3 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/HardwareIntrinsicHelpers.Aot.cs @@ -112,6 +112,9 @@ private static class XArchIntrinsicConstants public const int Avx512Vbmi = 0x800000; public const int Avx512Vbmi_vl = 0x1000000; public const int Serialize = 0x2000000; + public const int VectorT128 = 0x4000000; + public const int VectorT256 = 0x8000000; + public const int VectorT512 = 0x10000000; public static int FromInstructionSet(InstructionSet instructionSet) { @@ -121,6 +124,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) return instructionSet switch { + // Optional ISAs - only available via opt-in or opportunistic light-up InstructionSet.X64_AES => Aes, InstructionSet.X64_AES_X64 => Aes, InstructionSet.X64_PCLMULQDQ => 
Pclmulqdq, @@ -174,7 +178,7 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_X86Serialize => Serialize, InstructionSet.X64_X86Serialize_X64 => Serialize, - // SSE and SSE2 are baseline ISAs - they're always available + // Baseline ISAs - they're always available InstructionSet.X64_SSE => 0, InstructionSet.X64_SSE_X64 => 0, InstructionSet.X64_SSE2 => 0, @@ -183,6 +187,11 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.X64_X86Base => 0, InstructionSet.X64_X86Base_X64 => 0, + // Vector Sizes + InstructionSet.X64_VectorT128 => VectorT128, + InstructionSet.X64_VectorT256 => VectorT256, + InstructionSet.X64_VectorT512 => VectorT512, + _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString()) }; } @@ -199,13 +208,20 @@ private static class Arm64IntrinsicConstants public const int Sha256 = 0x0040; public const int Atomics = 0x0080; public const int Rcpc = 0x0100; + public const int VectorT128 = 0x0200; public static int FromInstructionSet(InstructionSet instructionSet) { return instructionSet switch { + + // Baseline ISAs - they're always available + InstructionSet.ARM64_ArmBase => 0, + InstructionSet.ARM64_ArmBase_Arm64 => 0, InstructionSet.ARM64_AdvSimd => AdvSimd, InstructionSet.ARM64_AdvSimd_Arm64 => AdvSimd, + + // Optional ISAs - only available via opt-in or opportunistic light-up InstructionSet.ARM64_Aes => Aes, InstructionSet.ARM64_Aes_Arm64 => Aes, InstructionSet.ARM64_Crc32 => Crc32, @@ -221,8 +237,8 @@ public static int FromInstructionSet(InstructionSet instructionSet) InstructionSet.ARM64_Atomics => Atomics, InstructionSet.ARM64_Rcpc => Rcpc, - InstructionSet.ARM64_ArmBase => 0, - InstructionSet.ARM64_ArmBase_Arm64 => 0, + // Vector Sizes + InstructionSet.ARM64_VectorT128 => VectorT128, _ => throw new NotSupportedException(((InstructionSet_ARM64)instructionSet).ToString()) }; diff --git 
a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs index 1a1eef14d55820..cfa4dc2524815b 100644 --- a/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs +++ b/src/coreclr/tools/aot/ILCompiler.Compiler/Compiler/VectorOfTFieldLayoutAlgorithm.cs @@ -25,6 +25,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp TargetDetails targetDetails = defType.Context.Target; ComputedInstanceFieldLayout layoutFromMetadata = _fallbackAlgorithm.ComputeInstanceLayout(defType, layoutKind); + layoutFromMetadata.IsVectorTOrHasVectorTFields = true; LayoutInt instanceFieldSize; @@ -53,6 +54,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType defTyp FieldAlignment = layoutFromMetadata.FieldAlignment, FieldSize = instanceFieldSize, Offsets = layoutFromMetadata.Offsets, + IsVectorTOrHasVectorTFields = true, }; } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs index a0b9e801f2495d..e379651ff5e2cb 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/DependencyAnalysis/ReadyToRun/TypeFixupSignature.cs @@ -36,14 +36,20 @@ public override ObjectData GetData(NodeFactory factory, bool relocsOnly = false) if (!relocsOnly) { + ReadyToRunFixupKind fixupKind = _fixupKind; dataBuilder.AddSymbol(this); + if ((fixupKind == ReadyToRunFixupKind.Verify_TypeLayout) && ((MetadataType)_typeDesc).IsVectorTOrHasVectorTFields) + { + fixupKind = ReadyToRunFixupKind.Check_TypeLayout; + } + IEcmaModule targetModule = factory.SignatureContext.GetTargetModule(_typeDesc); - SignatureContext innerContext = 
dataBuilder.EmitFixup(factory, _fixupKind, targetModule, factory.SignatureContext); + SignatureContext innerContext = dataBuilder.EmitFixup(factory, fixupKind, targetModule, factory.SignatureContext); dataBuilder.EmitTypeSignature(_typeDesc, innerContext); - if ((_fixupKind == ReadyToRunFixupKind.Check_TypeLayout) || - (_fixupKind == ReadyToRunFixupKind.Verify_TypeLayout)) + if ((fixupKind == ReadyToRunFixupKind.Check_TypeLayout) || + (fixupKind == ReadyToRunFixupKind.Verify_TypeLayout)) { EncodeTypeLayout(dataBuilder, _typeDesc); } @@ -92,7 +98,7 @@ private static void EncodeTypeLayout(ObjectDataSignatureBuilder dataBuilder, Typ }; dataBuilder.EmitUInt((uint)hfaElementType); } - + if (alignment != pointerSize) { dataBuilder.EmitUInt((uint)alignment); diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs index 8b1b8d7c30e2e0..3644241dcf85b7 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCodegenCompilation.cs @@ -475,12 +475,18 @@ private bool IsLayoutFixedInCurrentVersionBubbleInternal(TypeDesc type) return true; } - if (!(type is MetadataType defType)) + if (type is not MetadataType defType) { // Non metadata backed types have layout defined in all version bubbles return true; } + if (VectorOfTFieldLayoutAlgorithm.IsVectorOfTType(defType)) + { + // Vector always needs a layout check + return false; + } + if (!NodeFactory.CompilationModuleGroup.VersionsWithModule(defType.Module)) { // Valuetypes with non-versionable attribute are candidates for fixed layout. Reject the rest. 
diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs index 6eed36223b0992..c6d40d4ee7b42b 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/Compiler/ReadyToRunCompilerContext.cs @@ -220,6 +220,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, ByteCountAlignment = LayoutInt.Indeterminate, Offsets = fieldsAndOffsets.ToArray(), LayoutAbiStable = false, + IsVectorTOrHasVectorTFields = true, }; return instanceLayout; } @@ -238,6 +239,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, FieldSize = layoutFromSimilarIntrinsicVector.FieldSize, Offsets = layoutFromMetadata.Offsets, LayoutAbiStable = _vectorAbiIsStable, + IsVectorTOrHasVectorTFields = true, }; #else return new ComputedInstanceFieldLayout @@ -248,6 +250,7 @@ public override ComputedInstanceFieldLayout ComputeInstanceLayout(DefType type, FieldSize = layoutFromSimilarIntrinsicVector.FieldSize, Offsets = layoutFromMetadata.Offsets, LayoutAbiStable = _vectorAbiIsStable, + IsVectorTOrHasVectorTFields = true, }; #endif } diff --git a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs index bb23738a4dc621..6744603dcb2288 100644 --- a/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs +++ b/src/coreclr/tools/aot/ILCompiler.ReadyToRun/JitInterface/CorInfoImpl.ReadyToRun.cs @@ -3128,7 +3128,7 @@ private bool getStringChar(CORINFO_OBJECT_STRUCT_* strObj, int index, ushort* va { return false; } - + private CORINFO_OBJECT_STRUCT_* getRuntimeTypePointer(CORINFO_CLASS_STRUCT_* cls) { return null; diff --git a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs 
b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs index 7a27f04ed6fb87..574bfb78c461ca 100644 --- a/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs +++ b/src/coreclr/tools/aot/ILCompiler/ILCompilerRootCommand.cs @@ -112,6 +112,8 @@ internal sealed class ILCompilerRootCommand : RootCommand }, true, "Maximum number of threads to use during compilation"); public Option InstructionSet { get; } = new(new[] { "--instruction-set" }, "Instruction set to allow or disallow"); + public Option MaxVectorTBitWidth { get; } = + new(new[] { "--max-vectort-bitwidth" }, "Maximum width, in bits, that Vector is allowed to be"); public Option Guard { get; } = new(new[] { "--guard" }, "Enable mitigations. Options: 'cf': CFG (Control Flow Guard, Windows only)"); public Option Dehydrate { get; } = @@ -210,6 +212,7 @@ public ILCompilerRootCommand(string[] args) : base(".NET Native IL Compiler") AddOption(RuntimeKnobs); AddOption(Parallelism); AddOption(InstructionSet); + AddOption(MaxVectorTBitWidth); AddOption(Guard); AddOption(Dehydrate); AddOption(PreinitStatics); diff --git a/src/coreclr/tools/aot/ILCompiler/Program.cs b/src/coreclr/tools/aot/ILCompiler/Program.cs index da1aa1a021bb8e..f83ad617dda8d2 100644 --- a/src/coreclr/tools/aot/ILCompiler/Program.cs +++ b/src/coreclr/tools/aot/ILCompiler/Program.cs @@ -67,7 +67,7 @@ public int Run() TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture); TargetOS targetOS = Get(_command.TargetOS); - InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS, + InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS, "Unrecognized instruction set {0}", "Unsupported combination of instruction sets: {0}/{1}"); string systemModuleName = Get(_command.SystemModuleName); diff --git 
a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs index e3918372e42dc9..ef384a1abece68 100644 --- a/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs +++ b/src/coreclr/tools/aot/crossgen2/Crossgen2RootCommand.cs @@ -23,6 +23,8 @@ internal class Crossgen2RootCommand : RootCommand new(new[] { "--reference", "-r" }, result => Helpers.BuildPathDictionary(result.Tokens, false), true, SR.ReferenceFiles); public Option InstructionSet { get; } = new(new[] { "--instruction-set" }, SR.InstructionSets); + public Option MaxVectorTBitWidth { get; } = + new(new[] { "--max-vectort-bitwidth" }, SR.MaxVectorTBitWidths); public Option MibcFilePaths { get; } = new(new[] { "--mibc", "-m" }, Array.Empty, SR.MibcFiles); public Option OutputFilePath { get; } = @@ -193,6 +195,7 @@ public Crossgen2RootCommand(string[] args) : base(SR.Crossgen2BannerText) AddOption(UnrootedInputFilePaths); AddOption(ReferenceFilePaths); AddOption(InstructionSet); + AddOption(MaxVectorTBitWidth); AddOption(MibcFilePaths); AddOption(OutputFilePath); AddOption(CompositeRootPath); diff --git a/src/coreclr/tools/aot/crossgen2/Program.cs b/src/coreclr/tools/aot/crossgen2/Program.cs index 639e5551e96940..1d43fc8c7a9150 100644 --- a/src/coreclr/tools/aot/crossgen2/Program.cs +++ b/src/coreclr/tools/aot/crossgen2/Program.cs @@ -76,7 +76,7 @@ public int Run() TargetArchitecture targetArchitecture = Get(_command.TargetArchitecture); TargetOS targetOS = Get(_command.TargetOS); - InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), targetArchitecture, targetOS, + InstructionSetSupport instructionSetSupport = Helpers.ConfigureInstructionSetSupport(Get(_command.InstructionSet), Get(_command.MaxVectorTBitWidth), targetArchitecture, targetOS, SR.InstructionSetMustNotBe, SR.InstructionSetInvalidImplication); SharedGenericsMode genericsMode = SharedGenericsMode.CanonicalReferenceTypes; var 
targetDetails = new TargetDetails(targetArchitecture, targetOS, Crossgen2RootCommand.IsArmel ? TargetAbi.NativeAotArmel : TargetAbi.NativeAot, instructionSetSupport.GetVectorTSimdVector()); diff --git a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx index b899f77a8f1147..a737ea6aeb7706 100644 --- a/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx +++ b/src/coreclr/tools/aot/crossgen2/Properties/Resources.resx @@ -180,6 +180,9 @@ Instruction set '{0}' implies support for instruction set '{1}' + + The maximum width, in bits, for System.Numerics.Vector<T>. For example '128', '256', or '512'. + Input files without automatic rooting of all methods diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp index cea589081e5b3f..51eec2b9dae9a2 100644 --- a/src/coreclr/vm/codeman.cpp +++ b/src/coreclr/vm/codeman.cpp @@ -1338,6 +1338,9 @@ void EEJitManager::SetCpuInfo() CORJIT_FLAGS CPUCompileFlags; + // Get the maximum bitwidth of Vector, rounding down to the nearest multiple of 128-bits + uint32_t maxVectorTBitWidth = (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_MaxVectorTBitWidth) / 128) * 128; + #if defined(TARGET_X86) || defined(TARGET_AMD64) CPUCompileFlags.Set(InstructionSet_X86Base); @@ -1401,6 +1404,7 @@ void EEJitManager::SetCpuInfo() CPUCompileFlags.Set(InstructionSet_SSE); CPUCompileFlags.Set(InstructionSet_SSE2); + CPUCompileFlags.Set(InstructionSet_VectorT128); if ((cpuidInfo[CPUID_ECX] & (1 << 25)) != 0) // AESNI { @@ -1460,12 +1464,22 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Set(InstructionSet_AVX2); + if ((maxVectorTBitWidth == 0) || (maxVectorTBitWidth >= 256)) + { + // We allow 256-bit Vector by default + CPUCompileFlags.Clear(InstructionSet_VectorT128); + CPUCompileFlags.Set(InstructionSet_VectorT256); + } + if (DoesOSSupportAVX512() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111 { if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) // AVX512F { 
CPUCompileFlags.Set(InstructionSet_AVX512F); + // TODO-XArch: Add support for 512-bit Vector + assert(!CPUCompileFlags.IsSet(InstructionSet_VectorT512)); + bool isAVX512_VLSupported = false; if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0) // AVX512VL { @@ -1526,11 +1540,6 @@ void EEJitManager::SetCpuInfo() } } - if (CLRConfig::GetConfigValue(CLRConfig::INTERNAL_SIMD16ByteOnly) != 0) - { - CPUCompileFlags.Clear(InstructionSet_AVX2); - } - if (maxCpuId >= 0x07) { __cpuidex(cpuidInfo, 0x00000007, 0x00000000); @@ -1581,6 +1590,7 @@ void EEJitManager::SetCpuInfo() // FP and SIMD support are enabled by default CPUCompileFlags.Set(InstructionSet_ArmBase); CPUCompileFlags.Set(InstructionSet_AdvSimd); + CPUCompileFlags.Set(InstructionSet_VectorT128); // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE (30) if (IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE)) @@ -1792,7 +1802,6 @@ void EEJitManager::SetCpuInfo() { CPUCompileFlags.Clear(InstructionSet_X86Serialize); } - #elif defined(TARGET_ARM64) if (!CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic)) { diff --git a/src/coreclr/vm/methodtablebuilder.cpp b/src/coreclr/vm/methodtablebuilder.cpp index 0349ca2684e990..45c585da6d2196 100644 --- a/src/coreclr/vm/methodtablebuilder.cpp +++ b/src/coreclr/vm/methodtablebuilder.cpp @@ -1135,6 +1135,7 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() LPCUTF8 className; LPCUTF8 nameSpace; + if (FAILED(GetMDImport()->GetNameOfTypeDef(bmtInternal->pType->GetTypeDefToken(), &className, &nameSpace))) return false; @@ -1144,7 +1145,12 @@ BOOL MethodTableBuilder::CheckIfSIMDAndUpdateSize() CORJIT_FLAGS CPUCompileFlags = ExecutionManager::GetEEJitManager()->GetCPUCompileFlags(); uint32_t numInstanceFieldBytes = 16; - if (CPUCompileFlags.IsSet(InstructionSet_AVX2)) + if (CPUCompileFlags.IsSet(InstructionSet_VectorT512)) + { + // TODO-XARCH: The JIT needs to be updated to support 64-byte Vector + numInstanceFieldBytes = 32; + } + else if 
(CPUCompileFlags.IsSet(InstructionSet_VectorT256)) { numInstanceFieldBytes = 32; }