Skip to content

Commit

Permalink
Allow the user to control the MaxVectorTBitWidth (#85551)
Browse files (browse the repository at this point in the history)
* Expose DOTNET_MaxVectorTBitWidth and an undocumented DOTNET_PreferredVectorBitWidth

* Ensure SPMI keeps a getMaxVectorTBitWidth implementation

* Fix the non-xarch vm build

* Remove getMaxVectorTBitWidth from the JIT/EE interface, it's no longer needed

* Move SetCpuInfo down into the EEJitManager constructor

* Remove getXarchCpuInfo in favor of passing `JIT_FLAG_VECTOR512_THROTTLING`

* Make sure CORINFO_XARCH_CPU is fully removed

* Have ENCODE_VERIFY_TYPE_LAYOUT not fail-fast for Vector<T> size differences

* Only encode type layouts containing Vector<T> as a check, not as a verify

* Remove changes that were extracted to separate PRs

* Ensure that the optimistic flags are a strict superset of the supported flags

* Make VectorT128/256/512 proper instruction sets and only allow one to be active at a time

* Don't allow avxvnni to be "optimistic" since that brings in avx2

* Ensure we handle HWIntrinsics being disabled

* Ensure that the Vector<T> size ISAs are covered by FromInstructionSet

* Ensure that `getMaxVectorByteLength` being 0 is handled

* Ensure NAOT startup can correctly check for the VectorT size bits

* Have BlkOpKindUnroll account for SIMD being disabled

* Ensure InstructionSet_VectorT128 is set in the fallback path for PAL_GetJitCpuCapabilityFlags
  • Loading branch information
tannergooding authored Jun 5, 2023
1 parent 6328b23 commit af1262c
Show file tree
Hide file tree
Showing 42 changed files with 651 additions and 269 deletions.
12 changes: 4 additions & 8 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -346,12 +346,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitRegisterFP, W("JitRegisterFP"), 3, "Control
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitELTHookEnabled, W("JitELTHookEnabled"), 0, "On ARM, setting this will emit Enter/Leave/TailCall callbacks")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitMemStats, W("JitMemStats"), 0, "Display JIT memory usage statistics")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100, "Max # of MapSelect's considered for a particular top-level invocation.")
#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
#define EXTERNAL_FeatureSIMD_Default 1
#else // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
#define EXTERNAL_FeatureSIMD_Default 0
#endif // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
RETAIL_CONFIG_DWORD_INFO(INTERNAL_SIMD16ByteOnly, W("SIMD16ByteOnly"), 0, "Limit maximum SIMD vector length to 16 bytes (used by x64_arm64_altjit)")
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynamicMethodDebugInfo"), 0, "Specifies whether debug info should be generated and tracked for dynamic methods")

#ifdef FEATURE_MULTICOREJIT
Expand Down Expand Up @@ -734,15 +728,17 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame"
#endif
#endif

RETAIL_CONFIG_DWORD_INFO(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum width, in bits, that Vector<T> is allowed to be. A value less than 128 is treated as the system default.")

//
// Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h
//
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
//TODO: should implement LoongArch64's features.
//TODO-RISCV64-CQ: should implement RISCV64's features.
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
#else
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

#if defined(TARGET_AMD64) || defined(TARGET_X86)
Expand Down
178 changes: 110 additions & 68 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
enum CORINFO_InstructionSet
{
InstructionSet_ILLEGAL = 0,
InstructionSet_NONE = 63,
InstructionSet_NONE = 127,
#ifdef TARGET_ARM64
InstructionSet_ArmBase=1,
InstructionSet_AdvSimd=2,
Expand All @@ -29,14 +29,15 @@ enum CORINFO_InstructionSet
InstructionSet_Vector128=11,
InstructionSet_Dczva=12,
InstructionSet_Rcpc=13,
InstructionSet_ArmBase_Arm64=14,
InstructionSet_AdvSimd_Arm64=15,
InstructionSet_Aes_Arm64=16,
InstructionSet_Crc32_Arm64=17,
InstructionSet_Dp_Arm64=18,
InstructionSet_Rdm_Arm64=19,
InstructionSet_Sha1_Arm64=20,
InstructionSet_Sha256_Arm64=21,
InstructionSet_VectorT128=14,
InstructionSet_ArmBase_Arm64=15,
InstructionSet_AdvSimd_Arm64=16,
InstructionSet_Aes_Arm64=17,
InstructionSet_Crc32_Arm64=18,
InstructionSet_Dp_Arm64=19,
InstructionSet_Rdm_Arm64=20,
InstructionSet_Sha1_Arm64=21,
InstructionSet_Sha256_Arm64=22,
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -71,35 +72,38 @@ enum CORINFO_InstructionSet
InstructionSet_AVX512DQ_VL=30,
InstructionSet_AVX512VBMI=31,
InstructionSet_AVX512VBMI_VL=32,
InstructionSet_X86Base_X64=33,
InstructionSet_SSE_X64=34,
InstructionSet_SSE2_X64=35,
InstructionSet_SSE3_X64=36,
InstructionSet_SSSE3_X64=37,
InstructionSet_SSE41_X64=38,
InstructionSet_SSE42_X64=39,
InstructionSet_AVX_X64=40,
InstructionSet_AVX2_X64=41,
InstructionSet_AES_X64=42,
InstructionSet_BMI1_X64=43,
InstructionSet_BMI2_X64=44,
InstructionSet_FMA_X64=45,
InstructionSet_LZCNT_X64=46,
InstructionSet_PCLMULQDQ_X64=47,
InstructionSet_POPCNT_X64=48,
InstructionSet_AVXVNNI_X64=49,
InstructionSet_MOVBE_X64=50,
InstructionSet_X86Serialize_X64=51,
InstructionSet_AVX512F_X64=52,
InstructionSet_AVX512F_VL_X64=53,
InstructionSet_AVX512BW_X64=54,
InstructionSet_AVX512BW_VL_X64=55,
InstructionSet_AVX512CD_X64=56,
InstructionSet_AVX512CD_VL_X64=57,
InstructionSet_AVX512DQ_X64=58,
InstructionSet_AVX512DQ_VL_X64=59,
InstructionSet_AVX512VBMI_X64=60,
InstructionSet_AVX512VBMI_VL_X64=61,
InstructionSet_VectorT128=33,
InstructionSet_VectorT256=34,
InstructionSet_VectorT512=35,
InstructionSet_X86Base_X64=36,
InstructionSet_SSE_X64=37,
InstructionSet_SSE2_X64=38,
InstructionSet_SSE3_X64=39,
InstructionSet_SSSE3_X64=40,
InstructionSet_SSE41_X64=41,
InstructionSet_SSE42_X64=42,
InstructionSet_AVX_X64=43,
InstructionSet_AVX2_X64=44,
InstructionSet_AES_X64=45,
InstructionSet_BMI1_X64=46,
InstructionSet_BMI2_X64=47,
InstructionSet_FMA_X64=48,
InstructionSet_LZCNT_X64=49,
InstructionSet_PCLMULQDQ_X64=50,
InstructionSet_POPCNT_X64=51,
InstructionSet_AVXVNNI_X64=52,
InstructionSet_MOVBE_X64=53,
InstructionSet_X86Serialize_X64=54,
InstructionSet_AVX512F_X64=55,
InstructionSet_AVX512F_VL_X64=56,
InstructionSet_AVX512BW_X64=57,
InstructionSet_AVX512BW_VL_X64=58,
InstructionSet_AVX512CD_X64=59,
InstructionSet_AVX512CD_VL_X64=60,
InstructionSet_AVX512DQ_X64=61,
InstructionSet_AVX512DQ_VL_X64=62,
InstructionSet_AVX512VBMI_X64=63,
InstructionSet_AVX512VBMI_VL_X64=64,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -134,43 +138,46 @@ enum CORINFO_InstructionSet
InstructionSet_AVX512DQ_VL=30,
InstructionSet_AVX512VBMI=31,
InstructionSet_AVX512VBMI_VL=32,
InstructionSet_X86Base_X64=33,
InstructionSet_SSE_X64=34,
InstructionSet_SSE2_X64=35,
InstructionSet_SSE3_X64=36,
InstructionSet_SSSE3_X64=37,
InstructionSet_SSE41_X64=38,
InstructionSet_SSE42_X64=39,
InstructionSet_AVX_X64=40,
InstructionSet_AVX2_X64=41,
InstructionSet_AES_X64=42,
InstructionSet_BMI1_X64=43,
InstructionSet_BMI2_X64=44,
InstructionSet_FMA_X64=45,
InstructionSet_LZCNT_X64=46,
InstructionSet_PCLMULQDQ_X64=47,
InstructionSet_POPCNT_X64=48,
InstructionSet_AVXVNNI_X64=49,
InstructionSet_MOVBE_X64=50,
InstructionSet_X86Serialize_X64=51,
InstructionSet_AVX512F_X64=52,
InstructionSet_AVX512F_VL_X64=53,
InstructionSet_AVX512BW_X64=54,
InstructionSet_AVX512BW_VL_X64=55,
InstructionSet_AVX512CD_X64=56,
InstructionSet_AVX512CD_VL_X64=57,
InstructionSet_AVX512DQ_X64=58,
InstructionSet_AVX512DQ_VL_X64=59,
InstructionSet_AVX512VBMI_X64=60,
InstructionSet_AVX512VBMI_VL_X64=61,
InstructionSet_VectorT128=33,
InstructionSet_VectorT256=34,
InstructionSet_VectorT512=35,
InstructionSet_X86Base_X64=36,
InstructionSet_SSE_X64=37,
InstructionSet_SSE2_X64=38,
InstructionSet_SSE3_X64=39,
InstructionSet_SSSE3_X64=40,
InstructionSet_SSE41_X64=41,
InstructionSet_SSE42_X64=42,
InstructionSet_AVX_X64=43,
InstructionSet_AVX2_X64=44,
InstructionSet_AES_X64=45,
InstructionSet_BMI1_X64=46,
InstructionSet_BMI2_X64=47,
InstructionSet_FMA_X64=48,
InstructionSet_LZCNT_X64=49,
InstructionSet_PCLMULQDQ_X64=50,
InstructionSet_POPCNT_X64=51,
InstructionSet_AVXVNNI_X64=52,
InstructionSet_MOVBE_X64=53,
InstructionSet_X86Serialize_X64=54,
InstructionSet_AVX512F_X64=55,
InstructionSet_AVX512F_VL_X64=56,
InstructionSet_AVX512BW_X64=57,
InstructionSet_AVX512BW_VL_X64=58,
InstructionSet_AVX512CD_X64=59,
InstructionSet_AVX512CD_VL_X64=60,
InstructionSet_AVX512DQ_X64=61,
InstructionSet_AVX512DQ_VL_X64=62,
InstructionSet_AVX512VBMI_X64=63,
InstructionSet_AVX512VBMI_VL_X64=64,
#endif // TARGET_X86

};

struct CORINFO_InstructionSetFlags
{
private:
static const int32_t FlagsFieldCount = 1;
static const int32_t FlagsFieldCount = 2;
static const int32_t BitsPerFlagsField = sizeof(uint64_t) * 8;
uint64_t _flags[FlagsFieldCount] = { };

Expand Down Expand Up @@ -404,6 +411,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_Vector64);
if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
resultflags.RemoveInstructionSet(InstructionSet_Vector128);
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64))
Expand Down Expand Up @@ -594,6 +603,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
Expand Down Expand Up @@ -674,6 +689,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
Expand Down Expand Up @@ -738,6 +759,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Dczva";
case InstructionSet_Rcpc :
return "Rcpc";
case InstructionSet_VectorT128 :
return "VectorT128";
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case InstructionSet_X86Base :
Expand Down Expand Up @@ -862,6 +885,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX512VBMI_VL";
case InstructionSet_AVX512VBMI_VL_X64 :
return "AVX512VBMI_VL_X64";
case InstructionSet_VectorT128 :
return "VectorT128";
case InstructionSet_VectorT256 :
return "VectorT256";
case InstructionSet_VectorT512 :
return "VectorT512";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -928,6 +957,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX512VBMI";
case InstructionSet_AVX512VBMI_VL :
return "AVX512VBMI_VL";
case InstructionSet_VectorT128 :
return "VectorT128";
case InstructionSet_VectorT256 :
return "VectorT256";
case InstructionSet_VectorT512 :
return "VectorT512";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -958,6 +993,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Sha256: return InstructionSet_Sha256;
case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics;
case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc;
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand Down Expand Up @@ -989,6 +1025,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand Down Expand Up @@ -1020,6 +1059,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
#endif // TARGET_X86

default:
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* d4414be1-70e4-46ac-8866-ca3a6c2f8422 */
0xd4414be1,
0x70e4,
0x46ac,
{0x88, 0x66, 0xca, 0x3a, 0x6c, 0x2f, 0x84, 0x22}
constexpr GUID JITEEVersionIdentifier = { /* fda2f9dd-6b3e-4ecd-a7b8-79e5edf1f072 */
0xfda2f9dd,
0x6b3e,
0x4ecd,
{0xa7, 0xb8, 0x79, 0xe5, 0xed, 0xf1, 0xf0, 0x72}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Avx512DQ_VL=36,
READYTORUN_INSTRUCTION_Avx512Vbmi=37,
READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38,
READYTORUN_INSTRUCTION_VectorT128=39,
READYTORUN_INSTRUCTION_VectorT256=40,
READYTORUN_INSTRUCTION_VectorT512=41,

};

Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3401,15 +3401,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
assert(srcOffset < (INT32_MAX - static_cast<int>(size)));
assert(dstOffset < (INT32_MAX - static_cast<int>(size)));

if (size >= XMM_REGSIZE_BYTES)
// Get the largest SIMD register available if the size is large enough
unsigned regSize = compiler->roundDownSIMDSize(size);

if ((size >= regSize) && (regSize > 0))
{
regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT);

instruction simdMov = simdUnalignedMovIns();

// Get the largest SIMD register available if the size is large enough
unsigned regSize = compiler->roundDownSIMDSize(size);

auto emitSimdMovs = [&]() {
if (srcLclNum != BAD_VAR_NUM)
{
Expand Down
Loading

0 comments on commit af1262c

Please sign in to comment.