Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow the user to control the MaxVectorTBitWidth #85551

Merged
merged 24 commits into dotnet/main from prefer-vector-width
Jun 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
53be3c6
Expose DOTNET_MaxVectorTBitWidth and an undocumented DOTNET_Preferred…
tannergooding May 7, 2023
a3a21af
Ensure SPMI keeps a getMaxVectorTBitWidth implementation
tannergooding May 7, 2023
84f0680
Fix the non-xarch vm build
tannergooding May 8, 2023
beacbc5
Remove getMaxVectorTBitWidth from the JIT/EE interface, it's no longe…
tannergooding May 8, 2023
52b055f
Move SetCpuInfo down into the EEJitManager constructor
tannergooding May 8, 2023
8ce5112
Remove getXarchCpuInfo in favor of passing `JIT_FLAG_VECTOR512_THROTT…
tannergooding May 8, 2023
a026b45
Make sure CORINFO_XARCH_CPU is fully removed
tannergooding May 8, 2023
3d8feff
Have ENCODE_VERIFY_TYPE_LAYOUT not fail-fast for Vector<T> size diffe…
tannergooding May 8, 2023
4f950f6
Merge remote-tracking branch 'dotnet/main' into prefer-vector-width
tannergooding May 12, 2023
9e73c19
Only encode types containing Vector<T> as check, not verify
tannergooding May 12, 2023
8129b32
Merge remote-tracking branch 'dotnet/main' into prefer-vector-width
tannergooding May 19, 2023
ade7fc7
Remove changes that were extracted to separate PRs
tannergooding May 19, 2023
e96eca9
Ensure that the optimistic flags are a strict superset of the support…
tannergooding May 19, 2023
92c0307
Make VectorT128/256/512 proper instruction sets and only allow one to…
tannergooding May 19, 2023
b37c597
Merge remote-tracking branch 'dotnet/main' into prefer-vector-width
tannergooding May 20, 2023
9eeefd7
Don't allow avxvnni to be "optimistic" since that brings in avx2
tannergooding May 20, 2023
079e9b0
Ensure we handle HWIntrinsics being disabled
tannergooding May 21, 2023
76c33aa
Ensure that the Vector<T> size ISAs are covered by FromInstructionSet
tannergooding May 21, 2023
7e60826
Merge remote-tracking branch 'dotnet/main' into prefer-vector-width
tannergooding May 23, 2023
17e0e01
Merge remote-tracking branch 'dotnet/main' into prefer-vector-width
tannergooding Jun 2, 2023
3b84fb0
Ensure that `getMaxVectorByteLength` being 0 is handled
tannergooding Jun 4, 2023
69e496a
Ensure NAOT startup can correctly check for the VectorT size bits
tannergooding Jun 4, 2023
b0deccd
Have BlkOpKindUnroll account for SIMD being disabled
tannergooding Jun 4, 2023
b7b26d7
Ensure InstructionSet_VectorT128 is set in the fallback path for PAL_…
tannergooding Jun 5, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 4 additions & 8 deletions src/coreclr/inc/clrconfigvalues.h
Original file line number Diff line number Diff line change
Expand Up @@ -357,12 +357,6 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_JitRegisterFP, W("JitRegisterFP"), 3, "Control
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitELTHookEnabled, W("JitELTHookEnabled"), 0, "On ARM, setting this will emit Enter/Leave/TailCall callbacks")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitMemStats, W("JitMemStats"), 0, "Display JIT memory usage statistics")
RETAIL_CONFIG_DWORD_INFO(INTERNAL_JitVNMapSelBudget, W("JitVNMapSelBudget"), 100, "Max # of MapSelect's considered for a particular top-level invocation.")
#if defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64)
#define EXTERNAL_FeatureSIMD_Default 1
#else // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
#define EXTERNAL_FeatureSIMD_Default 0
#endif // !(defined(TARGET_AMD64) || defined(TARGET_X86) || defined(TARGET_ARM64))
RETAIL_CONFIG_DWORD_INFO(INTERNAL_SIMD16ByteOnly, W("SIMD16ByteOnly"), 0, "Limit maximum SIMD vector length to 16 bytes (used by x64_arm64_altjit)")
RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_TrackDynamicMethodDebugInfo, W("TrackDynamicMethodDebugInfo"), 0, "Specifies whether debug info should be generated and tracked for dynamic methods")

#ifdef FEATURE_MULTICOREJIT
Expand Down Expand Up @@ -745,15 +739,17 @@ RETAIL_CONFIG_DWORD_INFO(INTERNAL_GDBJitEmitDebugFrame, W("GDBJitEmitDebugFrame"
#endif
#endif

RETAIL_CONFIG_DWORD_INFO(EXTERNAL_MaxVectorTBitWidth, W("MaxVectorTBitWidth"), 0, "The maximum width, in bits, that Vector<T> is allowed to be. A value less than 128 is treated as the system default.")

//
// Hardware Intrinsic ISAs; keep in sync with jitconfigvalues.h
//
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
//TODO: should implement LoongArch64's features.
//TODO-RISCV64-CQ: should implement RISCV64's features.
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 0, "Allows Base+ hardware intrinsics to be disabled")
#else
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableHWIntrinsic, W("EnableHWIntrinsic"), 1, "Allows Base+ hardware intrinsics to be disabled")
#endif // defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)

#if defined(TARGET_AMD64) || defined(TARGET_X86)
Expand Down
178 changes: 110 additions & 68 deletions src/coreclr/inc/corinfoinstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
enum CORINFO_InstructionSet
{
InstructionSet_ILLEGAL = 0,
InstructionSet_NONE = 63,
InstructionSet_NONE = 127,
#ifdef TARGET_ARM64
InstructionSet_ArmBase=1,
InstructionSet_AdvSimd=2,
Expand All @@ -29,14 +29,15 @@ enum CORINFO_InstructionSet
InstructionSet_Vector128=11,
InstructionSet_Dczva=12,
InstructionSet_Rcpc=13,
InstructionSet_ArmBase_Arm64=14,
InstructionSet_AdvSimd_Arm64=15,
InstructionSet_Aes_Arm64=16,
InstructionSet_Crc32_Arm64=17,
InstructionSet_Dp_Arm64=18,
InstructionSet_Rdm_Arm64=19,
InstructionSet_Sha1_Arm64=20,
InstructionSet_Sha256_Arm64=21,
InstructionSet_VectorT128=14,
InstructionSet_ArmBase_Arm64=15,
InstructionSet_AdvSimd_Arm64=16,
InstructionSet_Aes_Arm64=17,
InstructionSet_Crc32_Arm64=18,
InstructionSet_Dp_Arm64=19,
InstructionSet_Rdm_Arm64=20,
InstructionSet_Sha1_Arm64=21,
InstructionSet_Sha256_Arm64=22,
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -71,35 +72,38 @@ enum CORINFO_InstructionSet
InstructionSet_AVX512DQ_VL=30,
InstructionSet_AVX512VBMI=31,
InstructionSet_AVX512VBMI_VL=32,
InstructionSet_X86Base_X64=33,
InstructionSet_SSE_X64=34,
InstructionSet_SSE2_X64=35,
InstructionSet_SSE3_X64=36,
InstructionSet_SSSE3_X64=37,
InstructionSet_SSE41_X64=38,
InstructionSet_SSE42_X64=39,
InstructionSet_AVX_X64=40,
InstructionSet_AVX2_X64=41,
InstructionSet_AES_X64=42,
InstructionSet_BMI1_X64=43,
InstructionSet_BMI2_X64=44,
InstructionSet_FMA_X64=45,
InstructionSet_LZCNT_X64=46,
InstructionSet_PCLMULQDQ_X64=47,
InstructionSet_POPCNT_X64=48,
InstructionSet_AVXVNNI_X64=49,
InstructionSet_MOVBE_X64=50,
InstructionSet_X86Serialize_X64=51,
InstructionSet_AVX512F_X64=52,
InstructionSet_AVX512F_VL_X64=53,
InstructionSet_AVX512BW_X64=54,
InstructionSet_AVX512BW_VL_X64=55,
InstructionSet_AVX512CD_X64=56,
InstructionSet_AVX512CD_VL_X64=57,
InstructionSet_AVX512DQ_X64=58,
InstructionSet_AVX512DQ_VL_X64=59,
InstructionSet_AVX512VBMI_X64=60,
InstructionSet_AVX512VBMI_VL_X64=61,
InstructionSet_VectorT128=33,
InstructionSet_VectorT256=34,
InstructionSet_VectorT512=35,
InstructionSet_X86Base_X64=36,
InstructionSet_SSE_X64=37,
InstructionSet_SSE2_X64=38,
InstructionSet_SSE3_X64=39,
InstructionSet_SSSE3_X64=40,
InstructionSet_SSE41_X64=41,
InstructionSet_SSE42_X64=42,
InstructionSet_AVX_X64=43,
InstructionSet_AVX2_X64=44,
InstructionSet_AES_X64=45,
InstructionSet_BMI1_X64=46,
InstructionSet_BMI2_X64=47,
InstructionSet_FMA_X64=48,
InstructionSet_LZCNT_X64=49,
InstructionSet_PCLMULQDQ_X64=50,
InstructionSet_POPCNT_X64=51,
InstructionSet_AVXVNNI_X64=52,
InstructionSet_MOVBE_X64=53,
InstructionSet_X86Serialize_X64=54,
InstructionSet_AVX512F_X64=55,
InstructionSet_AVX512F_VL_X64=56,
InstructionSet_AVX512BW_X64=57,
InstructionSet_AVX512BW_VL_X64=58,
InstructionSet_AVX512CD_X64=59,
InstructionSet_AVX512CD_VL_X64=60,
InstructionSet_AVX512DQ_X64=61,
InstructionSet_AVX512DQ_VL_X64=62,
InstructionSet_AVX512VBMI_X64=63,
InstructionSet_AVX512VBMI_VL_X64=64,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
Expand Down Expand Up @@ -134,43 +138,46 @@ enum CORINFO_InstructionSet
InstructionSet_AVX512DQ_VL=30,
InstructionSet_AVX512VBMI=31,
InstructionSet_AVX512VBMI_VL=32,
InstructionSet_X86Base_X64=33,
InstructionSet_SSE_X64=34,
InstructionSet_SSE2_X64=35,
InstructionSet_SSE3_X64=36,
InstructionSet_SSSE3_X64=37,
InstructionSet_SSE41_X64=38,
InstructionSet_SSE42_X64=39,
InstructionSet_AVX_X64=40,
InstructionSet_AVX2_X64=41,
InstructionSet_AES_X64=42,
InstructionSet_BMI1_X64=43,
InstructionSet_BMI2_X64=44,
InstructionSet_FMA_X64=45,
InstructionSet_LZCNT_X64=46,
InstructionSet_PCLMULQDQ_X64=47,
InstructionSet_POPCNT_X64=48,
InstructionSet_AVXVNNI_X64=49,
InstructionSet_MOVBE_X64=50,
InstructionSet_X86Serialize_X64=51,
InstructionSet_AVX512F_X64=52,
InstructionSet_AVX512F_VL_X64=53,
InstructionSet_AVX512BW_X64=54,
InstructionSet_AVX512BW_VL_X64=55,
InstructionSet_AVX512CD_X64=56,
InstructionSet_AVX512CD_VL_X64=57,
InstructionSet_AVX512DQ_X64=58,
InstructionSet_AVX512DQ_VL_X64=59,
InstructionSet_AVX512VBMI_X64=60,
InstructionSet_AVX512VBMI_VL_X64=61,
InstructionSet_VectorT128=33,
InstructionSet_VectorT256=34,
InstructionSet_VectorT512=35,
InstructionSet_X86Base_X64=36,
InstructionSet_SSE_X64=37,
InstructionSet_SSE2_X64=38,
InstructionSet_SSE3_X64=39,
InstructionSet_SSSE3_X64=40,
InstructionSet_SSE41_X64=41,
InstructionSet_SSE42_X64=42,
InstructionSet_AVX_X64=43,
InstructionSet_AVX2_X64=44,
InstructionSet_AES_X64=45,
InstructionSet_BMI1_X64=46,
InstructionSet_BMI2_X64=47,
InstructionSet_FMA_X64=48,
InstructionSet_LZCNT_X64=49,
InstructionSet_PCLMULQDQ_X64=50,
InstructionSet_POPCNT_X64=51,
InstructionSet_AVXVNNI_X64=52,
InstructionSet_MOVBE_X64=53,
InstructionSet_X86Serialize_X64=54,
InstructionSet_AVX512F_X64=55,
InstructionSet_AVX512F_VL_X64=56,
InstructionSet_AVX512BW_X64=57,
InstructionSet_AVX512BW_VL_X64=58,
InstructionSet_AVX512CD_X64=59,
InstructionSet_AVX512CD_VL_X64=60,
InstructionSet_AVX512DQ_X64=61,
InstructionSet_AVX512DQ_VL_X64=62,
InstructionSet_AVX512VBMI_X64=63,
InstructionSet_AVX512VBMI_VL_X64=64,
#endif // TARGET_X86

};

struct CORINFO_InstructionSetFlags
{
private:
static const int32_t FlagsFieldCount = 1;
static const int32_t FlagsFieldCount = 2;
static const int32_t BitsPerFlagsField = sizeof(uint64_t) * 8;
uint64_t _flags[FlagsFieldCount] = { };

Expand Down Expand Up @@ -404,6 +411,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_Vector64);
if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
resultflags.RemoveInstructionSet(InstructionSet_Vector128);
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_AdvSimd))
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
if (resultflags.HasInstructionSet(InstructionSet_X86Base) && !resultflags.HasInstructionSet(InstructionSet_X86Base_X64))
Expand Down Expand Up @@ -594,6 +603,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
Expand Down Expand Up @@ -674,6 +689,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
Expand Down Expand Up @@ -738,6 +759,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "Dczva";
case InstructionSet_Rcpc :
return "Rcpc";
case InstructionSet_VectorT128 :
return "VectorT128";
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case InstructionSet_X86Base :
Expand Down Expand Up @@ -862,6 +885,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX512VBMI_VL";
case InstructionSet_AVX512VBMI_VL_X64 :
return "AVX512VBMI_VL_X64";
case InstructionSet_VectorT128 :
return "VectorT128";
case InstructionSet_VectorT256 :
return "VectorT256";
case InstructionSet_VectorT512 :
return "VectorT512";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
Expand Down Expand Up @@ -928,6 +957,12 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
return "AVX512VBMI";
case InstructionSet_AVX512VBMI_VL :
return "AVX512VBMI_VL";
case InstructionSet_VectorT128 :
return "VectorT128";
case InstructionSet_VectorT256 :
return "VectorT256";
case InstructionSet_VectorT512 :
return "VectorT512";
#endif // TARGET_X86

default:
Expand Down Expand Up @@ -958,6 +993,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Sha256: return InstructionSet_Sha256;
case READYTORUN_INSTRUCTION_Atomics: return InstructionSet_Atomics;
case READYTORUN_INSTRUCTION_Rcpc: return InstructionSet_Rcpc;
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
#endif // TARGET_ARM64
#ifdef TARGET_AMD64
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand Down Expand Up @@ -989,6 +1025,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
Expand Down Expand Up @@ -1020,6 +1059,9 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
case READYTORUN_INSTRUCTION_Avx512DQ_VL: return InstructionSet_AVX512DQ_VL;
case READYTORUN_INSTRUCTION_Avx512Vbmi: return InstructionSet_AVX512VBMI;
case READYTORUN_INSTRUCTION_Avx512Vbmi_VL: return InstructionSet_AVX512VBMI_VL;
case READYTORUN_INSTRUCTION_VectorT128: return InstructionSet_VectorT128;
case READYTORUN_INSTRUCTION_VectorT256: return InstructionSet_VectorT256;
case READYTORUN_INSTRUCTION_VectorT512: return InstructionSet_VectorT512;
#endif // TARGET_X86

default:
Expand Down
10 changes: 5 additions & 5 deletions src/coreclr/inc/jiteeversionguid.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
#define GUID_DEFINED
#endif // !GUID_DEFINED

constexpr GUID JITEEVersionIdentifier = { /* d4414be1-70e4-46ac-8866-ca3a6c2f8422 */
0xd4414be1,
0x70e4,
0x46ac,
{0x88, 0x66, 0xca, 0x3a, 0x6c, 0x2f, 0x84, 0x22}
constexpr GUID JITEEVersionIdentifier = { /* fda2f9dd-6b3e-4ecd-a7b8-79e5edf1f072 */
0xfda2f9dd,
0x6b3e,
0x4ecd,
{0xa7, 0xb8, 0x79, 0xe5, 0xed, 0xf1, 0xf0, 0x72}
};

//////////////////////////////////////////////////////////////////////////////////////////////////////////
Expand Down
3 changes: 3 additions & 0 deletions src/coreclr/inc/readytoruninstructionset.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,9 @@ enum ReadyToRunInstructionSet
READYTORUN_INSTRUCTION_Avx512DQ_VL=36,
READYTORUN_INSTRUCTION_Avx512Vbmi=37,
READYTORUN_INSTRUCTION_Avx512Vbmi_VL=38,
READYTORUN_INSTRUCTION_VectorT128=39,
READYTORUN_INSTRUCTION_VectorT256=40,
READYTORUN_INSTRUCTION_VectorT512=41,

};

Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/jit/codegenxarch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3401,15 +3401,15 @@ void CodeGen::genCodeForCpBlkUnroll(GenTreeBlk* node)
assert(srcOffset < (INT32_MAX - static_cast<int>(size)));
assert(dstOffset < (INT32_MAX - static_cast<int>(size)));

if (size >= XMM_REGSIZE_BYTES)
// Get the largest SIMD register available if the size is large enough
unsigned regSize = compiler->roundDownSIMDSize(size);

if ((size >= regSize) && (regSize > 0))
{
regNumber tempReg = node->GetSingleTempReg(RBM_ALLFLOAT);

instruction simdMov = simdUnalignedMovIns();

// Get the largest SIMD register available if the size is large enough
unsigned regSize = compiler->roundDownSIMDSize(size);

auto emitSimdMovs = [&]() {
if (srcLclNum != BAD_VAR_NUM)
{
Expand Down
Loading