From bdce1f4a6155e4230e70da4c4612050e8cea518b Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 08:02:54 -0700
Subject: [PATCH 01/16] Remove JitForceEVEXEncoding in favor of the existing
 AltJit enablement

---
 src/coreclr/jit/compiler.cpp          | 49 +++++++++++++++++++++++----
 src/coreclr/jit/compiler.h            | 22 +-----------
 src/coreclr/jit/jitconfigvalues.h     |  1 -
 src/tests/Common/testenvironment.proj |  8 ++---
 4 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 97f9cabaf6f47..8db104191d61c 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2289,8 +2289,7 @@ void Compiler::compSetProcessor()
     {
         // Using JitStressEVEXEncoding flag will force instructions which would
         // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
-        // This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus
-        // causing failure if not running on compatible hardware.
+        // This requires AVX512VL support.
 
         // We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't
         // been set yet as that's what we're trying to set here
@@ -2298,11 +2297,7 @@ void Compiler::compSetProcessor()
         bool enableAvx512 = false;
 
 #if defined(DEBUG)
-        if (JitConfig.JitForceEVEXEncoding())
-        {
-            enableAvx512 = true;
-        }
-        else if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
+        if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
         {
             enableAvx512 = true;
         }
@@ -6028,6 +6023,46 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr,
         {
             instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI);
         }
+
+        if (JitConfig.EnableAVX512F() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F);
+        }
+
+        if (JitConfig.EnableAVX512F_VL() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F_VL);
+        }
+
+        if (JitConfig.EnableAVX512BW() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW);
+        }
+
+        if (JitConfig.EnableAVX512BW_VL() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW_VL);
+        }
+
+        if (JitConfig.EnableAVX512CD() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD);
+        }
+
+        if (JitConfig.EnableAVX512CD_VL() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD_VL);
+        }
+
+        if (JitConfig.EnableAVX512DQ() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ);
+        }
+
+        if (JitConfig.EnableAVX512DQ_VL() != 0)
+        {
+            instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ_VL);
+        }
 #endif
 
         // These calls are important and explicitly ordered to ensure that the flags are correct in
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 8a6f8e775ae14..23c6a9b364dfe 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9197,13 +9197,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #ifdef TARGET_XARCH
     bool canUseVexEncoding() const
     {
-#ifdef DEBUG
-        if (JitConfig.JitForceEVEXEncoding())
-        {
-            return true;
-        }
-#endif // DEBUG
-
         return compOpportunisticallyDependsOn(InstructionSet_AVX);
     }
 
@@ -9215,13 +9208,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     //
     bool canUseEvexEncoding() const
     {
-#ifdef DEBUG
-        if (JitConfig.JitForceEVEXEncoding())
-        {
-            return true;
-        }
-#endif // DEBUG
-
         return compOpportunisticallyDependsOn(InstructionSet_AVX512F);
     }
 
@@ -9236,13 +9222,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #ifdef DEBUG
         // Using JitStressEVEXEncoding flag will force instructions which would
         // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
-        // This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus
-        // causing failure if not running on compatible hardware.
-
-        if (JitConfig.JitForceEVEXEncoding())
-        {
-            return true;
-        }
+        // This requires AVX512VL support.
 
         if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
         {
diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h
index 5d0ea994c46c8..449a02bcd0a22 100644
--- a/src/coreclr/jit/jitconfigvalues.h
+++ b/src/coreclr/jit/jitconfigvalues.h
@@ -304,7 +304,6 @@ CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1) // Enable the
 #if defined(DEBUG)
 CONFIG_INTEGER(JitStressEvexEncoding, W("JitStressEvexEncoding"), 0) // Enable EVEX encoding for SIMD instructions when
                                                                      // AVX-512VL is available.
-CONFIG_INTEGER(JitForceEVEXEncoding, W("JitForceEVEXEncoding"), 0)   // Force EVEX encoding for SIMD instructions.
 #endif
 
 // clang-format off
diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj
index 70c89a0787687..50de34656eca2 100644
--- a/src/tests/Common/testenvironment.proj
+++ b/src/tests/Common/testenvironment.proj
@@ -36,7 +36,7 @@
       DOTNET_EnableSSE41;
       DOTNET_EnableSSE42;
       DOTNET_EnableSSSE3;
-      DOTNET_JitForceEVEXEncoding;
+      DOTNET_JitStressEvexEncoding;
       DOTNET_ForceRelocs;
       DOTNET_GCStress;
       DOTNET_GCName;
@@ -156,8 +156,8 @@
     <TestEnvironment Include="jitstress_isas_2_x86_nosse41" JitStress="2" EnableSSE41="0" /> <!-- Depends on SSSE3 and SSE3_4 -->
     <TestEnvironment Include="jitstress_isas_2_x86_nosse42" JitStress="2" EnableSSE42="0" /> <!-- Depends on SSE41 -->
     <TestEnvironment Include="jitstress_isas_2_x86_nossse3" JitStress="2" EnableSSSE3="0" /> <!-- Depends on SSE3 -->
-    <TestEnvironment Include="jitstress_isas_avx512_forceevex" JitForceEVEXEncoding="1" EnableIncompleteISAClass="1" /> <!-- Depends on AVX512 -->
-    <TestEnvironment Include="jitstress_isas_avx512_forceevex_stresshighregs" JitForceEVEXEncoding="1" JitStressRegs="0x2000" EnableIncompleteISAClass="1" /> <!-- Depends on AVX512 -->
+    <TestEnvironment Include="jitstress_isas_avx512" JitStressEvexEncoding="1" EnableIncompleteISAClass="1" /> <!-- Depends on AVX512 -->
+    <TestEnvironment Include="jitstress_isas_avx512_stresshighregs" JitStressEvexEncoding="1" JitStressRegs="0x2000" EnableIncompleteISAClass="1" /> <!-- Depends on AVX512 -->
     <TestEnvironment Include="jitstressregs1_x86_noavx" JitStressRegs="1" EnableAVX="0" />
     <TestEnvironment Include="jitstressregs2_x86_noavx" JitStressRegs="2" EnableAVX="0" />
     <TestEnvironment Include="jitstressregs3_x86_noavx" JitStressRegs="3" EnableAVX="0" />
@@ -260,7 +260,7 @@
 
        2) Otherwise, the test scenario defines such DOTNET_* environment variable and the specified value
           of this variable can be extracted by using Metadata() item function.
-      
+
        3) This is where we process a pseudo-value of "random" in the DOTNET table into an actual random number.
       -->
 

From 30f54f19f7ac35bc45391a4d5ab4bbb5f63097c2 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 08:21:38 -0700
Subject: [PATCH 02/16] Rename IsVexEncodingInstruction to
 IsVexEncodableInstruction

---
 src/coreclr/jit/emitxarch.cpp | 92 +++++++++++++++++------------------
 src/coreclr/jit/emitxarch.h   |  6 +--
 src/coreclr/jit/instr.cpp     |  2 +-
 3 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index e239810c575ed..c17c18dab8ba1 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -159,7 +159,7 @@ regNumber emitter::getSseShiftRegNumber(instruction ins)
     }
 }
 
-bool emitter::IsVexEncodedInstruction(instruction ins) const
+bool emitter::IsVexEncodableInstruction(instruction ins) const
 {
     if (!UseVEXEncoding())
     {
@@ -174,7 +174,7 @@ bool emitter::IsVexEncodedInstruction(instruction ins) const
 }
 
 //------------------------------------------------------------------------
-// IsEvexEncodedInstruction: Answer the question- Can this instruction be Evex encoded.
+// IsEvexEncodableInstruction: Answer the question- Can this instruction be Evex encoded.
 //
 // Arguments:
 //    ins - The instruction to check.
@@ -182,7 +182,7 @@ bool emitter::IsVexEncodedInstruction(instruction ins) const
 // Returns:
 //    `true` if ins can be Evex encoded.
 //
-bool emitter::IsEvexEncodedInstruction(instruction ins) const
+bool emitter::IsEvexEncodableInstruction(instruction ins) const
 {
     if (!UseEvexEncoding())
     {
@@ -205,9 +205,9 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const
 // Returns:
 //    `true` if ins is a SIMD instruction.
 //
-bool emitter::IsVexOrEvexEncodedInstruction(instruction ins) const
+bool emitter::IsVexOrEvexEncodableInstruction(instruction ins) const
 {
-    return IsVexEncodedInstruction(ins) || IsEvexEncodedInstruction(ins);
+    return IsVexEncodableInstruction(ins) || IsEvexEncodableInstruction(ins);
 }
 
 // Returns true if the AVX instruction is a binary operator that requires 3 operands.
@@ -1034,7 +1034,7 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
 {
     instruction ins = id->idIns();
 
-    if (!IsEvexEncodedInstruction(ins))
+    if (!IsEvexEncodableInstruction(ins))
     {
         return false;
     }
@@ -1102,7 +1102,7 @@ emitter::code_t emitter::AddEvexPrefix(instruction ins, code_t code, emitAttr at
 {
 
     // Only AVX512 instructions require EVEX prefix
-    assert(IsEvexEncodedInstruction(ins));
+    assert(IsEvexEncodableInstruction(ins));
 
     // Shouldn't have already added EVEX prefix
     assert(!hasEvexPrefix(code));
@@ -1146,7 +1146,7 @@ bool emitter::TakesVexPrefix(instruction ins) const
             break;
     }
 
-    return IsVexEncodedInstruction(ins);
+    return IsVexEncodableInstruction(ins);
 }
 
 // Add base VEX prefix without setting W, R, X, or B bits
@@ -1183,7 +1183,7 @@ emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr att
     // emitted, by simply checking that all the requirements were met.
 
     // Only AVX instructions require VEX prefix
-    assert(IsVexEncodedInstruction(ins));
+    assert(IsVexEncodableInstruction(ins));
 
     // Shouldn't have already added VEX prefix
     assert(!hasVexPrefix(code));
@@ -1463,7 +1463,7 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodedInstruction(ins))
+    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
     {
         if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
         {
@@ -1474,7 +1474,7 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code)
             return emitter::code_t(code | 0x0000800000000000ULL);
         }
     }
-    if (UseVEXEncoding() && IsVexEncodedInstruction(ins))
+    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
     {
         if (TakesVexPrefix(ins))
         {
@@ -1499,7 +1499,7 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodedInstruction(ins))
+    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
     {
         if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
         {
@@ -1510,7 +1510,7 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code)
             return code & 0xFF7FFFFFFFFFFFFFULL;
         }
     }
-    if (UseVEXEncoding() && IsVexEncodedInstruction(ins))
+    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
     {
         if (TakesVexPrefix(ins))
         {
@@ -1529,7 +1529,7 @@ emitter::code_t emitter::AddRexXPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodedInstruction(ins))
+    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
     {
         if (TakesEvexPrefix(id))
         {
@@ -1539,7 +1539,7 @@ emitter::code_t emitter::AddRexXPrefix(const instrDesc* id, code_t code)
             return code & 0xFFBFFFFFFFFFFFFFULL;
         }
     }
-    if (UseVEXEncoding() && IsVexEncodedInstruction(ins))
+    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
     {
         if (TakesVexPrefix(ins))
         {
@@ -1558,7 +1558,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodedInstruction(ins))
+    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
     {
         if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
         {
@@ -1569,7 +1569,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code)
             return code & 0xFFDFFFFFFFFFFFFFULL;
         }
     }
-    if (UseVEXEncoding() && IsVexEncodedInstruction(ins))
+    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
     {
         if (TakesVexPrefix(ins))
         {
@@ -1587,8 +1587,8 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code)
 // Adds REX prefix (0x40) without W, R, X or B bits set
 emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
 {
-    assert(!UseVEXEncoding() || !IsVexEncodedInstruction(ins));
-    assert(!UseEvexEncoding() || !IsEvexEncodedInstruction(ins));
+    assert(!UseVEXEncoding() || !IsVexEncodableInstruction(ins));
+    assert(!UseEvexEncoding() || !IsEvexEncodableInstruction(ins));
     return code | 0x4000000000ULL;
 }
 
@@ -1663,7 +1663,7 @@ bool isPrefix(BYTE b)
 //
 emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) const
 {
-    assert(IsEvexEncodedInstruction(ins));
+    assert(IsEvexEncodableInstruction(ins));
 
     code_t evexPrefix = (code >> 32) & 0xFFFFFFFF;
     code &= 0x00000000FFFFFFFFLL;
@@ -1799,7 +1799,7 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co
 //
 emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) const
 {
-    assert(IsVexEncodedInstruction(ins));
+    assert(IsVexEncodableInstruction(ins));
 
     code_t vexPrefix = (code >> 32) & 0x00FFFFFF;
     code &= 0x00000000FFFFFFFFLL;
@@ -2128,7 +2128,7 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins)
 {
     // In case of AVX instructions, REX prefixes are part of VEX prefix.
     // And hence requires no additional byte to encode REX prefixes.
-    if (IsVexOrEvexEncodedInstruction(ins))
+    if (IsVexOrEvexEncodableInstruction(ins))
     {
         return 0;
     }
@@ -2148,7 +2148,7 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins)
 //
 unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) const
 {
-    assert(IsEvexEncodedInstruction(id->idIns()));
+    assert(IsEvexEncodableInstruction(id->idIns()));
     return 4;
 }
 
@@ -2169,10 +2169,10 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
     unsigned    adjustedSize = 0;
 
     // TODO-XArch-AVX512: Remove redundant code and possiblly collapse EVEX and VEX into a single pathway
-    // IsEvexEncodedInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless
+    // IsEvexEncodableInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless
     // explicitly
     // asked for EVEX.
-    if (IsEvexEncodedInstruction(ins) && TakesEvexPrefix(id))
+    if (IsEvexEncodableInstruction(ins) && TakesEvexPrefix(id))
     {
         // EVEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
         // Therefore, to estimate the size adding EVEX prefix size and size of instruction opcode bytes will always
@@ -2219,7 +2219,7 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
 
         adjustedSize = evexPrefixAdjustedSize;
     }
-    else if (IsVexEncodedInstruction(ins))
+    else if (IsVexEncodableInstruction(ins))
     {
         // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
         // Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always
@@ -2704,7 +2704,7 @@ inline bool hasCodeMR(instruction ins)
 unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const
 {
     instruction ins = id->idIns();
-    assert(IsVexEncodedInstruction(ins));
+    assert(IsVexEncodableInstruction(ins));
 
     if (EncodedBySSE38orSSE3A(ins))
     {
@@ -3036,7 +3036,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
     instruction ins = id->idIns();
 
     assert(reg < REG_STK);
-    assert(IsVexOrEvexEncodedInstruction(ins));
+    assert(IsVexOrEvexEncodableInstruction(ins));
     assert(hasVexOrEvexPrefix(code));
 
     // Get 4-bit register encoding
@@ -3050,7 +3050,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
 
     // Both prefix encodes register operand in 1's complement form
     assert(regBits <= 0xF);
-    if (UseEvexEncoding() && IsEvexEncodedInstruction(ins))
+    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
     {
         if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code))
         {
@@ -3072,7 +3072,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
             return code ^ regBits;
         }
     }
-    if (UseVEXEncoding() && IsVexEncodedInstruction(ins))
+    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
     {
 
         // Both prefix encodes register operand in 1's complement form
@@ -3412,7 +3412,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code)
         (!id->idIsSmallDsc() && (IsExtendedReg(id->idReg3(), attr) || IsExtendedReg(id->idReg4(), attr))))
     {
         sz += emitGetRexPrefixSize(ins);
-        includeRexPrefixSize = !IsVexEncodedInstruction(ins);
+        includeRexPrefixSize = !IsVexEncodableInstruction(ins);
     }
 
     sz += emitInsSize(id, code, includeRexPrefixSize);
@@ -11084,7 +11084,7 @@ void emitter::emitDispIns(
 
         case IF_RWR_RRD_RRD:
         {
-            assert(IsVexEncodedInstruction(ins));
+            assert(IsVexEncodableInstruction(ins));
             assert(IsThreeOperandAVXInstruction(ins));
             regNumber reg2 = id->idReg2();
             regNumber reg3 = id->idReg3();
@@ -11107,7 +11107,7 @@ void emitter::emitDispIns(
         }
 
         case IF_RWR_RRD_RRD_CNS:
-            assert(IsVexOrEvexEncodedInstruction(ins));
+            assert(IsVexOrEvexEncodableInstruction(ins));
             assert(IsThreeOperandAVXInstruction(ins));
             printf("%s, ", emitRegName(id->idReg1(), attr));
             printf("%s, ", emitRegName(id->idReg2(), attr));
@@ -12056,7 +12056,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
             code += 4;
         }
     }
-    else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodedInstruction(ins))
+    else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodableInstruction(ins))
     {
         /* Is the operand size larger than a byte? */
 
@@ -12864,7 +12864,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
             code += 4;
         }
     }
-    else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodedInstruction(ins))
+    else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodableInstruction(ins))
     {
         // Is the operand size larger than a byte?
         switch (size)
@@ -13636,7 +13636,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id)
 
     // We would to update GC info correctly
     assert(!IsSSEInstruction(ins));
-    assert(!IsVexOrEvexEncodedInstruction(ins));
+    assert(!IsVexOrEvexEncodableInstruction(ins));
 
     // Get the 'base' opcode
     switch (ins)
@@ -14077,7 +14077,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
     else if ((code & 0xFF) == 0x00)
     {
         // This case happens for some SSE/AVX instructions only
-        assert(IsVexOrEvexEncodedInstruction(ins) || Is4ByteSSEInstruction(ins));
+        assert(IsVexOrEvexEncodableInstruction(ins) || Is4ByteSSEInstruction(ins));
 
         dst += emitOutputByte(dst, (code >> 8) & 0xFF);
         dst += emitOutputByte(dst, (0xC0 | regCode));
@@ -14277,7 +14277,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
     code_t code;
 
     instruction ins = id->idIns();
-    assert(IsVexOrEvexEncodedInstruction(ins));
+    assert(IsVexOrEvexEncodableInstruction(ins));
     assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins));
     regNumber targetReg = id->idReg1();
     regNumber src1      = id->idReg2();
@@ -14323,7 +14323,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id)
     else if ((code & 0xFF) == 0x00)
     {
         // This case happens for AVX instructions only
-        assert(IsVexOrEvexEncodedInstruction(ins));
+        assert(IsVexOrEvexEncodableInstruction(ins));
 
         dst += emitOutputByte(dst, (code >> 8) & 0xFF);
         dst += emitOutputByte(dst, (0xC0 | regCode));
@@ -14721,7 +14721,7 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id)
 
     // We would to update GC info correctly
     assert(!IsSSEInstruction(ins));
-    assert(!IsVexOrEvexEncodedInstruction(ins));
+    assert(!IsVexOrEvexEncodableInstruction(ins));
 
 #ifdef TARGET_AMD64
     // all these opcodes take a sign-extended 4-byte immediate, max
@@ -14815,7 +14815,7 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i)
 
     // SSE/AVX doesnt make any sense here
     assert(!IsSSEInstruction(ins));
-    assert(!IsVexOrEvexEncodedInstruction(ins));
+    assert(!IsVexOrEvexEncodableInstruction(ins));
 
     size_t ssz;
     size_t lsz;
@@ -15839,7 +15839,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             else if ((code & 0xFF) == 0x00)
             {
                 // This case happens for some SSE/AVX instructions only
-                assert(IsVexOrEvexEncodedInstruction(ins) || Is4ByteSSEInstruction(ins));
+                assert(IsVexOrEvexEncodableInstruction(ins) || Is4ByteSSEInstruction(ins));
 
                 dst += emitOutputByte(dst, (code >> 8) & 0xFF);
                 dst += emitOutputByte(dst, (0xC0 | regcode));
@@ -16142,7 +16142,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
 
         case IF_RWR_RRD_SRD:
         {
-            assert(IsVexOrEvexEncodedInstruction(ins));
+            assert(IsVexOrEvexEncodableInstruction(ins));
 
             code = insCodeRM(ins);
             code = AddSimdPrefixIfNeeded(id, code, size);
@@ -16167,7 +16167,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         case IF_RWR_RRD_SRD_RRD:
         {
             // This should only be called on AVX instructions
-            assert(IsVexOrEvexEncodedInstruction(ins));
+            assert(IsVexOrEvexEncodableInstruction(ins));
             emitGetInsCns(id, &cnsVal);
 
             code = insCodeRM(ins);
@@ -16307,7 +16307,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         case IF_RWR_RRD_MRD:
         {
             // This should only be called on AVX instructions
-            assert(IsVexOrEvexEncodedInstruction(ins));
+            assert(IsVexOrEvexEncodableInstruction(ins));
 
             code = insCodeRM(ins);
             code = AddSimdPrefixIfNeeded(id, code, size);
@@ -16332,7 +16332,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         case IF_RWR_RRD_MRD_RRD:
         {
             // This should only be called on AVX instructions
-            assert(IsVexOrEvexEncodedInstruction(ins));
+            assert(IsVexOrEvexEncodableInstruction(ins));
             emitGetInsCns(id, &cnsVal);
 
             code = insCodeRM(ins);
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 2f6e89ed80064..16ac15eb5f1b3 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -105,9 +105,9 @@ static bool IsKInstruction(instruction ins);
 
 static regNumber getBmiRegNumber(instruction ins);
 static regNumber getSseShiftRegNumber(instruction ins);
-bool IsVexEncodedInstruction(instruction ins) const;
-bool IsEvexEncodedInstruction(instruction ins) const;
-bool IsVexOrEvexEncodedInstruction(instruction ins) const;
+bool IsVexEncodableInstruction(instruction ins) const;
+bool IsEvexEncodableInstruction(instruction ins) const;
+bool IsVexOrEvexEncodableInstruction(instruction ins) const;
 
 code_t insEncodeMIreg(const instrDesc* id, regNumber reg, emitAttr size, code_t code);
 
diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp
index f0e4c5596b927..f789f58b0f785 100644
--- a/src/coreclr/jit/instr.cpp
+++ b/src/coreclr/jit/instr.cpp
@@ -103,7 +103,7 @@ const char* CodeGen::genInsDisplayName(emitter::instrDesc* id)
 
     const emitter* emit = GetEmitter();
 
-    if (emit->IsVexOrEvexEncodedInstruction(ins))
+    if (emit->IsVexOrEvexEncodableInstruction(ins))
     {
         if (!emit->IsBMIInstruction(ins) && !emit->IsKInstruction(ins))
         {

From 63ccbb5d7850ecc58a59801b07ae5d952eb465cd Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 08:46:17 -0700
Subject: [PATCH 03/16] Updating the JIT to support EVEX by default

---
 .../templates/runtimes/run-test-job.yml       | 13 ++-
 eng/pipelines/libraries/run-test-job.yml      |  2 +
 src/coreclr/jit/compiler.cpp                  | 80 +++++++++----------
 src/coreclr/jit/compiler.h                    | 13 ++-
 src/coreclr/jit/emitxarch.cpp                 | 31 +++----
 src/coreclr/jit/lsra.cpp                      |  7 +-
 src/tests/Common/testenvironment.proj         | 10 ++-
 7 files changed, 81 insertions(+), 75 deletions(-)

diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml
index 1b9670bc971b4..63366eebd928c 100644
--- a/eng/pipelines/common/templates/runtimes/run-test-job.yml
+++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml
@@ -407,9 +407,11 @@ jobs:
           - jitstress_isas_nohwintrinsic
           - jitstress_isas_nohwintrinsic_nosimd
           - jitstress_isas_nosimd
+          - jitstress_isas_x86_evex
           - jitstress_isas_x86_noaes
           - jitstress_isas_x86_noavx
           - jitstress_isas_x86_noavx2
+          - jitstress_isas_x86_noavx512
           - jitstress_isas_x86_nobmi1
           - jitstress_isas_x86_nobmi2
           - jitstress_isas_x86_nofma
@@ -427,6 +429,7 @@ jobs:
           - jitstress_isas_1_x86_noaes
           - jitstress_isas_1_x86_noavx
           - jitstress_isas_1_x86_noavx2
+          - jitstress_isas_1_x86_noavx512
           - jitstress_isas_1_x86_nobmi1
           - jitstress_isas_1_x86_nobmi2
           - jitstress_isas_1_x86_nofma
@@ -444,6 +447,7 @@ jobs:
           - jitstress_isas_2_x86_noaes
           - jitstress_isas_2_x86_noavx
           - jitstress_isas_2_x86_noavx2
+          - jitstress_isas_2_x86_noavx512
           - jitstress_isas_2_x86_nobmi1
           - jitstress_isas_2_x86_nobmi2
           - jitstress_isas_2_x86_nofma
@@ -460,8 +464,9 @@ jobs:
           - jitstress_isas_2_x86_nossse3
         ${{ if in(parameters.testGroup, 'jitstress-isas-avx512') }}:
           scenarios:
-          - jitstress_isas_avx512_forceevex
-          - jitstress_isas_avx512_forceevex_stresshighregs
+          - jitstress_isas_x86_evex
+          - jitstress_isas_x86_noavx512
+          - jitstressregs0x2000
         ${{ if in(parameters.testGroup, 'jitstressregs-x86') }}:
           scenarios:
           - jitstressregs1_x86_noavx
@@ -472,6 +477,7 @@ jobs:
           - jitstressregs0x10_x86_noavx
           - jitstressregs0x80_x86_noavx
           - jitstressregs0x1000_x86_noavx
+          - jitstressregs0x2000_x86_noavx
         ${{ if in(parameters.testGroup, 'jitstressregs' ) }}:
           scenarios:
           - jitstressregs1
@@ -482,6 +488,7 @@ jobs:
           - jitstressregs0x10
           - jitstressregs0x80
           - jitstressregs0x1000
+          - jitstressregs0x2000
         ${{ if in(parameters.testGroup, 'jitstress2-jitstressregs') }}:
           scenarios:
           - jitstress2_jitstressregs1
@@ -492,6 +499,7 @@ jobs:
           - jitstress2_jitstressregs0x10
           - jitstress2_jitstressregs0x80
           - jitstress2_jitstressregs0x1000
+          - jitstress2_jitstressregs0x2000
         ${{ if in(parameters.testGroup, 'gcstress0x3-gcstress0xc') }}:
           scenarios:
           - gcstress0x3
@@ -520,6 +528,7 @@ jobs:
           - jitstressregs0x10
           - jitstressregs0x80
           - jitstressregs0x1000
+          - jitstressregs0x2000
           - jitminopts
           - forcerelocs
           - gcstress0xc
diff --git a/eng/pipelines/libraries/run-test-job.yml b/eng/pipelines/libraries/run-test-job.yml
index ee1f82a633b2c..0654ce7629aaf 100644
--- a/eng/pipelines/libraries/run-test-job.yml
+++ b/eng/pipelines/libraries/run-test-job.yml
@@ -161,6 +161,7 @@ jobs:
               - jitstressregs0x10
               - jitstressregs0x80
               - jitstressregs0x1000
+              - jitstressregs0x2000
             ${{ if in(parameters.coreclrTestGroup, 'jitstress2-jitstressregs') }}:
               scenarios:
               - jitstress2_jitstressregs1
@@ -171,6 +172,7 @@ jobs:
               - jitstress2_jitstressregs0x10
               - jitstress2_jitstressregs0x80
               - jitstress2_jitstressregs0x1000
+              - jitstress2_jitstressregs0x2000
             ${{ if in(parameters.coreclrTestGroup, 'gcstress0x3-gcstress0xc') }}:
               scenarios:
               # Disable gcstress0x3 for now; it causes lots of test timeouts. Investigate this after
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 8db104191d61c..1f46f4dee235e 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2275,59 +2275,51 @@ void Compiler::compSetProcessor()
     {
         instructionSetFlags.AddInstructionSet(InstructionSet_Vector128);
     }
+
     if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX))
     {
         instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
     }
-    // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL and
-    // AVX512F/AVX512BW/AVX512CD/AVX512DQ/VX512VL have been shipped together historically.
-    // It is therefore unlikely that future CPUs only support "just one" and
-    // not worth the additional complexity in the JIT to support.
-    if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F) &&
-        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) &&
-        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ))
-    {
-        // Using JitStressEVEXEncoding flag will force instructions which would
-        // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
-        // This requires AVX512VL support.
 
-        // We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't
-        // been set yet as that's what we're trying to set here
+    // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
+    // These have been shipped together historically and at the time of this writing
+    // there exists no hardware which doesn't support the entire feature set. To simplify
+    // the overall JIT implementation, we currently require the entire set of ISAs to be
+    // supported and disable AVX512 support otherwise.
 
-        bool enableAvx512 = false;
+    if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) &&
+        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD) &&
+        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ) &&
+        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
+    {
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL));
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL));
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
 
-#if defined(DEBUG)
-        if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
-        {
-            enableAvx512 = true;
-        }
-#endif // DEBUG
+        instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
+    }
+    else
+    {
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL);
 
-        if (!enableAvx512)
-        {
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL);
 #ifdef TARGET_AMD64
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64);
-            instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64);
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64);
 #endif // TARGET_AMD64
-        }
-        else
-        {
-            instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
-        }
     }
 #elif defined(TARGET_ARM64)
     if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd))
@@ -3394,7 +3386,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags)
     rbmFltCalleeTrash   = RBM_FLT_CALLEE_TRASH_INIT;
     cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT;
 
-    if (DoJitStressEvexEncoding())
+    if (canUseEvexEncoding())
     {
         rbmAllFloat |= RBM_HIGHFLOAT;
         rbmFltCalleeTrash |= RBM_HIGHFLOAT;
diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 23c6a9b364dfe..5aab92acc60bc 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9222,10 +9222,19 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
 #ifdef DEBUG
         // Using JitStressEVEXEncoding flag will force instructions which would
         // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding
-        // This requires AVX512VL support.
+        // This requires AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL support
 
-        if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL))
+        if (JitConfig.JitStressEvexEncoding() && IsBaselineVector512IsaSupported())
         {
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F));
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL));
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW));
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW_VL));
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD));
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD_VL));
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ));
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL));
+
             return true;
         }
 #endif // DEBUG
diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index c17c18dab8ba1..45e0d6485fe39 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1013,16 +1013,6 @@ bool emitter::TakesSimdPrefix(const instrDesc* id) const
 
 //------------------------------------------------------------------------
 // TakesEvexPrefix: Checks if the instruction should be EVEX encoded.
-// TODO-XArch-AVX512: This check needs to be updated once AVX512 instructions are added.
-// Eventually, this should evolve to return `true` for the following cases:
-// - JitConfig.JitStressEvexEncoding flag is set.
-// - Is an new AVX512 instruction.
-// - Uses ZMM vector registers.
-// - Uses upper 128-bit or 256-bit registers for an AVX512VL ins.
-// - Uses Operand mask encoding: 64-bit opmask registers k0-k7 for conditional execution and merging of destination
-// operands.
-// - Need to encode functionality specific to Instruction classes(e.g.,embedded broadcast, embedded rounding control
-// etc.)
 //
 // Arguments:
 //    instruction -- processor instruction to check
@@ -1036,23 +1026,24 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
 
     if (!IsEvexEncodableInstruction(ins))
     {
+        // Can't be EVEX encoded ever
         return false;
     }
 
-    if (!emitComp->DoJitStressEvexEncoding())
+    if (HasHighSIMDReg(id) || (id->idOpSize() == OPSZ64) || HasKMaskRegisterDest(ins))
     {
-        return false;
-    }
-
-    if (HasHighSIMDReg(id))
-    {
-        // TODO-XARCH-AVX512 remove this check once k registers have been implemented
-        assert(!HasKMaskRegisterDest(ins));
+        // Requires the EVEX encoding due to used registers
         return true;
     }
 
-    // TODO-XArch-AVX512: Revisit 'HasKMaskRegisterDest()' check once KMask support is added.
-    return !HasKMaskRegisterDest(ins);
+    // TODO-XArch-AVX512: This needs to return true when the id includes EVEX specific functionality:
+    // * masking
+    // * embedded broadcast
+    // * embedded rounding control
+    // * etc
+
+    // Only supports the EVEX encoding or in stress mode to always use EVEX encoding
+    return !IsVexEncodableInstruction(ins) || emitComp->DoJitStressEvexEncoding();
 }
 
 // Intel AVX-512 encoding is defined in "Intel 64 and ia-32 architectures software developer's manual volume 2", Section
diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp
index d8520dd7ece2a..afd8e2434cbaf 100644
--- a/src/coreclr/jit/lsra.cpp
+++ b/src/coreclr/jit/lsra.cpp
@@ -644,7 +644,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
     rbmFltCalleeTrash = compiler->rbmFltCalleeTrash;
 #endif
 
-    if (!compiler->DoJitStressEvexEncoding())
+    if (!compiler->canUseEvexEncoding())
     {
         availableRegCount -= CNT_HIGHFLOAT;
         availableRegCount -= CNT_MASK_REGS;
@@ -718,10 +718,7 @@ LinearScan::LinearScan(Compiler* theCompiler)
 #endif // TARGET_AMD64 || TARGET_ARM64
 
 #if defined(TARGET_AMD64)
-    // TODO-XARCH-AVX512 switch this to canUseEvexEncoding() once we independently
-    // allow EVEX use from the stress flag (currently, if EVEX stress is turned off,
-    // we cannot use EVEX at all)
-    if (compiler->DoJitStressEvexEncoding())
+    if (compiler->canUseEvexEncoding())
     {
         availableFloatRegs |= RBM_HIGHFLOAT;
         availableDoubleRegs |= RBM_HIGHFLOAT;
diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj
index 50de34656eca2..eb3656263eadc 100644
--- a/src/tests/Common/testenvironment.proj
+++ b/src/tests/Common/testenvironment.proj
@@ -21,6 +21,7 @@
       DOTNET_EnableAES;
       DOTNET_EnableAVX;
       DOTNET_EnableAVX2;
+      DOTNET_EnableAVX512F;
       DOTNET_EnableBMI1;
       DOTNET_EnableBMI2;
       DOTNET_EnableFMA;
@@ -105,9 +106,11 @@
     <TestEnvironment Include="jitstress2_tiered" JitStress="2" TieredCompilation="1" />
     <TestEnvironment Include="jitstress_isas_incompletehwintrinsic" EnableIncompleteISAClass="1"  />
     <TestEnvironment Include="jitstress_isas_nohwintrinsic" EnableHWIntrinsic="0" />
+    <TestEnvironment Include="jitstress_isas_x86_evex" JitStressEvexEncoding="1" />
     <TestEnvironment Include="jitstress_isas_x86_noaes" EnableAES="0" /> <!-- Depends on SSE2 -->
     <TestEnvironment Include="jitstress_isas_x86_noavx" EnableAVX="0" /> <!-- Depends on SSE42 -->
     <TestEnvironment Include="jitstress_isas_x86_noavx2" EnableAVX2="0" /> <!-- Depends on AVX -->
+    <TestEnvironment Include="jitstress_isas_x86_noavx512" EnableAVX512F="0" /> <!-- Depends on AVX2 -->
     <TestEnvironment Include="jitstress_isas_x86_nobmi1" EnableBMI1="0" /> <!-- No dependencies -->
     <TestEnvironment Include="jitstress_isas_x86_nobmi2" EnableBMI2="0" /> <!-- No dependencies -->
     <TestEnvironment Include="jitstress_isas_x86_nofma" EnableFMA="0" /> <!-- Depends on AVX -->
@@ -125,6 +128,7 @@
     <TestEnvironment Include="jitstress_isas_1_x86_noaes" JitStress="1" EnableAES="0" /> <!-- Depends on SSE2 -->
     <TestEnvironment Include="jitstress_isas_1_x86_noavx" JitStress="1" EnableAVX="0" /> <!-- Depends on SSE42 -->
     <TestEnvironment Include="jitstress_isas_1_x86_noavx2" JitStress="1" EnableAVX2="0" /> <!-- Depends on AVX -->
+    <TestEnvironment Include="jitstress_isas_1_x86_noavx512" JitStress="1" EnableAVX512F="0" /> <!-- Depends on AVX2 -->
     <TestEnvironment Include="jitstress_isas_1_x86_nobmi1" JitStress="1" EnableBMI1="0" /> <!-- No dependencies -->
     <TestEnvironment Include="jitstress_isas_1_x86_nobmi2" JitStress="1" EnableBMI2="0" /> <!-- No dependencies -->
     <TestEnvironment Include="jitstress_isas_1_x86_nofma" JitStress="1" EnableFMA="0" /> <!-- Depends on AVX -->
@@ -142,6 +146,7 @@
     <TestEnvironment Include="jitstress_isas_2_x86_noaes" JitStress="2" EnableAES="0" /> <!-- Depends on SSE2 -->
     <TestEnvironment Include="jitstress_isas_2_x86_noavx" JitStress="2" EnableAVX="0" /> <!-- Depends on SSE42 -->
     <TestEnvironment Include="jitstress_isas_2_x86_noavx2" JitStress="2" EnableAVX2="0" /> <!-- Depends on AVX -->
+    <TestEnvironment Include="jitstress_isas_2_x86_noavx512" JitStress="2" EnableAVX512F="0" /> <!-- Depends on AVX2 -->
     <TestEnvironment Include="jitstress_isas_2_x86_nobmi1" JitStress="2" EnableBMI1="0" /> <!-- No dependencies -->
     <TestEnvironment Include="jitstress_isas_2_x86_nobmi2" JitStress="2" EnableBMI2="0" /> <!-- No dependencies -->
     <TestEnvironment Include="jitstress_isas_2_x86_nofma" JitStress="2" EnableFMA="0" /> <!-- Depends on AVX -->
@@ -156,8 +161,6 @@
     <TestEnvironment Include="jitstress_isas_2_x86_nosse41" JitStress="2" EnableSSE41="0" /> <!-- Depends on SSSE3 and SSE3_4 -->
     <TestEnvironment Include="jitstress_isas_2_x86_nosse42" JitStress="2" EnableSSE42="0" /> <!-- Depends on SSE41 -->
     <TestEnvironment Include="jitstress_isas_2_x86_nossse3" JitStress="2" EnableSSSE3="0" /> <!-- Depends on SSE3 -->
-    <TestEnvironment Include="jitstress_isas_avx512" JitStressEvexEncoding="1" EnableIncompleteISAClass="1" /> <!-- Depends on AVX512 -->
-    <TestEnvironment Include="jitstress_isas_avx512_stresshighregs" JitStressEvexEncoding="1" JitStressRegs="0x2000" EnableIncompleteISAClass="1" /> <!-- Depends on AVX512 -->
     <TestEnvironment Include="jitstressregs1_x86_noavx" JitStressRegs="1" EnableAVX="0" />
     <TestEnvironment Include="jitstressregs2_x86_noavx" JitStressRegs="2" EnableAVX="0" />
     <TestEnvironment Include="jitstressregs3_x86_noavx" JitStressRegs="3" EnableAVX="0" />
@@ -166,6 +169,7 @@
     <TestEnvironment Include="jitstressregs0x10_x86_noavx" JitStressRegs="0x10" EnableAVX="0" />
     <TestEnvironment Include="jitstressregs0x80_x86_noavx" JitStressRegs="0x80" EnableAVX="0" />
     <TestEnvironment Include="jitstressregs0x1000_x86_noavx" JitStressRegs="0x1000" EnableAVX="0" />
+    <TestEnvironment Include="jitstressregs0x2000_x86_noavx" JitStressRegs="0x2000" EnableAVX="0" />
     <TestEnvironment Include="jitstressregs1" JitStressRegs="1" />
     <TestEnvironment Include="jitstressregs2" JitStressRegs="2" />
     <TestEnvironment Include="jitstressregs3" JitStressRegs="3" />
@@ -174,6 +178,7 @@
     <TestEnvironment Include="jitstressregs0x10" JitStressRegs="0x10" />
     <TestEnvironment Include="jitstressregs0x80" JitStressRegs="0x80" />
     <TestEnvironment Include="jitstressregs0x1000" JitStressRegs="0x1000" />
+    <TestEnvironment Include="jitstressregs0x2000" JitStressRegs="0x2000" />
     <TestEnvironment Include="jitstress2_jitstressregs1" JitStress="2" JitStressRegs="1" />
     <TestEnvironment Include="jitstress2_jitstressregs2" JitStress="2" JitStressRegs="2" />
     <TestEnvironment Include="jitstress2_jitstressregs3" JitStress="2" JitStressRegs="3" />
@@ -182,6 +187,7 @@
     <TestEnvironment Include="jitstress2_jitstressregs0x10" JitStress="2" JitStressRegs="0x10" />
     <TestEnvironment Include="jitstress2_jitstressregs0x80" JitStress="2" JitStressRegs="0x80" />
     <TestEnvironment Include="jitstress2_jitstressregs0x1000" JitStress="2" JitStressRegs="0x1000" />
+    <TestEnvironment Include="jitstress2_jitstressregs0x2000" JitStress="2" JitStressRegs="0x2000" />
     <!-- There are multiple cases of `jitstress_random_*` so a single pipeline can run random JitStress multiple times without fear
          of a problem due to repeating the same scenario name. Note that this file is invoked with only one of these at a time.
     -->

From 1db8c9af1eb19f83f807ed1554d896798cab70b9 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 09:05:01 -0700
Subject: [PATCH 04/16] Mark the AVX512 ISAs as "fully implemented" since they
 have no unimplemented instructions

---
 src/coreclr/jit/hwintrinsic.cpp      |  7 +++----
 src/coreclr/jit/hwintrinsicxarch.cpp | 17 +++++++++++++++++
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp
index 46dbe9c0e1885..666db1e36d54a 100644
--- a/src/coreclr/jit/hwintrinsic.cpp
+++ b/src/coreclr/jit/hwintrinsic.cpp
@@ -181,8 +181,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler*         comp,
         return NI_Illegal;
     }
 
-    bool isIsaSupported                         = comp->compSupportsHWIntrinsic(isa);
-    bool isBaselineVector512IsaUsedAndSupported = false;
+    bool isIsaSupported = comp->compSupportsHWIntrinsic(isa);
 
     bool isHardwareAcceleratedProp = (strcmp(methodName, "get_IsHardwareAccelerated") == 0);
 #ifdef TARGET_XARCH
@@ -202,7 +201,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler*         comp,
         }
         else if (strcmp(className, "Vector512") == 0)
         {
-            isBaselineVector512IsaUsedAndSupported = comp->IsBaselineVector512IsaSupported();
+            isa = InstructionSet_Vector512;
         }
     }
 #endif
@@ -237,7 +236,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler*         comp,
 
         if (isIsaSupported)
         {
-            if (comp->compExactlyDependsOn(isa) || isBaselineVector512IsaUsedAndSupported)
+            if (comp->compExactlyDependsOn(isa))
             {
                 return NI_IsSupported_True;
             }
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 977bb1b320a87..73e45c2655f90 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -431,6 +431,22 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
         case InstructionSet_AVX_X64:
         case InstructionSet_AVX2:
         case InstructionSet_AVX2_X64:
+        case InstructionSet_AVX512F:
+        case InstructionSet_AVX512F_VL:
+        case InstructionSet_AVX512F_VL_X64:
+        case InstructionSet_AVX512F_X64:
+        case InstructionSet_AVX512BW:
+        case InstructionSet_AVX512BW_VL:
+        case InstructionSet_AVX512BW_VL_X64:
+        case InstructionSet_AVX512BW_X64:
+        case InstructionSet_AVX512CD:
+        case InstructionSet_AVX512CD_VL:
+        case InstructionSet_AVX512CD_VL_X64:
+        case InstructionSet_AVX512CD_X64:
+        case InstructionSet_AVX512DQ:
+        case InstructionSet_AVX512DQ_VL:
+        case InstructionSet_AVX512DQ_VL_X64:
+        case InstructionSet_AVX512DQ_X64:
         case InstructionSet_AVXVNNI:
         case InstructionSet_AVXVNNI_X64:
         case InstructionSet_BMI1:
@@ -459,6 +475,7 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
         case InstructionSet_SSE42_X64:
         case InstructionSet_Vector128:
         case InstructionSet_Vector256:
+        case InstructionSet_Vector512:
         case InstructionSet_X86Base:
         case InstructionSet_X86Base_X64:
         case InstructionSet_X86Serialize:

From 15a075520896bb6d5e66840404c12fe04898a532 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 09:33:12 -0700
Subject: [PATCH 05/16] Simplify some of the EVEX related checks in emitxarch

---
 src/coreclr/jit/emitxarch.cpp | 197 +++++++++++++---------------------
 1 file changed, 72 insertions(+), 125 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 45e0d6485fe39..a64cd88e01b02 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -207,7 +207,13 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const
 //
 bool emitter::IsVexOrEvexEncodableInstruction(instruction ins) const
 {
-    return IsVexEncodableInstruction(ins) || IsEvexEncodableInstruction(ins);
+    if (!UseVEXEncoding())
+    {
+        return false;
+    }
+
+    insFlags flags = CodeGenInterface::instInfo[ins];
+    return (flags & (Encoding_VEX | Encoding_EVEX)) != 0;
 }
 
 // Returns true if the AVX instruction is a binary operator that requires 3 operands.
@@ -1091,7 +1097,6 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
 //
 emitter::code_t emitter::AddEvexPrefix(instruction ins, code_t code, emitAttr attr)
 {
-
     // Only AVX512 instructions require EVEX prefix
     assert(IsEvexEncodableInstruction(ins));
 
@@ -1120,24 +1125,7 @@ emitter::code_t emitter::AddEvexPrefix(instruction ins, code_t code, emitAttr at
 bool emitter::TakesVexPrefix(instruction ins) const
 {
     // special case vzeroupper as it requires 2-byte VEX prefix
-    // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix
-    switch (ins)
-    {
-        case INS_lfence:
-        case INS_mfence:
-        case INS_movnti:
-        case INS_prefetchnta:
-        case INS_prefetcht0:
-        case INS_prefetcht1:
-        case INS_prefetcht2:
-        case INS_sfence:
-        case INS_vzeroupper:
-            return false;
-        default:
-            break;
-    }
-
-    return IsVexEncodableInstruction(ins);
+    return IsVexEncodableInstruction(ins) && (ins != INS_vzeroupper);
 }
 
 // Add base VEX prefix without setting W, R, X, or B bits
@@ -1454,9 +1442,9 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
+    if (IsVexOrEvexEncodableInstruction(ins))
     {
-        if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
+        if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code))
         {
             // W-bit is available in 4-byte EVEX prefix that starts with byte 62.
             assert(hasEvexPrefix(code));
@@ -1464,11 +1452,10 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code)
             // W-bit is the only bit that is added in non bit-inverted form.
             return emitter::code_t(code | 0x0000800000000000ULL);
         }
-    }
-    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
-    {
-        if (TakesVexPrefix(ins))
+        else
         {
+            assert(IsVexEncodableInstruction(ins));
+
             // W-bit is available only in 3-byte VEX prefix that starts with byte C4.
             assert(hasVexPrefix(code));
 
@@ -1490,7 +1477,7 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
+    if (IsVexOrEvexEncodableInstruction(ins))
     {
         if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
         {
@@ -1500,11 +1487,10 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code)
             // R-bit is added in bit-inverted form.
             return code & 0xFF7FFFFFFFFFFFFFULL;
         }
-    }
-    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
-    {
-        if (TakesVexPrefix(ins))
+        else
         {
+            assert(IsVexEncodableInstruction(ins));
+
             // R-bit is supported by both 2-byte and 3-byte VEX prefix
             assert(hasVexPrefix(code));
 
@@ -1520,20 +1506,20 @@ emitter::code_t emitter::AddRexXPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
+    if (IsVexOrEvexEncodableInstruction(ins))
     {
-        if (TakesEvexPrefix(id))
+        if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
         {
             // X-bit is available in 4-byte EVEX prefix that starts with byte 62.
             assert(hasEvexPrefix(code));
+
             // X-bit is added in bit-inverted form.
             return code & 0xFFBFFFFFFFFFFFFFULL;
         }
-    }
-    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
-    {
-        if (TakesVexPrefix(ins))
+        else
         {
+            assert(IsVexEncodableInstruction(ins));
+
             // X-bit is available only in 3-byte VEX prefix that starts with byte C4.
             assert(hasVexPrefix(code));
 
@@ -1549,7 +1535,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code)
 {
     instruction ins = id->idIns();
 
-    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
+    if (IsVexOrEvexEncodableInstruction(ins))
     {
         if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
         {
@@ -1559,11 +1545,10 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code)
             // B-bit is added in bit-inverted form.
             return code & 0xFFDFFFFFFFFFFFFFULL;
         }
-    }
-    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
-    {
-        if (TakesVexPrefix(ins))
+        else
         {
+            assert(IsVexEncodableInstruction(ins));
+
             // B-bit is available only in 3-byte VEX prefix that starts with byte C4.
             assert(hasVexPrefix(code));
 
@@ -1578,8 +1563,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code)
 // Adds REX prefix (0x40) without W, R, X or B bits set
 emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code)
 {
-    assert(!UseVEXEncoding() || !IsVexEncodableInstruction(ins));
-    assert(!UseEvexEncoding() || !IsEvexEncodableInstruction(ins));
+    assert(!IsVexEncodableInstruction(ins));
     return code | 0x4000000000ULL;
 }
 
@@ -2159,103 +2143,69 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
     instruction ins          = id->idIns();
     unsigned    adjustedSize = 0;
 
-    // TODO-XArch-AVX512: Remove redundant code and possiblly collapse EVEX and VEX into a single pathway
-    // IsEvexEncodableInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless
-    // explicitly
-    // asked for EVEX.
-    if (IsEvexEncodableInstruction(ins) && TakesEvexPrefix(id))
+    if (IsVexOrEvexEncodableInstruction(ins))
     {
-        // EVEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
-        // Therefore, to estimate the size adding EVEX prefix size and size of instruction opcode bytes will always
+        unsigned prefixAdjustedSize = 0;
+
+        // VEX/EVEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
+        // Therefore, to estimate the size adding VEX/EVEX prefix size and size of instruction opcode bytes will always
         // overstimate.
-        // Instead this routine will adjust the size of EVEX prefix based on the number of bytes of opcode it encodes so
-        // that
-        // instruction size estimate will be accurate.
-        // Basically this  will decrease the evexPrefixSize, so that opcodeSize + evexPrefixAdjustedSize will be the
+        //
+        // Instead this routine will adjust the size of VEX/EVEX prefix based on the number of bytes of opcode it encodes so
+        // that instruction size estimate will be accurate.
+        //
+        // Basically this  will decrease the prefixSize, so that opcodeSize + prefixAdjustedSize will be the
         // right size.
         //
-        // rightOpcodeSize + evexPrefixSize
-        //  = (opcodeSize - ExtrabytesSize) + evexPrefixSize
-        //  = opcodeSize + (evexPrefixSize - ExtrabytesSize)
-        //  = opcodeSize + evexPrefixAdjustedSize
-
-        unsigned evexPrefixAdjustedSize = emitGetEvexPrefixSize(id);
-        assert(evexPrefixAdjustedSize == 4);
-
-        // In this case, opcode will contains escape prefix at least one byte,
-        // simdPrefixAdjustedSize should be minus one.
-        evexPrefixAdjustedSize -= 1;
+        // rightOpcodeSize + prefixSize
+        //  = (opcodeSize - extraBytesSize) + prefixSize
+        //  = opcodeSize + (prefixSize - extraBytesSize)
+        //  = opcodeSize + prefixAdjustedSize
 
-        // Get the fourth byte in Opcode.
-        // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
-        BYTE check = (code >> 24) & 0xFF;
-        if (check != 0)
+        if (TakesEvexPrefix(id))
         {
-            // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
-            // 4-byte opcode: with the bytes ordered as 0x22114433
-            // Simd prefix is at the first byte.
-            BYTE sizePrefix = (code >> 16) & 0xFF;
-            if (sizePrefix != 0 && isPrefix(sizePrefix))
-            {
-                evexPrefixAdjustedSize -= 1;
-            }
-
-            // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
-            // But in this case the opcode has not counted R\M part.
-            // opcodeSize + evexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
-            //=opcodeSize + evexPrefixAdjustedSize -1 + 1
-            //=opcodeSize + evexPrefixAdjustedSize
-            // So although we may have second byte escape prefix, we won't decrease evexPrefixAdjustedSize.
+            prefixAdjustedSize = emitGetEvexPrefixSize(id);
+            assert(prefixAdjustedSize == 4);
         }
+        else
+        {
+            assert(IsVexEncodableInstruction(ins));
 
-        adjustedSize = evexPrefixAdjustedSize;
-    }
-    else if (IsVexEncodableInstruction(ins))
-    {
-        // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces.
-        // Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always
-        // overstimate.
-        // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so
-        // that
-        // instruction size estimate will be accurate.
-        // Basically this  will decrease the vexPrefixSize, so that opcodeSize + vexPrefixAdjustedSize will be the right
-        // size.
-        //
-        // rightOpcodeSize + vexPrefixSize
-        //  = (opcodeSize - ExtrabytesSize) + vexPrefixSize
-        //  = opcodeSize + (vexPrefixSize - ExtrabytesSize)
-        //  = opcodeSize + vexPrefixAdjustedSize
+            prefixAdjustedSize = emitGetVexPrefixSize(id);
+            assert((prefixAdjustedSize == 2) || (prefixAdjustedSize == 3));
+        }
 
-        unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(id);
-        assert((simdPrefixAdjustedSize == 2) || (simdPrefixAdjustedSize == 3));
+        assert(prefixAdjustedSize != 0);
 
         // In this case, opcode will contains escape prefix at least one byte,
-        // vexPrefixAdjustedSize should be minus one.
-        simdPrefixAdjustedSize -= 1;
+        // prefixAdjustedSize should be minus one.
+        prefixAdjustedSize -= 1;
 
         // Get the fourth byte in Opcode.
         // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not.
         BYTE check = (code >> 24) & 0xFF;
+
         if (check != 0)
         {
             // 3-byte opcode: with the bytes ordered as 0x2211RM33 or
             // 4-byte opcode: with the bytes ordered as 0x22114433
             // Simd prefix is at the first byte.
             BYTE sizePrefix = (code >> 16) & 0xFF;
+
             if (sizePrefix != 0 && isPrefix(sizePrefix))
             {
-                simdPrefixAdjustedSize -= 1;
+                prefixAdjustedSize -= 1;
             }
 
             // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode.
             // But in this case the opcode has not counted R\M part.
-            // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize
-            //=opcodeSize + VexPrefixAdjustedSize -1 + 1
-            //=opcodeSize + VexPrefixAdjustedSize
-            // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize.
+            // opcodeSize + prefixAdjustedSize - extraEscapePrefixSize + modRMSize
+            //  = opcodeSize + prefixAdjustedSize -1 + 1
+            //  = opcodeSize + prefixAdjustedSize
+            // So although we may have second byte escape prefix, we won't decrease prefixAdjustedSize.
         }
 
-        adjustedSize = simdPrefixAdjustedSize;
+        adjustedSize = prefixAdjustedSize;
     }
     else if (Is4ByteSSEInstruction(ins))
     {
@@ -3041,15 +2991,16 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
 
     // Both prefix encodes register operand in 1's complement form
     assert(regBits <= 0xF);
-    if (UseEvexEncoding() && IsEvexEncodableInstruction(ins))
+
+    if (IsVexOrEvexEncodableInstruction(ins))
     {
-        if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code))
+        if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck().
         {
-            assert(hasEvexPrefix(code) && TakesEvexPrefix(id));
+            assert(hasEvexPrefix(code));
 
-// TODO-XARCH-AVX512 I don't like that we redefine regBits on the EVEX case.
-// Rather see these paths cleaned up.
 #if defined(TARGET_AMD64)
+            // TODO-XARCH-AVX512 I don't like that we redefine regBits on the EVEX case.
+            // Rather see these paths cleaned up.
             regBits = HighAwareRegEncoding(reg);
 
             if (IsHighSIMDReg(reg))
@@ -3058,19 +3009,14 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
                 code = AddEvexVPrimePrefix(code);
             }
 #endif
+
             // Shift count = 5-bytes of opcode + 0-2 bits for EVEX
             regBits <<= 43;
             return code ^ regBits;
         }
-    }
-    if (UseVEXEncoding() && IsVexEncodableInstruction(ins))
-    {
-
-        // Both prefix encodes register operand in 1's complement form
-        assert(regBits <= 0xF);
-
-        if (TakesVexPrefix(ins))
+        else
         {
+            assert(IsVexEncodableInstruction(ins));
             assert(hasVexPrefix(code));
 
             // Shift count = 4-bytes of opcode + 0-2 bits for VEX
@@ -3078,6 +3024,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber
             return code ^ regBits;
         }
     }
+
     return code ^ regBits;
 }
 

From 56f27aad8e2cd8435960a1d4805a7ade46a9cf16 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 09:48:04 -0700
Subject: [PATCH 06/16] Tweak the Vector512 ISA check to properly account for
 VL

---
 src/coreclr/jit/compiler.cpp | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 1f46f4dee235e..8a6abbbe62f26 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2287,15 +2287,15 @@ void Compiler::compSetProcessor()
     // the overall JIT implementation, we currently require the entire set of ISAs to be
     // supported and disable AVX512 support otherwise.
 
-    if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) &&
-        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD) &&
-        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ) &&
-        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL))
+    if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL) &&
+        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL) &&
+        instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL))
     {
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW));
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD));
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ));
         assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
+        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL));
 
         instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
     }

From d8f9ef768f7f1d4a1fc660264b5976cf426e109f Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 10:05:22 -0700
Subject: [PATCH 07/16] Applying formatting patch

---
 src/coreclr/jit/emitxarch.cpp | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index a64cd88e01b02..07e01c335ac61 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1036,7 +1036,7 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
         return false;
     }
 
-    if (HasHighSIMDReg(id) || (id->idOpSize() == OPSZ64) || HasKMaskRegisterDest(ins))
+    if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasKMaskRegisterDest(ins))
     {
         // Requires the EVEX encoding due to used registers
         return true;
@@ -2151,8 +2151,8 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const
         // Therefore, to estimate the size adding VEX/EVEX prefix size and size of instruction opcode bytes will always
         // overstimate.
         //
-        // Instead this routine will adjust the size of VEX/EVEX prefix based on the number of bytes of opcode it encodes so
-        // that instruction size estimate will be accurate.
+        // Instead this routine will adjust the size of VEX/EVEX prefix based on the number of bytes of opcode it
+        // encodes so that instruction size estimate will be accurate.
         //
         // Basically this  will decrease the prefixSize, so that opcodeSize + prefixAdjustedSize will be the
         // right size.

From 09741714eaf465196bf93cf44ef84f868fbade0d Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Sun, 19 Mar 2023 16:20:29 -0700
Subject: [PATCH 08/16] Ensure we're checking for actual KMask usage and not
 just potential usage

---
 src/coreclr/jit/emitxarch.cpp | 52 ++++++++++++++++++++++++++++++-----
 src/coreclr/jit/emitxarch.h   |  4 ++-
 2 files changed, 48 insertions(+), 8 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 07e01c335ac61..c905f60410ded 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1036,17 +1036,19 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
         return false;
     }
 
-    if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasKMaskRegisterDest(ins))
+    if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasMaskReg(id))
     {
         // Requires the EVEX encoding due to used registers
         return true;
     }
 
-    // TODO-XArch-AVX512: This needs to return true when the id includes EVEX specific functionality:
-    // * masking
-    // * embedded broadcast
-    // * embedded rounding control
-    // * etc
+    if (HasEmbeddedBroadcast(id))
+    {
+        // TODO-XArch-AVX512: This needs to return true when the id includes:
+        // * embedded rounding control
+        // * other EVEX specific functionality
+        return true;
+    }
 
     // Only supports the EVEX encoding or in stress mode to always use EVEX encoding
     return !IsVexEncodableInstruction(ins) || emitComp->DoJitStressEvexEncoding();
@@ -1302,7 +1304,7 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const
 }
 
 //------------------------------------------------------------------------
-// HasHighSIMReg: Checks if an instruction uses a high SIMD registers (mm16-mm31)
+// HasHighSIMDReg: Checks if an instruction uses a high SIMD registers (mm16-mm31)
 // and will require one of the EVEX high SIMD bits (EVEX.R', EVEX.V', EVEX.X)
 //
 // Arguments:
@@ -1345,6 +1347,42 @@ bool emitter::IsHighSIMDReg(regNumber reg) const
 #endif
 }
 
+//------------------------------------------------------------------------
+// HasMaskReg: Checks if an instruction uses a KMask registers (k0-k7)
+//
+// Arguments:
+// id -- instruction descriptor for encoding
+//
+// Return Value:
+// true if instruction will require EVEX encoding for its register operands.
+bool emitter::HasMaskReg(const instrDesc* id) const
+{
+    if (isMaskReg(id->idReg1()))
+    {
+        assert(HasKMaskRegisterDest(id->idIns()));
+        return true;
+    }
+
+#if defined(DEBUG)
+    assert(!isMaskReg(id->idReg2()));
+
+    if (!id->idIsSmallDsc())
+    {
+        if (id->idHasReg3())
+        {
+            assert(!isMaskReg(id->idReg3()));
+        }
+
+        if (id->idHasReg4())
+        {
+            assert(!isMaskReg(id->idReg4()));
+        }
+    }
+#endif // DEBUG
+
+    return false;
+}
+
 // Returns true if using this register will require a REX.* prefix.
 // Since XMM registers overlap with YMM registers, this routine
 // can also be used to know whether a YMM register if the
diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h
index 16ac15eb5f1b3..771051b6ad1d1 100644
--- a/src/coreclr/jit/emitxarch.h
+++ b/src/coreclr/jit/emitxarch.h
@@ -812,7 +812,7 @@ inline bool emitIsUncondJump(instrDesc* jmp)
 // Returns:
 //    `true` if the instruction does embedded broadcast.
 //
-inline bool HasEmbeddedBroadcast(instrDesc* id)
+inline bool HasEmbeddedBroadcast(const instrDesc* id) const
 {
     return false;
 }
@@ -820,4 +820,6 @@ inline bool HasEmbeddedBroadcast(instrDesc* id)
 inline bool HasHighSIMDReg(const instrDesc* id) const;
 inline bool IsHighSIMDReg(regNumber) const;
 
+inline bool HasMaskReg(const instrDesc* id) const;
+
 #endif // TARGET_XARCH

From 2328f46a7e16382e3ac3add35b0280f3e01b9634 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Mon, 20 Mar 2023 08:03:53 -0700
Subject: [PATCH 09/16] Fixing CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN for the
 managed VM

---
 src/coreclr/jit/emit.cpp                              | 11 ++++++++---
 src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs |  2 +-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp
index dbcd64b7254ee..c61b9e885f031 100644
--- a/src/coreclr/jit/emit.cpp
+++ b/src/coreclr/jit/emit.cpp
@@ -6546,7 +6546,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
     coldCodeBlock = nullptr;
 
     // This restricts the data alignment to: 4, 8, 16, 32 or 64 bytes
-    // Alignments greater than 32 would require VM support in ICorJitInfo::allocMem
+    // Alignments greater than 64 would require VM support in ICorJitInfo::allocMem
     uint32_t dataAlignment = emitConsDsc.alignment;
     assert((dataSection::MIN_DATA_ALIGN <= dataAlignment) && (dataAlignment <= dataSection::MAX_DATA_ALIGN) &&
            isPow2(dataAlignment));
@@ -6665,11 +6665,16 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
     {
         assert(((size_t)codeBlock & 15) == 0);
     }
-    if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0)
+
+    if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN) != 0)
+    {
+        assert(((size_t)consBlock & 63) == 0);
+    }
+    else if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0)
     {
         assert(((size_t)consBlock & 31) == 0);
     }
-    if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0)
+    else if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0)
     {
         assert(((size_t)consBlock & 15) == 0);
     }
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
index 98afd9ae0902a..7126cb7315111 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs
@@ -742,7 +742,7 @@ public enum CorJitAllocMemFlag
         CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN = 0x00000002, // The read-only data will be 16-byte aligned
         CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN   = 0x00000004, // The code will be 32-byte aligned
         CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN = 0x00000008, // The read-only data will be 32-byte aligned
-        CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN = 0x00000008, // The read-only data will be 64-byte aligned
+        CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN = 0x00000010, // The read-only data will be 64-byte aligned
     }
 
     public enum CorJitFuncKind

From 081516d41c4f8efcfe9edc5eee5e146fc5d9a761 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Mon, 20 Mar 2023 08:36:31 -0700
Subject: [PATCH 10/16] Fixing the DoJitStressEvexEncoding check to account for
 VEX vs EVEX differences

---
 src/coreclr/jit/emitxarch.cpp | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index c905f60410ded..e73bee5fb5ca4 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -1032,10 +1032,16 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
 
     if (!IsEvexEncodableInstruction(ins))
     {
-        // Can't be EVEX encoded ever
+        // Never supports the EVEX encoding
         return false;
     }
 
+    if (!IsVexEncodableInstruction(ins))
+    {
+        // Only supports the EVEX encoding
+        return true;
+    }
+
     if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasMaskReg(id))
     {
         // Requires the EVEX encoding due to used registers
@@ -1044,14 +1050,28 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const
 
     if (HasEmbeddedBroadcast(id))
     {
+        // Requires the EVEX encoding due to embedded functionality
+        //
         // TODO-XArch-AVX512: This needs to return true when the id includes:
         // * embedded rounding control
         // * other EVEX specific functionality
         return true;
     }
 
-    // Only supports the EVEX encoding or in stress mode to always use EVEX encoding
-    return !IsVexEncodableInstruction(ins) || emitComp->DoJitStressEvexEncoding();
+#if defined(DEBUG)
+    if (emitComp->DoJitStressEvexEncoding())
+    {
+        // Requires the EVEX encoding due to STRESS mode and no change in semantics
+        //
+        // Some instructions, like VCMPEQW return the value in a SIMD register for
+        // VEX but in a MASK register for EVEX. Such instructions will have already
+        // returned TRUE if they should have used EVEX due to the HasMaskReg(id)
+        // check above so we need to still return false here to preserve semantics.
+        return !HasKMaskRegisterDest(ins);
+    }
+#endif // DEBUG
+
+    return false;
 }
 
 // Intel AVX-512 encoding is defined in "Intel 64 and ia-32 architectures software developer's manual volume 2", Section

From 222e97207af11202ec019995a0a54dbf2d7a4c90 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Tue, 21 Mar 2023 06:49:11 -0700
Subject: [PATCH 11/16] Break apart an overly long assert

---
 src/coreclr/jit/emitxarch.cpp | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp
index 56166f4a55b04..4fab45b3b94b9 100644
--- a/src/coreclr/jit/emitxarch.cpp
+++ b/src/coreclr/jit/emitxarch.cpp
@@ -13416,9 +13416,17 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
         // When SMALL_CODE is set, we only expect 4-byte alignment, otherwise
         // we expect the same alignment as the size of the constant.
 
-        assert((emitChkAlign == false) || (ins == INS_lea) ||
-               ((emitComp->compCodeOpt() == Compiler::SMALL_CODE) && (((size_t)addr & 3) == 0)) ||
-               (((size_t)addr & (byteSize - 1)) == 0));
+        if (emitChkAlign && (ins != INS_lea))
+        {
+            if (emitComp->compCodeOpt() == Compiler::SMALL_CODE)
+            {
+                assert((reinterpret_cast<size_t>(addr) & 3) == 0);
+            }
+            else
+            {
+                assert((reinterpret_cast<size_t>(addr) & (byteSize - 1)) == 0);
+            }
+        }
 #endif // DEBUG
     }
     else

From 0b9dbb382bdd8d8d70301d15c9ca896a9d0d0f26 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Tue, 21 Mar 2023 09:18:53 -0700
Subject: [PATCH 12/16] Ensure IsBaselineVector512IsaSupported works on x86

---
 src/coreclr/jit/compiler.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h
index 0f83c632a6d16..a1f1a879f2b2d 100644
--- a/src/coreclr/jit/compiler.h
+++ b/src/coreclr/jit/compiler.h
@@ -9170,7 +9170,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     //
     bool IsBaselineVector512IsaSupportedDebugOnly() const
     {
-#ifdef TARGET_AMD64
+#ifdef TARGET_XARCH
         return (compIsaSupportedDebugOnly(InstructionSet_Vector512));
 #else
         return false;
@@ -9186,7 +9186,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
     //
     bool IsBaselineVector512IsaSupported() const
     {
-#ifdef TARGET_AMD64
+#ifdef TARGET_XARCH
         return (compExactlyDependsOn(InstructionSet_Vector512));
 #else
         return false;

From 9dfbd7fb4108381a1fc6314f070db8a1ce22c7af Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Tue, 21 Mar 2023 11:31:04 -0700
Subject: [PATCH 13/16] Block NI_Vector512_ExtractMostSignificantBits on x86
 pending decomposition work

---
 src/coreclr/jit/hwintrinsicxarch.cpp | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index f572e538689d8..3887dc8016b77 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -1341,6 +1341,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 
         case NI_Vector512_ExtractMostSignificantBits:
         {
+#if defined(TARGET_X86)
+            // TODO-XARCH-CQ: It may be beneficial to decompose this operation
+            break;
+#endif // TARGET_X86
+
             if (IsBaselineVector512IsaSupported())
             {
                 var_types simdType = getSIMDTypeForSize(simdSize);

From 1f6fa3da996d16a7a65e98797910b6c7d938c642 Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Tue, 21 Mar 2023 12:06:30 -0700
Subject: [PATCH 14/16] Ensure we don't overwrite 64-byte alignment for SPMI

---
 src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
index c627a042d4f35..df29582d30b85 100644
--- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
+++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp
@@ -1657,7 +1657,7 @@ void MyICJI::allocMem(AllocMemArgs* pArgs)
         {
             roDataAlignment = 64;
         }
-        if ((pArgs->flag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0)
+        else if ((pArgs->flag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0)
         {
             roDataAlignment = 32;
         }

From b713f69bd3fa6698e480822587eb05f486de6d9c Mon Sep 17 00:00:00 2001
From: Tanner Gooding <tagoo@outlook.com>
Date: Tue, 21 Mar 2023 12:09:52 -0700
Subject: [PATCH 15/16] Include Vector512 HWIntrinsic tests by default

---
 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj  | 4 ++--
 src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj
index 696c3d04a6dd1..4e87ff605efc7 100644
--- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj
+++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj
@@ -21,8 +21,8 @@
     <MergedWrapperProjectReference Remove="General/Vector64_1/**/*.csproj" Condition="'$(_IncludeArm64HWIntrinsicTests)' != 'true'" />
     <MergedWrapperProjectReference Remove="General/Vector256/**/*.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
     <MergedWrapperProjectReference Remove="General/Vector256_1/**/*.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
-    <MergedWrapperProjectReference Remove="General/Vector512/**/*.csproj" Condition="'$(CLRTestPriorityToBuild)' &lt; '1'" />
-    <MergedWrapperProjectReference Remove="General/Vector512_1/**/*.csproj" Condition="'$(CLRTestPriorityToBuild)' &lt; '1'" />
+    <MergedWrapperProjectReference Remove="General/Vector512/**/*.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
+    <MergedWrapperProjectReference Remove="General/Vector512_1/**/*.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
     <MergedWrapperProjectReference Remove="X86/**/*.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
 
   </ItemGroup>
diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj
index e9fd5bcfd4999..25f6749739539 100644
--- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj
+++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj
@@ -22,8 +22,8 @@
     <MergedWrapperProjectReference Remove="General/Vector64_1/**/*_ro.csproj" Condition="'$(_IncludeArm64HWIntrinsicTests)' != 'true'" />
     <MergedWrapperProjectReference Remove="General/Vector256/**/*_ro.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
     <MergedWrapperProjectReference Remove="General/Vector256_1/**/*_ro.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
-    <MergedWrapperProjectReference Remove="General/Vector512/**/*_ro.csproj" Condition="'$(CLRTestPriorityToBuild)' &lt; '1'" />
-    <MergedWrapperProjectReference Remove="General/Vector512_1/**/*_ro.csproj" Condition="'$(CLRTestPriorityToBuild)' &lt; '1'" />
+    <MergedWrapperProjectReference Remove="General/Vector512/**/*_ro.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
+    <MergedWrapperProjectReference Remove="General/Vector512_1/**/*_ro.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
     <MergedWrapperProjectReference Remove="X86/**/*_ro.csproj" Condition="'$(_IncludeXarchHWIntrinsicTests)' != 'true'" />
   </ItemGroup>
 

From 5dbbc054810cc1f7b92958a411df2b2fa23fb729 Mon Sep 17 00:00:00 2001
From: Bruce Forstall <brucefo@microsoft.com>
Date: Thu, 23 Mar 2023 23:50:14 -0700
Subject: [PATCH 16/16] WIP

---
 src/coreclr/inc/clrconfigvalues.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h
index 9dd74d914ad74..c2c28da395920 100644
--- a/src/coreclr/inc/clrconfigvalues.h
+++ b/src/coreclr/inc/clrconfigvalues.h
@@ -754,7 +754,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD,     W("EnableAVX512CD"),     1
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL,  W("EnableAVX512CD_VL"),  1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ,     W("EnableAVX512DQ"),     1, "Allows AVX512DQ+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL,  W("EnableAVX512DQ_VL"),  1, "Allows AVX512DQ_VL+ hardware intrinsics to be disabled")
-RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F,      W("EnableAVX512F"),      1, "Allows AVX512F+ hardware intrinsics to be disabled")
+RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F,      W("EnableAVX512F"),      0, "Allows AVX512F+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL,   W("EnableAVX512F_VL"),   1, "Allows AVX512F_VL+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI,      W("EnableAVXVNNI"),      1, "Allows AVX VNNI+ hardware intrinsics to be disabled")
 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1,         W("EnableBMI1"),         1, "Allows BMI1+ hardware intrinsics to be disabled")