From bdce1f4a6155e4230e70da4c4612050e8cea518b Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 08:02:54 -0700 Subject: [PATCH 01/16] Remove JitForceEVEXEncoding in favor of the existing AltJit enablement --- src/coreclr/jit/compiler.cpp | 49 +++++++++++++++++++++++---- src/coreclr/jit/compiler.h | 22 +----------- src/coreclr/jit/jitconfigvalues.h | 1 - src/tests/Common/testenvironment.proj | 8 ++--- 4 files changed, 47 insertions(+), 33 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 97f9cabaf6f47..8db104191d61c 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2289,8 +2289,7 @@ void Compiler::compSetProcessor() { // Using JitStressEVEXEncoding flag will force instructions which would // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding - // This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus - // causing failure if not running on compatible hardware. + // This requires AVX512VL support. // We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't // been set yet as that's what we're trying to set here @@ -2298,11 +2297,7 @@ void Compiler::compSetProcessor() bool enableAvx512 = false; #if defined(DEBUG) - if (JitConfig.JitForceEVEXEncoding()) - { - enableAvx512 = true; - } - else if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) + if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) { enableAvx512 = true; } @@ -6028,6 +6023,46 @@ int Compiler::compCompile(CORINFO_MODULE_HANDLE classPtr, { instructionSetFlags.AddInstructionSet(InstructionSet_AVXVNNI); } + + if (JitConfig.EnableAVX512F() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F); + } + + if (JitConfig.EnableAVX512F_VL() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512F_VL); + } + + if (JitConfig.EnableAVX512BW() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW); + } + + if (JitConfig.EnableAVX512BW_VL() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512BW_VL); + } + + if (JitConfig.EnableAVX512CD() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD); + } + + if (JitConfig.EnableAVX512CD_VL() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512CD_VL); + } + + if (JitConfig.EnableAVX512DQ() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ); + } + + if (JitConfig.EnableAVX512DQ_VL() != 0) + { + instructionSetFlags.AddInstructionSet(InstructionSet_AVX512DQ_VL); + } #endif // These calls are important and explicitly ordered to ensure that the flags are correct in diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 8a6f8e775ae14..23c6a9b364dfe 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9197,13 +9197,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifdef TARGET_XARCH bool canUseVexEncoding() const { -#ifdef DEBUG - if (JitConfig.JitForceEVEXEncoding()) - { - return true; - } -#endif // DEBUG - return compOpportunisticallyDependsOn(InstructionSet_AVX); } @@ -9215,13 +9208,6 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool canUseEvexEncoding() const { -#ifdef DEBUG - if (JitConfig.JitForceEVEXEncoding()) - { - return true; - } -#endif // DEBUG - return compOpportunisticallyDependsOn(InstructionSet_AVX512F); } @@ -9236,13 +9222,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifdef DEBUG // Using JitStressEVEXEncoding flag will force instructions which would // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding - // This requires AVX512VL support. JitForceEVEXEncoding forces this encoding, thus - // causing failure if not running on compatible hardware. - - if (JitConfig.JitForceEVEXEncoding()) - { - return true; - } + // This requires AVX512VL support. if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) { diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 5d0ea994c46c8..449a02bcd0a22 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -304,7 +304,6 @@ CONFIG_INTEGER(EnableMultiRegLocals, W("EnableMultiRegLocals"), 1) // Enable the #if defined(DEBUG) CONFIG_INTEGER(JitStressEvexEncoding, W("JitStressEvexEncoding"), 0) // Enable EVEX encoding for SIMD instructions when // AVX-512VL is available. -CONFIG_INTEGER(JitForceEVEXEncoding, W("JitForceEVEXEncoding"), 0) // Force EVEX encoding for SIMD instructions. #endif // clang-format off diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 70c89a0787687..50de34656eca2 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -36,7 +36,7 @@ DOTNET_EnableSSE41; DOTNET_EnableSSE42; DOTNET_EnableSSSE3; - DOTNET_JitForceEVEXEncoding; + DOTNET_JitStressEvexEncoding; DOTNET_ForceRelocs; DOTNET_GCStress; DOTNET_GCName; @@ -156,8 +156,8 @@ - - + + @@ -260,7 +260,7 @@ 2) Otherwise, the test scenario defines such DOTNET_* environment variable and the specified value of this variable can be extracted by using Metadata() item function. - + 3) This is where we process a pseudo-value of "random" in the DOTNET table into an actual random number. --> From 30f54f19f7ac35bc45391a4d5ab4bbb5f63097c2 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 08:21:38 -0700 Subject: [PATCH 02/16] Rename IsVexEncodingInstruction to IsVexEncodableInstruction --- src/coreclr/jit/emitxarch.cpp | 92 +++++++++++++++++------------------ src/coreclr/jit/emitxarch.h | 6 +-- src/coreclr/jit/instr.cpp | 2 +- 3 files changed, 50 insertions(+), 50 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index e239810c575ed..c17c18dab8ba1 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -159,7 +159,7 @@ regNumber emitter::getSseShiftRegNumber(instruction ins) } } -bool emitter::IsVexEncodedInstruction(instruction ins) const +bool emitter::IsVexEncodableInstruction(instruction ins) const { if (!UseVEXEncoding()) { @@ -174,7 +174,7 @@ bool emitter::IsVexEncodedInstruction(instruction ins) const } //------------------------------------------------------------------------ -// IsEvexEncodedInstruction: Answer the question- Can this instruction be Evex encoded. +// IsEvexEncodableInstruction: Answer the question- Can this instruction be Evex encoded. // // Arguments: // ins - The instruction to check. @@ -182,7 +182,7 @@ bool emitter::IsVexEncodedInstruction(instruction ins) const // Returns: // `true` if ins can be Evex encoded. // -bool emitter::IsEvexEncodedInstruction(instruction ins) const +bool emitter::IsEvexEncodableInstruction(instruction ins) const { if (!UseEvexEncoding()) { @@ -205,9 +205,9 @@ bool emitter::IsEvexEncodedInstruction(instruction ins) const // Returns: // `true` if ins is a SIMD instruction. // -bool emitter::IsVexOrEvexEncodedInstruction(instruction ins) const +bool emitter::IsVexOrEvexEncodableInstruction(instruction ins) const { - return IsVexEncodedInstruction(ins) || IsEvexEncodedInstruction(ins); + return IsVexEncodableInstruction(ins) || IsEvexEncodableInstruction(ins); } // Returns true if the AVX instruction is a binary operator that requires 3 operands. @@ -1034,7 +1034,7 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const { instruction ins = id->idIns(); - if (!IsEvexEncodedInstruction(ins)) + if (!IsEvexEncodableInstruction(ins)) { return false; } @@ -1102,7 +1102,7 @@ emitter::code_t emitter::AddEvexPrefix(instruction ins, code_t code, emitAttr at { // Only AVX512 instructions require EVEX prefix - assert(IsEvexEncodedInstruction(ins)); + assert(IsEvexEncodableInstruction(ins)); // Shouldn't have already added EVEX prefix assert(!hasEvexPrefix(code)); @@ -1146,7 +1146,7 @@ bool emitter::TakesVexPrefix(instruction ins) const break; } - return IsVexEncodedInstruction(ins); + return IsVexEncodableInstruction(ins); } // Add base VEX prefix without setting W, R, X, or B bits @@ -1183,7 +1183,7 @@ emitter::code_t emitter::AddVexPrefix(instruction ins, code_t code, emitAttr att // emitted, by simply checking that all the requirements were met. // Only AVX instructions require VEX prefix - assert(IsVexEncodedInstruction(ins)); + assert(IsVexEncodableInstruction(ins)); // Shouldn't have already added VEX prefix assert(!hasVexPrefix(code)); @@ -1463,7 +1463,7 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodedInstruction(ins)) + if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) { if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). { @@ -1474,7 +1474,7 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code) return emitter::code_t(code | 0x0000800000000000ULL); } } - if (UseVEXEncoding() && IsVexEncodedInstruction(ins)) + if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) { if (TakesVexPrefix(ins)) { @@ -1499,7 +1499,7 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodedInstruction(ins)) + if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) { if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). { @@ -1510,7 +1510,7 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code) return code & 0xFF7FFFFFFFFFFFFFULL; } } - if (UseVEXEncoding() && IsVexEncodedInstruction(ins)) + if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) { if (TakesVexPrefix(ins)) { @@ -1529,7 +1529,7 @@ emitter::code_t emitter::AddRexXPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodedInstruction(ins)) + if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) { if (TakesEvexPrefix(id)) { @@ -1539,7 +1539,7 @@ emitter::code_t emitter::AddRexXPrefix(const instrDesc* id, code_t code) return code & 0xFFBFFFFFFFFFFFFFULL; } } - if (UseVEXEncoding() && IsVexEncodedInstruction(ins)) + if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) { if (TakesVexPrefix(ins)) { @@ -1558,7 +1558,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodedInstruction(ins)) + if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) { if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). { @@ -1569,7 +1569,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code) return code & 0xFFDFFFFFFFFFFFFFULL; } } - if (UseVEXEncoding() && IsVexEncodedInstruction(ins)) + if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) { if (TakesVexPrefix(ins)) { @@ -1587,8 +1587,8 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code) // Adds REX prefix (0x40) without W, R, X or B bits set emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code) { - assert(!UseVEXEncoding() || !IsVexEncodedInstruction(ins)); - assert(!UseEvexEncoding() || !IsEvexEncodedInstruction(ins)); + assert(!UseVEXEncoding() || !IsVexEncodableInstruction(ins)); + assert(!UseEvexEncoding() || !IsEvexEncodableInstruction(ins)); return code | 0x4000000000ULL; } @@ -1663,7 +1663,7 @@ bool isPrefix(BYTE b) // emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) const { - assert(IsEvexEncodedInstruction(ins)); + assert(IsEvexEncodableInstruction(ins)); code_t evexPrefix = (code >> 32) & 0xFFFFFFFF; code &= 0x00000000FFFFFFFFLL; @@ -1799,7 +1799,7 @@ emitter::code_t emitter::emitExtractEvexPrefix(instruction ins, code_t& code) co // emitter::code_t emitter::emitExtractVexPrefix(instruction ins, code_t& code) const { - assert(IsVexEncodedInstruction(ins)); + assert(IsVexEncodableInstruction(ins)); code_t vexPrefix = (code >> 32) & 0x00FFFFFF; code &= 0x00000000FFFFFFFFLL; @@ -2128,7 +2128,7 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) { // In case of AVX instructions, REX prefixes are part of VEX prefix. // And hence requires no additional byte to encode REX prefixes. - if (IsVexOrEvexEncodedInstruction(ins)) + if (IsVexOrEvexEncodableInstruction(ins)) { return 0; } @@ -2148,7 +2148,7 @@ unsigned emitter::emitGetRexPrefixSize(instruction ins) // unsigned emitter::emitGetEvexPrefixSize(instrDesc* id) const { - assert(IsEvexEncodedInstruction(id->idIns())); + assert(IsEvexEncodableInstruction(id->idIns())); return 4; } @@ -2169,10 +2169,10 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const unsigned adjustedSize = 0; // TODO-XArch-AVX512: Remove redundant code and possiblly collapse EVEX and VEX into a single pathway - // IsEvexEncodedInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless + // IsEvexEncodableInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless // explicitly // asked for EVEX. - if (IsEvexEncodedInstruction(ins) && TakesEvexPrefix(id)) + if (IsEvexEncodableInstruction(ins) && TakesEvexPrefix(id)) { // EVEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. // Therefore, to estimate the size adding EVEX prefix size and size of instruction opcode bytes will always @@ -2219,7 +2219,7 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const adjustedSize = evexPrefixAdjustedSize; } - else if (IsVexEncodedInstruction(ins)) + else if (IsVexEncodableInstruction(ins)) { // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. // Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always @@ -2704,7 +2704,7 @@ inline bool hasCodeMR(instruction ins) unsigned emitter::emitGetVexPrefixSize(instrDesc* id) const { instruction ins = id->idIns(); - assert(IsVexEncodedInstruction(ins)); + assert(IsVexEncodableInstruction(ins)); if (EncodedBySSE38orSSE3A(ins)) { @@ -3036,7 +3036,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber instruction ins = id->idIns(); assert(reg < REG_STK); - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); assert(hasVexOrEvexPrefix(code)); // Get 4-bit register encoding @@ -3050,7 +3050,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber // Both prefix encodes register operand in 1's complement form assert(regBits <= 0xF); - if (UseEvexEncoding() && IsEvexEncodedInstruction(ins)) + if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) { if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) { @@ -3072,7 +3072,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber return code ^ regBits; } } - if (UseVEXEncoding() && IsVexEncodedInstruction(ins)) + if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) { // Both prefix encodes register operand in 1's complement form @@ -3412,7 +3412,7 @@ inline UNATIVE_OFFSET emitter::emitInsSizeRR(instrDesc* id, code_t code) (!id->idIsSmallDsc() && (IsExtendedReg(id->idReg3(), attr) || IsExtendedReg(id->idReg4(), attr)))) { sz += emitGetRexPrefixSize(ins); - includeRexPrefixSize = !IsVexEncodedInstruction(ins); + includeRexPrefixSize = !IsVexEncodableInstruction(ins); } sz += emitInsSize(id, code, includeRexPrefixSize); @@ -11084,7 +11084,7 @@ void emitter::emitDispIns( case IF_RWR_RRD_RRD: { - assert(IsVexEncodedInstruction(ins)); + assert(IsVexEncodableInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); regNumber reg2 = id->idReg2(); regNumber reg3 = id->idReg3(); @@ -11107,7 +11107,7 @@ void emitter::emitDispIns( } case IF_RWR_RRD_RRD_CNS: - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins)); printf("%s, ", emitRegName(id->idReg1(), attr)); printf("%s, ", emitRegName(id->idReg2(), attr)); @@ -12056,7 +12056,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) code += 4; } } - else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodedInstruction(ins)) + else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodableInstruction(ins)) { /* Is the operand size larger than a byte? */ @@ -12864,7 +12864,7 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) code += 4; } } - else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodedInstruction(ins)) + else if (!IsSSEInstruction(ins) && !IsVexOrEvexEncodableInstruction(ins)) { // Is the operand size larger than a byte? switch (size) @@ -13636,7 +13636,7 @@ BYTE* emitter::emitOutputR(BYTE* dst, instrDesc* id) // We would to update GC info correctly assert(!IsSSEInstruction(ins)); - assert(!IsVexOrEvexEncodedInstruction(ins)); + assert(!IsVexOrEvexEncodableInstruction(ins)); // Get the 'base' opcode switch (ins) @@ -14077,7 +14077,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id) else if ((code & 0xFF) == 0x00) { // This case happens for some SSE/AVX instructions only - assert(IsVexOrEvexEncodedInstruction(ins) || Is4ByteSSEInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins) || Is4ByteSSEInstruction(ins)); dst += emitOutputByte(dst, (code >> 8) & 0xFF); dst += emitOutputByte(dst, (0xC0 | regCode)); @@ -14277,7 +14277,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) code_t code; instruction ins = id->idIns(); - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); assert(IsThreeOperandAVXInstruction(ins) || isAvxBlendv(ins)); regNumber targetReg = id->idReg1(); regNumber src1 = id->idReg2(); @@ -14323,7 +14323,7 @@ BYTE* emitter::emitOutputRRR(BYTE* dst, instrDesc* id) else if ((code & 0xFF) == 0x00) { // This case happens for AVX instructions only - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); dst += emitOutputByte(dst, (code >> 8) & 0xFF); dst += emitOutputByte(dst, (0xC0 | regCode)); @@ -14721,7 +14721,7 @@ BYTE* emitter::emitOutputIV(BYTE* dst, instrDesc* id) // We would to update GC info correctly assert(!IsSSEInstruction(ins)); - assert(!IsVexOrEvexEncodedInstruction(ins)); + assert(!IsVexOrEvexEncodableInstruction(ins)); #ifdef TARGET_AMD64 // all these opcodes take a sign-extended 4-byte immediate, max @@ -14815,7 +14815,7 @@ BYTE* emitter::emitOutputLJ(insGroup* ig, BYTE* dst, instrDesc* i) // SSE/AVX doesnt make any sense here assert(!IsSSEInstruction(ins)); - assert(!IsVexOrEvexEncodedInstruction(ins)); + assert(!IsVexOrEvexEncodableInstruction(ins)); size_t ssz; size_t lsz; @@ -15839,7 +15839,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) else if ((code & 0xFF) == 0x00) { // This case happens for some SSE/AVX instructions only - assert(IsVexOrEvexEncodedInstruction(ins) || Is4ByteSSEInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins) || Is4ByteSSEInstruction(ins)); dst += emitOutputByte(dst, (code >> 8) & 0xFF); dst += emitOutputByte(dst, (0xC0 | regcode)); @@ -16142,7 +16142,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RWR_RRD_SRD: { - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); code = insCodeRM(ins); code = AddSimdPrefixIfNeeded(id, code, size); @@ -16167,7 +16167,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RWR_RRD_SRD_RRD: { // This should only be called on AVX instructions - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); emitGetInsCns(id, &cnsVal); code = insCodeRM(ins); @@ -16307,7 +16307,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RWR_RRD_MRD: { // This should only be called on AVX instructions - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); code = insCodeRM(ins); code = AddSimdPrefixIfNeeded(id, code, size); @@ -16332,7 +16332,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp) case IF_RWR_RRD_MRD_RRD: { // This should only be called on AVX instructions - assert(IsVexOrEvexEncodedInstruction(ins)); + assert(IsVexOrEvexEncodableInstruction(ins)); emitGetInsCns(id, &cnsVal); code = insCodeRM(ins); diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 2f6e89ed80064..16ac15eb5f1b3 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -105,9 +105,9 @@ static bool IsKInstruction(instruction ins); static regNumber getBmiRegNumber(instruction ins); static regNumber getSseShiftRegNumber(instruction ins); -bool IsVexEncodedInstruction(instruction ins) const; -bool IsEvexEncodedInstruction(instruction ins) const; -bool IsVexOrEvexEncodedInstruction(instruction ins) const; +bool IsVexEncodableInstruction(instruction ins) const; +bool IsEvexEncodableInstruction(instruction ins) const; +bool IsVexOrEvexEncodableInstruction(instruction ins) const; code_t insEncodeMIreg(const instrDesc* id, regNumber reg, emitAttr size, code_t code); diff --git a/src/coreclr/jit/instr.cpp b/src/coreclr/jit/instr.cpp index f0e4c5596b927..f789f58b0f785 100644 --- a/src/coreclr/jit/instr.cpp +++ b/src/coreclr/jit/instr.cpp @@ -103,7 +103,7 @@ const char* CodeGen::genInsDisplayName(emitter::instrDesc* id) const emitter* emit = GetEmitter(); - if (emit->IsVexOrEvexEncodedInstruction(ins)) + if (emit->IsVexOrEvexEncodableInstruction(ins)) { if (!emit->IsBMIInstruction(ins) && !emit->IsKInstruction(ins)) { From 63ccbb5d7850ecc58a59801b07ae5d952eb465cd Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 08:46:17 -0700 Subject: [PATCH 03/16] Updating the JIT to support EVEX by default --- .../templates/runtimes/run-test-job.yml | 13 ++- eng/pipelines/libraries/run-test-job.yml | 2 + src/coreclr/jit/compiler.cpp | 80 +++++++++---------- src/coreclr/jit/compiler.h | 13 ++- src/coreclr/jit/emitxarch.cpp | 31 +++---- src/coreclr/jit/lsra.cpp | 7 +- src/tests/Common/testenvironment.proj | 10 ++- 7 files changed, 81 insertions(+), 75 deletions(-) diff --git a/eng/pipelines/common/templates/runtimes/run-test-job.yml b/eng/pipelines/common/templates/runtimes/run-test-job.yml index 1b9670bc971b4..63366eebd928c 100644 --- a/eng/pipelines/common/templates/runtimes/run-test-job.yml +++ b/eng/pipelines/common/templates/runtimes/run-test-job.yml @@ -407,9 +407,11 @@ jobs: - jitstress_isas_nohwintrinsic - jitstress_isas_nohwintrinsic_nosimd - jitstress_isas_nosimd + - jitstress_isas_x86_evex - jitstress_isas_x86_noaes - jitstress_isas_x86_noavx - jitstress_isas_x86_noavx2 + - jitstress_isas_x86_noavx512 - jitstress_isas_x86_nobmi1 - jitstress_isas_x86_nobmi2 - jitstress_isas_x86_nofma @@ -427,6 +429,7 @@ jobs: - jitstress_isas_1_x86_noaes - jitstress_isas_1_x86_noavx - jitstress_isas_1_x86_noavx2 + - jitstress_isas_1_x86_noavx512 - jitstress_isas_1_x86_nobmi1 - jitstress_isas_1_x86_nobmi2 - jitstress_isas_1_x86_nofma @@ -444,6 +447,7 @@ jobs: - jitstress_isas_2_x86_noaes - jitstress_isas_2_x86_noavx - jitstress_isas_2_x86_noavx2 + - jitstress_isas_2_x86_noavx512 - jitstress_isas_2_x86_nobmi1 - jitstress_isas_2_x86_nobmi2 - jitstress_isas_2_x86_nofma @@ -460,8 +464,9 @@ jobs: - jitstress_isas_2_x86_nossse3 ${{ if in(parameters.testGroup, 'jitstress-isas-avx512') }}: scenarios: - - jitstress_isas_avx512_forceevex - - jitstress_isas_avx512_forceevex_stresshighregs + - jitstress_isas_x86_evex + - jitstress_isas_x86_noavx512 + - jitstressregs0x2000 ${{ if in(parameters.testGroup, 'jitstressregs-x86') }}: scenarios: - jitstressregs1_x86_noavx @@ -472,6 +477,7 @@ jobs: - jitstressregs0x10_x86_noavx - jitstressregs0x80_x86_noavx - jitstressregs0x1000_x86_noavx + - jitstressregs0x2000_x86_noavx ${{ if in(parameters.testGroup, 'jitstressregs' ) }}: scenarios: - jitstressregs1 @@ -482,6 +488,7 @@ jobs: - jitstressregs0x10 - jitstressregs0x80 - jitstressregs0x1000 + - jitstressregs0x2000 ${{ if in(parameters.testGroup, 'jitstress2-jitstressregs') }}: scenarios: - jitstress2_jitstressregs1 @@ -492,6 +499,7 @@ jobs: - jitstress2_jitstressregs0x10 - jitstress2_jitstressregs0x80 - jitstress2_jitstressregs0x1000 + - jitstress2_jitstressregs0x2000 ${{ if in(parameters.testGroup, 'gcstress0x3-gcstress0xc') }}: scenarios: - gcstress0x3 @@ -520,6 +528,7 @@ jobs: - jitstressregs0x10 - jitstressregs0x80 - jitstressregs0x1000 + - jitstressregs0x2000 - jitminopts - forcerelocs - gcstress0xc diff --git a/eng/pipelines/libraries/run-test-job.yml b/eng/pipelines/libraries/run-test-job.yml index ee1f82a633b2c..0654ce7629aaf 100644 --- a/eng/pipelines/libraries/run-test-job.yml +++ b/eng/pipelines/libraries/run-test-job.yml @@ -161,6 +161,7 @@ jobs: - jitstressregs0x10 - jitstressregs0x80 - jitstressregs0x1000 + - jitstressregs0x2000 ${{ if in(parameters.coreclrTestGroup, 'jitstress2-jitstressregs') }}: scenarios: - jitstress2_jitstressregs1 @@ -171,6 +172,7 @@ jobs: - jitstress2_jitstressregs0x10 - jitstress2_jitstressregs0x80 - jitstress2_jitstressregs0x1000 + - jitstress2_jitstressregs0x2000 ${{ if in(parameters.coreclrTestGroup, 'gcstress0x3-gcstress0xc') }}: scenarios: # Disable gcstress0x3 for now; it causes lots of test timeouts. Investigate this after diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 8db104191d61c..1f46f4dee235e 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2275,59 +2275,51 @@ void Compiler::compSetProcessor() { instructionSetFlags.AddInstructionSet(InstructionSet_Vector128); } + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX)) { instructionSetFlags.AddInstructionSet(InstructionSet_Vector256); } - // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL and - // AVX512F/AVX512BW/AVX512CD/AVX512DQ/VX512VL have been shipped together historically. - // It is therefore unlikely that future CPUs only support "just one" and - // not worth the additional complexity in the JIT to support. - if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F) && - instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) && - instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)) - { - // Using JitStressEVEXEncoding flag will force instructions which would - // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding - // This requires AVX512VL support. - // We can't use !DoJitStressEvexEncoding() yet because opts.compSupportsISA hasn't - // been set yet as that's what we're trying to set here + // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL + // These have been shipped together historically and at the time of this writing + // there exists no hardware which doesn't support the entire feature set. To simplify + // the overall JIT implementation, we currently require the entire set of ISAs to be + // supported and disable AVX512 support otherwise. - bool enableAvx512 = false; + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) && + instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD) && + instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ) && + instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) + { + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)); -#if defined(DEBUG) - if (JitConfig.JitStressEvexEncoding() && instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) - { - enableAvx512 = true; - } -#endif // DEBUG + instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); + } + else + { + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); - if (!enableAvx512) - { - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL); #ifdef TARGET_AMD64 - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64); - instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512F_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512BW_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512CD_VL_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_X64); + instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX512DQ_VL_X64); #endif // TARGET_AMD64 - } - else - { - instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); - } } #elif defined(TARGET_ARM64) if (instructionSetFlags.HasInstructionSet(InstructionSet_AdvSimd)) @@ -3394,7 +3386,7 @@ void Compiler::compInitOptions(JitFlags* jitFlags) rbmFltCalleeTrash = RBM_FLT_CALLEE_TRASH_INIT; cntCalleeTrashFloat = CNT_CALLEE_TRASH_FLOAT_INIT; - if (DoJitStressEvexEncoding()) + if (canUseEvexEncoding()) { rbmAllFloat |= RBM_HIGHFLOAT; rbmFltCalleeTrash |= RBM_HIGHFLOAT; diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 23c6a9b364dfe..5aab92acc60bc 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9222,10 +9222,19 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX #ifdef DEBUG // Using JitStressEVEXEncoding flag will force instructions which would // otherwise use VEX encoding but can be EVEX encoded to use EVEX encoding - // This requires AVX512VL support. + // This requires AVX512F, AVX512BW, AVX512CD, AVX512DQ, and AVX512VL support - if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX512F_VL)) + if (JitConfig.JitStressEvexEncoding() && IsBaselineVector512IsaSupported()) { + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512F_VL)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512BW_VL)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512CD_VL)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ)); + assert(compIsaSupportedDebugOnly(InstructionSet_AVX512DQ_VL)); + return true; } #endif // DEBUG diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index c17c18dab8ba1..45e0d6485fe39 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1013,16 +1013,6 @@ bool emitter::TakesSimdPrefix(const instrDesc* id) const //------------------------------------------------------------------------ // TakesEvexPrefix: Checks if the instruction should be EVEX encoded. -// TODO-XArch-AVX512: This check needs to be updated once AVX512 instructions are added. -// Eventually, this should evolve to return `true` for the following cases: -// - JitConfig.JitStressEvexEncoding flag is set. -// - Is an new AVX512 instruction. -// - Uses ZMM vector registers. -// - Uses upper 128-bit or 256-bit registers for an AVX512VL ins. -// - Uses Operand mask encoding: 64-bit opmask registers k0-k7 for conditional execution and merging of destination -// operands. -// - Need to encode functionality specific to Instruction classes(e.g.,embedded broadcast, embedded rounding control -// etc.) // // Arguments: // instruction -- processor instruction to check @@ -1036,23 +1026,24 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const if (!IsEvexEncodableInstruction(ins)) { + // Can't be EVEX encoded ever return false; } - if (!emitComp->DoJitStressEvexEncoding()) + if (HasHighSIMDReg(id) || (id->idOpSize() == OPSZ64) || HasKMaskRegisterDest(ins)) { - return false; - } - - if (HasHighSIMDReg(id)) - { - // TODO-XARCH-AVX512 remove this check once k registers have been implemented - assert(!HasKMaskRegisterDest(ins)); + // Requires the EVEX encoding due to used registers return true; } - // TODO-XArch-AVX512: Revisit 'HasKMaskRegisterDest()' check once KMask support is added. - return !HasKMaskRegisterDest(ins); + // TODO-XArch-AVX512: This needs to return true when the id includes EVEX specific functionality: + // * masking + // * embedded broadcast + // * embedded rounding control + // * etc + + // Only supports the EVEX encoding or in stress mode to always use EVEX encoding + return !IsVexEncodableInstruction(ins) || emitComp->DoJitStressEvexEncoding(); } // Intel AVX-512 encoding is defined in "Intel 64 and ia-32 architectures software developer's manual volume 2", Section diff --git a/src/coreclr/jit/lsra.cpp b/src/coreclr/jit/lsra.cpp index d8520dd7ece2a..afd8e2434cbaf 100644 --- a/src/coreclr/jit/lsra.cpp +++ b/src/coreclr/jit/lsra.cpp @@ -644,7 +644,7 @@ LinearScan::LinearScan(Compiler* theCompiler) rbmFltCalleeTrash = compiler->rbmFltCalleeTrash; #endif - if (!compiler->DoJitStressEvexEncoding()) + if (!compiler->canUseEvexEncoding()) { availableRegCount -= CNT_HIGHFLOAT; availableRegCount -= CNT_MASK_REGS; @@ -718,10 +718,7 @@ LinearScan::LinearScan(Compiler* theCompiler) #endif // TARGET_AMD64 || TARGET_ARM64 #if defined(TARGET_AMD64) - // TODO-XARCH-AVX512 switch this to canUseEvexEncoding() once we independently - // allow EVEX use from the stress flag (currently, if EVEX stress is turned off, - // we cannot use EVEX at all) - if (compiler->DoJitStressEvexEncoding()) + if (compiler->canUseEvexEncoding()) { availableFloatRegs |= RBM_HIGHFLOAT; availableDoubleRegs |= RBM_HIGHFLOAT; diff --git a/src/tests/Common/testenvironment.proj b/src/tests/Common/testenvironment.proj index 50de34656eca2..eb3656263eadc 100644 --- a/src/tests/Common/testenvironment.proj +++ b/src/tests/Common/testenvironment.proj @@ -21,6 +21,7 @@ DOTNET_EnableAES; DOTNET_EnableAVX; DOTNET_EnableAVX2; + DOTNET_EnableAVX512F; DOTNET_EnableBMI1; DOTNET_EnableBMI2; DOTNET_EnableFMA; @@ -105,9 +106,11 @@ + + @@ -125,6 +128,7 @@ + @@ -142,6 +146,7 @@ + @@ -156,8 +161,6 @@ - - @@ -166,6 +169,7 @@ + @@ -174,6 +178,7 @@ + @@ -182,6 +187,7 @@ + From 1db8c9af1eb19f83f807ed1554d896798cab70b9 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 09:05:01 -0700 Subject: [PATCH 04/16] Mark the AVX512 ISAs as "fully implemented" since they have no unimplemented instructions --- src/coreclr/jit/hwintrinsic.cpp | 7 +++---- src/coreclr/jit/hwintrinsicxarch.cpp | 17 +++++++++++++++++ 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/hwintrinsic.cpp b/src/coreclr/jit/hwintrinsic.cpp index 46dbe9c0e1885..666db1e36d54a 100644 --- a/src/coreclr/jit/hwintrinsic.cpp +++ b/src/coreclr/jit/hwintrinsic.cpp @@ -181,8 +181,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, return NI_Illegal; } - bool isIsaSupported = comp->compSupportsHWIntrinsic(isa); - bool isBaselineVector512IsaUsedAndSupported = false; + bool isIsaSupported = comp->compSupportsHWIntrinsic(isa); bool isHardwareAcceleratedProp = (strcmp(methodName, "get_IsHardwareAccelerated") == 0); #ifdef TARGET_XARCH @@ -202,7 +201,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, } else if (strcmp(className, "Vector512") == 0) { - isBaselineVector512IsaUsedAndSupported = comp->IsBaselineVector512IsaSupported(); + isa = InstructionSet_Vector512; } } #endif @@ -237,7 +236,7 @@ NamedIntrinsic HWIntrinsicInfo::lookupId(Compiler* comp, if (isIsaSupported) { - if (comp->compExactlyDependsOn(isa) || isBaselineVector512IsaUsedAndSupported) + if (comp->compExactlyDependsOn(isa)) { return NI_IsSupported_True; } diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index 977bb1b320a87..73e45c2655f90 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -431,6 +431,22 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_AVX_X64: case InstructionSet_AVX2: case InstructionSet_AVX2_X64: + case InstructionSet_AVX512F: + case InstructionSet_AVX512F_VL: + case InstructionSet_AVX512F_VL_X64: + case InstructionSet_AVX512F_X64: + case InstructionSet_AVX512BW: + case InstructionSet_AVX512BW_VL: + case InstructionSet_AVX512BW_VL_X64: + case InstructionSet_AVX512BW_X64: + case InstructionSet_AVX512CD: + case InstructionSet_AVX512CD_VL: + case InstructionSet_AVX512CD_VL_X64: + case InstructionSet_AVX512CD_X64: + case InstructionSet_AVX512DQ: + case InstructionSet_AVX512DQ_VL: + case InstructionSet_AVX512DQ_VL_X64: + case InstructionSet_AVX512DQ_X64: case InstructionSet_AVXVNNI: case InstructionSet_AVXVNNI_X64: case InstructionSet_BMI1: @@ -459,6 +475,7 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa) case InstructionSet_SSE42_X64: case InstructionSet_Vector128: case InstructionSet_Vector256: + case InstructionSet_Vector512: case InstructionSet_X86Base: case InstructionSet_X86Base_X64: case InstructionSet_X86Serialize: From 15a075520896bb6d5e66840404c12fe04898a532 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 09:33:12 -0700 Subject: [PATCH 05/16] Simplify some of the EVEX related checks in emitxarch --- src/coreclr/jit/emitxarch.cpp | 197 +++++++++++++--------------------- 1 file changed, 72 insertions(+), 125 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 45e0d6485fe39..a64cd88e01b02 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -207,7 +207,13 @@ bool emitter::IsEvexEncodableInstruction(instruction ins) const // bool emitter::IsVexOrEvexEncodableInstruction(instruction ins) const { - return IsVexEncodableInstruction(ins) || IsEvexEncodableInstruction(ins); + if (!UseVEXEncoding()) + { + return false; + } + + insFlags flags = CodeGenInterface::instInfo[ins]; + return (flags & (Encoding_VEX | Encoding_EVEX)) != 0; } // Returns true if the AVX instruction is a binary operator that requires 3 operands. @@ -1091,7 +1097,6 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const // emitter::code_t emitter::AddEvexPrefix(instruction ins, code_t code, emitAttr attr) { - // Only AVX512 instructions require EVEX prefix assert(IsEvexEncodableInstruction(ins)); @@ -1120,24 +1125,7 @@ emitter::code_t emitter::AddEvexPrefix(instruction ins, code_t code, emitAttr at bool emitter::TakesVexPrefix(instruction ins) const { // special case vzeroupper as it requires 2-byte VEX prefix - // special case the fencing, movnti and the prefetch instructions as they never take a VEX prefix - switch (ins) - { - case INS_lfence: - case INS_mfence: - case INS_movnti: - case INS_prefetchnta: - case INS_prefetcht0: - case INS_prefetcht1: - case INS_prefetcht2: - case INS_sfence: - case INS_vzeroupper: - return false; - default: - break; - } - - return IsVexEncodableInstruction(ins); + return IsVexEncodableInstruction(ins) && (ins != INS_vzeroupper); } // Add base VEX prefix without setting W, R, X, or B bits @@ -1454,9 +1442,9 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) + if (IsVexOrEvexEncodableInstruction(ins)) { - if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). + if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) { // W-bit is available in 4-byte EVEX prefix that starts with byte 62. assert(hasEvexPrefix(code)); @@ -1464,11 +1452,10 @@ emitter::code_t emitter::AddRexWPrefix(const instrDesc* id, code_t code) // W-bit is the only bit that is added in non bit-inverted form. return emitter::code_t(code | 0x0000800000000000ULL); } - } - if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) - { - if (TakesVexPrefix(ins)) + else { + assert(IsVexEncodableInstruction(ins)); + // W-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); @@ -1490,7 +1477,7 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) + if (IsVexOrEvexEncodableInstruction(ins)) { if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). { @@ -1500,11 +1487,10 @@ emitter::code_t emitter::AddRexRPrefix(const instrDesc* id, code_t code) // R-bit is added in bit-inverted form. return code & 0xFF7FFFFFFFFFFFFFULL; } - } - if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) - { - if (TakesVexPrefix(ins)) + else { + assert(IsVexEncodableInstruction(ins)); + // R-bit is supported by both 2-byte and 3-byte VEX prefix assert(hasVexPrefix(code)); @@ -1520,20 +1506,20 @@ emitter::code_t emitter::AddRexXPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) + if (IsVexOrEvexEncodableInstruction(ins)) { - if (TakesEvexPrefix(id)) + if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). { // X-bit is available in 4-byte EVEX prefix that starts with byte 62. assert(hasEvexPrefix(code)); + // X-bit is added in bit-inverted form. return code & 0xFFBFFFFFFFFFFFFFULL; } - } - if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) - { - if (TakesVexPrefix(ins)) + else { + assert(IsVexEncodableInstruction(ins)); + // X-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); @@ -1549,7 +1535,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code) { instruction ins = id->idIns(); - if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) + if (IsVexOrEvexEncodableInstruction(ins)) { if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). { @@ -1559,11 +1545,10 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code) // B-bit is added in bit-inverted form. return code & 0xFFDFFFFFFFFFFFFFULL; } - } - if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) - { - if (TakesVexPrefix(ins)) + else { + assert(IsVexEncodableInstruction(ins)); + // B-bit is available only in 3-byte VEX prefix that starts with byte C4. assert(hasVexPrefix(code)); @@ -1578,8 +1563,7 @@ emitter::code_t emitter::AddRexBPrefix(const instrDesc* id, code_t code) // Adds REX prefix (0x40) without W, R, X or B bits set emitter::code_t emitter::AddRexPrefix(instruction ins, code_t code) { - assert(!UseVEXEncoding() || !IsVexEncodableInstruction(ins)); - assert(!UseEvexEncoding() || !IsEvexEncodableInstruction(ins)); + assert(!IsVexEncodableInstruction(ins)); return code | 0x4000000000ULL; } @@ -2159,103 +2143,69 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const instruction ins = id->idIns(); unsigned adjustedSize = 0; - // TODO-XArch-AVX512: Remove redundant code and possiblly collapse EVEX and VEX into a single pathway - // IsEvexEncodableInstruction(ins) is `true` for AVX/SSE instructions also which needs to be VEX encoded unless - // explicitly - // asked for EVEX. - if (IsEvexEncodableInstruction(ins) && TakesEvexPrefix(id)) + if (IsVexOrEvexEncodableInstruction(ins)) { - // EVEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. - // Therefore, to estimate the size adding EVEX prefix size and size of instruction opcode bytes will always + unsigned prefixAdjustedSize = 0; + + // VEX/EVEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. + // Therefore, to estimate the size adding VEX/EVEX prefix size and size of instruction opcode bytes will always // overstimate. - // Instead this routine will adjust the size of EVEX prefix based on the number of bytes of opcode it encodes so - // that - // instruction size estimate will be accurate. - // Basically this will decrease the evexPrefixSize, so that opcodeSize + evexPrefixAdjustedSize will be the + // + // Instead this routine will adjust the size of VEX/EVEX prefix based on the number of bytes of opcode it encodes so + // that instruction size estimate will be accurate. + // + // Basically this will decrease the prefixSize, so that opcodeSize + prefixAdjustedSize will be the // right size. // - // rightOpcodeSize + evexPrefixSize - // = (opcodeSize - ExtrabytesSize) + evexPrefixSize - // = opcodeSize + (evexPrefixSize - ExtrabytesSize) - // = opcodeSize + evexPrefixAdjustedSize - - unsigned evexPrefixAdjustedSize = emitGetEvexPrefixSize(id); - assert(evexPrefixAdjustedSize == 4); - - // In this case, opcode will contains escape prefix at least one byte, - // simdPrefixAdjustedSize should be minus one. - evexPrefixAdjustedSize -= 1; + // rightOpcodeSize + prefixSize + // = (opcodeSize - extraBytesSize) + prefixSize + // = opcodeSize + (prefixSize - extraBytesSize) + // = opcodeSize + prefixAdjustedSize - // Get the fourth byte in Opcode. - // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not. - BYTE check = (code >> 24) & 0xFF; - if (check != 0) + if (TakesEvexPrefix(id)) { - // 3-byte opcode: with the bytes ordered as 0x2211RM33 or - // 4-byte opcode: with the bytes ordered as 0x22114433 - // Simd prefix is at the first byte. - BYTE sizePrefix = (code >> 16) & 0xFF; - if (sizePrefix != 0 && isPrefix(sizePrefix)) - { - evexPrefixAdjustedSize -= 1; - } - - // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode. - // But in this case the opcode has not counted R\M part. - // opcodeSize + evexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize - //=opcodeSize + evexPrefixAdjustedSize -1 + 1 - //=opcodeSize + evexPrefixAdjustedSize - // So although we may have second byte escape prefix, we won't decrease evexPrefixAdjustedSize. + prefixAdjustedSize = emitGetEvexPrefixSize(id); + assert(prefixAdjustedSize == 4); } + else + { + assert(IsVexEncodableInstruction(ins)); - adjustedSize = evexPrefixAdjustedSize; - } - else if (IsVexEncodableInstruction(ins)) - { - // VEX prefix encodes some bytes of the opcode and as a result, overall size of the instruction reduces. - // Therefore, to estimate the size adding VEX prefix size and size of instruction opcode bytes will always - // overstimate. - // Instead this routine will adjust the size of VEX prefix based on the number of bytes of opcode it encodes so - // that - // instruction size estimate will be accurate. - // Basically this will decrease the vexPrefixSize, so that opcodeSize + vexPrefixAdjustedSize will be the right - // size. - // - // rightOpcodeSize + vexPrefixSize - // = (opcodeSize - ExtrabytesSize) + vexPrefixSize - // = opcodeSize + (vexPrefixSize - ExtrabytesSize) - // = opcodeSize + vexPrefixAdjustedSize + prefixAdjustedSize = emitGetVexPrefixSize(id); + assert((prefixAdjustedSize == 2) || (prefixAdjustedSize == 3)); + } - unsigned simdPrefixAdjustedSize = emitGetVexPrefixSize(id); - assert((simdPrefixAdjustedSize == 2) || (simdPrefixAdjustedSize == 3)); + assert(prefixAdjustedSize != 0); // In this case, opcode will contains escape prefix at least one byte, - // vexPrefixAdjustedSize should be minus one. - simdPrefixAdjustedSize -= 1; + // prefixAdjustedSize should be minus one. + prefixAdjustedSize -= 1; // Get the fourth byte in Opcode. // If this byte is non-zero, then we should check whether the opcode contains SIMD prefix or not. BYTE check = (code >> 24) & 0xFF; + if (check != 0) { // 3-byte opcode: with the bytes ordered as 0x2211RM33 or // 4-byte opcode: with the bytes ordered as 0x22114433 // Simd prefix is at the first byte. BYTE sizePrefix = (code >> 16) & 0xFF; + if (sizePrefix != 0 && isPrefix(sizePrefix)) { - simdPrefixAdjustedSize -= 1; + prefixAdjustedSize -= 1; } // If the opcode size is 4 bytes, then the second escape prefix is at fourth byte in opcode. // But in this case the opcode has not counted R\M part. - // opcodeSize + VexPrefixAdjustedSize - ExtraEscapePrefixSize + ModR\MSize - //=opcodeSize + VexPrefixAdjustedSize -1 + 1 - //=opcodeSize + VexPrefixAdjustedSize - // So although we may have second byte escape prefix, we won't decrease vexPrefixAdjustedSize. + // opcodeSize + prefixAdjustedSize - extraEscapePrefixSize + modRMSize + // = opcodeSize + prefixAdjustedSize -1 + 1 + // = opcodeSize + prefixAdjustedSize + // So although we may have second byte escape prefix, we won't decrease prefixAdjustedSize. } - adjustedSize = simdPrefixAdjustedSize; + adjustedSize = prefixAdjustedSize; } else if (Is4ByteSSEInstruction(ins)) { @@ -3041,15 +2991,16 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber // Both prefix encodes register operand in 1's complement form assert(regBits <= 0xF); - if (UseEvexEncoding() && IsEvexEncodableInstruction(ins)) + + if (IsVexOrEvexEncodableInstruction(ins)) { - if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) + if (TakesEvexPrefix(id) && codeEvexMigrationCheck(code)) // TODO-XArch-AVX512: Remove codeEvexMigrationCheck(). { - assert(hasEvexPrefix(code) && TakesEvexPrefix(id)); + assert(hasEvexPrefix(code)); -// TODO-XARCH-AVX512 I don't like that we redefine regBits on the EVEX case. -// Rather see these paths cleaned up. #if defined(TARGET_AMD64) + // TODO-XARCH-AVX512 I don't like that we redefine regBits on the EVEX case. + // Rather see these paths cleaned up. regBits = HighAwareRegEncoding(reg); if (IsHighSIMDReg(reg)) @@ -3058,19 +3009,14 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber code = AddEvexVPrimePrefix(code); } #endif + // Shift count = 5-bytes of opcode + 0-2 bits for EVEX regBits <<= 43; return code ^ regBits; } - } - if (UseVEXEncoding() && IsVexEncodableInstruction(ins)) - { - - // Both prefix encodes register operand in 1's complement form - assert(regBits <= 0xF); - - if (TakesVexPrefix(ins)) + else { + assert(IsVexEncodableInstruction(ins)); assert(hasVexPrefix(code)); // Shift count = 4-bytes of opcode + 0-2 bits for VEX @@ -3078,6 +3024,7 @@ inline emitter::code_t emitter::insEncodeReg3456(const instrDesc* id, regNumber return code ^ regBits; } } + return code ^ regBits; } From 56f27aad8e2cd8435960a1d4805a7ade46a9cf16 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 09:48:04 -0700 Subject: [PATCH 06/16] Tweak the Vector512 ISA check to properly account for VL --- src/coreclr/jit/compiler.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp index 1f46f4dee235e..8a6abbbe62f26 100644 --- a/src/coreclr/jit/compiler.cpp +++ b/src/coreclr/jit/compiler.cpp @@ -2287,15 +2287,15 @@ void Compiler::compSetProcessor() // the overall JIT implementation, we currently require the entire set of ISAs to be // supported and disable AVX512 support otherwise. - if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW) && - instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD) && - instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ) && - instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)) + if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL) && + instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL) && + instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)) { + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ)); assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL)); - assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL)); + assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL)); instructionSetFlags.AddInstructionSet(InstructionSet_Vector512); } From d8f9ef768f7f1d4a1fc660264b5976cf426e109f Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 10:05:22 -0700 Subject: [PATCH 07/16] Applying formatting patch --- src/coreclr/jit/emitxarch.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index a64cd88e01b02..07e01c335ac61 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1036,7 +1036,7 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const return false; } - if (HasHighSIMDReg(id) || (id->idOpSize() == OPSZ64) || HasKMaskRegisterDest(ins)) + if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasKMaskRegisterDest(ins)) { // Requires the EVEX encoding due to used registers return true; @@ -2151,8 +2151,8 @@ unsigned emitter::emitGetAdjustedSize(instrDesc* id, code_t code) const // Therefore, to estimate the size adding VEX/EVEX prefix size and size of instruction opcode bytes will always // overstimate. // - // Instead this routine will adjust the size of VEX/EVEX prefix based on the number of bytes of opcode it encodes so - // that instruction size estimate will be accurate. + // Instead this routine will adjust the size of VEX/EVEX prefix based on the number of bytes of opcode it + // encodes so that instruction size estimate will be accurate. // // Basically this will decrease the prefixSize, so that opcodeSize + prefixAdjustedSize will be the // right size. From 09741714eaf465196bf93cf44ef84f868fbade0d Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Sun, 19 Mar 2023 16:20:29 -0700 Subject: [PATCH 08/16] Ensure we're checking for actual KMask usage and not just potential usage --- src/coreclr/jit/emitxarch.cpp | 52 ++++++++++++++++++++++++++++++----- src/coreclr/jit/emitxarch.h | 4 ++- 2 files changed, 48 insertions(+), 8 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 07e01c335ac61..c905f60410ded 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1036,17 +1036,19 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const return false; } - if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasKMaskRegisterDest(ins)) + if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasMaskReg(id)) { // Requires the EVEX encoding due to used registers return true; } - // TODO-XArch-AVX512: This needs to return true when the id includes EVEX specific functionality: - // * masking - // * embedded broadcast - // * embedded rounding control - // * etc + if (HasEmbeddedBroadcast(id)) + { + // TODO-XArch-AVX512: This needs to return true when the id includes: + // * embedded rounding control + // * other EVEX specific functionality + return true; + } // Only supports the EVEX encoding or in stress mode to always use EVEX encoding return !IsVexEncodableInstruction(ins) || emitComp->DoJitStressEvexEncoding(); @@ -1302,7 +1304,7 @@ bool emitter::TakesRexWPrefix(const instrDesc* id) const } //------------------------------------------------------------------------ -// HasHighSIMReg: Checks if an instruction uses a high SIMD registers (mm16-mm31) +// HasHighSIMDReg: Checks if an instruction uses a high SIMD registers (mm16-mm31) // and will require one of the EVEX high SIMD bits (EVEX.R', EVEX.V', EVEX.X) // // Arguments: @@ -1345,6 +1347,42 @@ bool emitter::IsHighSIMDReg(regNumber reg) const #endif } +//------------------------------------------------------------------------ +// HasMaskReg: Checks if an instruction uses a KMask registers (k0-k7) +// +// Arguments: +// id -- instruction descriptor for encoding +// +// Return Value: +// true if instruction will require EVEX encoding for its register operands. +bool emitter::HasMaskReg(const instrDesc* id) const +{ + if (isMaskReg(id->idReg1())) + { + assert(HasKMaskRegisterDest(id->idIns())); + return true; + } + +#if defined(DEBUG) + assert(!isMaskReg(id->idReg2())); + + if (!id->idIsSmallDsc()) + { + if (id->idHasReg3()) + { + assert(!isMaskReg(id->idReg3())); + } + + if (id->idHasReg4()) + { + assert(!isMaskReg(id->idReg4())); + } + } +#endif // DEBUG + + return false; +} + // Returns true if using this register will require a REX.* prefix. // Since XMM registers overlap with YMM registers, this routine // can also be used to know whether a YMM register if the diff --git a/src/coreclr/jit/emitxarch.h b/src/coreclr/jit/emitxarch.h index 16ac15eb5f1b3..771051b6ad1d1 100644 --- a/src/coreclr/jit/emitxarch.h +++ b/src/coreclr/jit/emitxarch.h @@ -812,7 +812,7 @@ inline bool emitIsUncondJump(instrDesc* jmp) // Returns: // `true` if the instruction does embedded broadcast. // -inline bool HasEmbeddedBroadcast(instrDesc* id) +inline bool HasEmbeddedBroadcast(const instrDesc* id) const { return false; } @@ -820,4 +820,6 @@ inline bool HasEmbeddedBroadcast(instrDesc* id) inline bool HasHighSIMDReg(const instrDesc* id) const; inline bool IsHighSIMDReg(regNumber) const; +inline bool HasMaskReg(const instrDesc* id) const; + #endif // TARGET_XARCH From 2328f46a7e16382e3ac3add35b0280f3e01b9634 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 20 Mar 2023 08:03:53 -0700 Subject: [PATCH 09/16] Fixing CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN for the managed VM --- src/coreclr/jit/emit.cpp | 11 ++++++++--- src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/src/coreclr/jit/emit.cpp b/src/coreclr/jit/emit.cpp index dbcd64b7254ee..c61b9e885f031 100644 --- a/src/coreclr/jit/emit.cpp +++ b/src/coreclr/jit/emit.cpp @@ -6546,7 +6546,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, coldCodeBlock = nullptr; // This restricts the data alignment to: 4, 8, 16, 32 or 64 bytes - // Alignments greater than 32 would require VM support in ICorJitInfo::allocMem + // Alignments greater than 64 would require VM support in ICorJitInfo::allocMem uint32_t dataAlignment = emitConsDsc.alignment; assert((dataSection::MIN_DATA_ALIGN <= dataAlignment) && (dataAlignment <= dataSection::MAX_DATA_ALIGN) && isPow2(dataAlignment)); @@ -6665,11 +6665,16 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, { assert(((size_t)codeBlock & 15) == 0); } - if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0) + + if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN) != 0) + { + assert(((size_t)consBlock & 63) == 0); + } + else if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0) { assert(((size_t)consBlock & 31) == 0); } - if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0) + else if ((allocMemFlag & CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN) != 0) { assert(((size_t)consBlock & 15) == 0); } diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs index 98afd9ae0902a..7126cb7315111 100644 --- a/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs +++ b/src/coreclr/tools/Common/JitInterface/CorInfoTypes.cs @@ -742,7 +742,7 @@ public enum CorJitAllocMemFlag CORJIT_ALLOCMEM_FLG_RODATA_16BYTE_ALIGN = 0x00000002, // The read-only data will be 16-byte aligned CORJIT_ALLOCMEM_FLG_32BYTE_ALIGN = 0x00000004, // The code will be 32-byte aligned CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN = 0x00000008, // The read-only data will be 32-byte aligned - CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN = 0x00000008, // The read-only data will be 64-byte aligned + CORJIT_ALLOCMEM_FLG_RODATA_64BYTE_ALIGN = 0x00000010, // The read-only data will be 64-byte aligned } public enum CorJitFuncKind From 081516d41c4f8efcfe9edc5eee5e146fc5d9a761 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Mon, 20 Mar 2023 08:36:31 -0700 Subject: [PATCH 10/16] Fixing the DoJitStressEvexEncoding check to account for VEX vs EVEX differences --- src/coreclr/jit/emitxarch.cpp | 26 +++++++++++++++++++++++--- 1 file changed, 23 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index c905f60410ded..e73bee5fb5ca4 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -1032,10 +1032,16 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const if (!IsEvexEncodableInstruction(ins)) { - // Can't be EVEX encoded ever + // Never supports the EVEX encoding return false; } + if (!IsVexEncodableInstruction(ins)) + { + // Only supports the EVEX encoding + return true; + } + if (HasHighSIMDReg(id) || (id->idOpSize() == EA_64BYTE) || HasMaskReg(id)) { // Requires the EVEX encoding due to used registers @@ -1044,14 +1050,28 @@ bool emitter::TakesEvexPrefix(const instrDesc* id) const if (HasEmbeddedBroadcast(id)) { + // Requires the EVEX encoding due to embedded functionality + // // TODO-XArch-AVX512: This needs to return true when the id includes: // * embedded rounding control // * other EVEX specific functionality return true; } - // Only supports the EVEX encoding or in stress mode to always use EVEX encoding - return !IsVexEncodableInstruction(ins) || emitComp->DoJitStressEvexEncoding(); +#if defined(DEBUG) + if (emitComp->DoJitStressEvexEncoding()) + { + // Requires the EVEX encoding due to STRESS mode and no change in semantics + // + // Some instructions, like VCMPEQW return the value in a SIMD register for + // VEX but in a MASK register for EVEX. Such instructions will have already + // returned TRUE if they should have used EVEX due to the HasMaskReg(id) + // check above so we need to still return false here to preserve semantics. + return !HasKMaskRegisterDest(ins); + } +#endif // DEBUG + + return false; } // Intel AVX-512 encoding is defined in "Intel 64 and ia-32 architectures software developer's manual volume 2", Section From 222e97207af11202ec019995a0a54dbf2d7a4c90 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 21 Mar 2023 06:49:11 -0700 Subject: [PATCH 11/16] Break apart an overly long assert --- src/coreclr/jit/emitxarch.cpp | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/src/coreclr/jit/emitxarch.cpp b/src/coreclr/jit/emitxarch.cpp index 56166f4a55b04..4fab45b3b94b9 100644 --- a/src/coreclr/jit/emitxarch.cpp +++ b/src/coreclr/jit/emitxarch.cpp @@ -13416,9 +13416,17 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc) // When SMALL_CODE is set, we only expect 4-byte alignment, otherwise // we expect the same alignment as the size of the constant. - assert((emitChkAlign == false) || (ins == INS_lea) || - ((emitComp->compCodeOpt() == Compiler::SMALL_CODE) && (((size_t)addr & 3) == 0)) || - (((size_t)addr & (byteSize - 1)) == 0)); + if (emitChkAlign && (ins != INS_lea)) + { + if (emitComp->compCodeOpt() == Compiler::SMALL_CODE) + { + assert((reinterpret_cast(addr) & 3) == 0); + } + else + { + assert((reinterpret_cast(addr) & (byteSize - 1)) == 0); + } + } #endif // DEBUG } else From 0b9dbb382bdd8d8d70301d15c9ca896a9d0d0f26 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 21 Mar 2023 09:18:53 -0700 Subject: [PATCH 12/16] Ensure IsBaselineVector512IsaSupported works on x86 --- src/coreclr/jit/compiler.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 0f83c632a6d16..a1f1a879f2b2d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -9170,7 +9170,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool IsBaselineVector512IsaSupportedDebugOnly() const { -#ifdef TARGET_AMD64 +#ifdef TARGET_XARCH return (compIsaSupportedDebugOnly(InstructionSet_Vector512)); #else return false; @@ -9186,7 +9186,7 @@ XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX // bool IsBaselineVector512IsaSupported() const { -#ifdef TARGET_AMD64 +#ifdef TARGET_XARCH return (compExactlyDependsOn(InstructionSet_Vector512)); #else return false; From 9dfbd7fb4108381a1fc6314f070db8a1ce22c7af Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 21 Mar 2023 11:31:04 -0700 Subject: [PATCH 13/16] Block NI_Vector512_ExtractMostSignificantBits on x86 pending decomposition work --- src/coreclr/jit/hwintrinsicxarch.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp index f572e538689d8..3887dc8016b77 100644 --- a/src/coreclr/jit/hwintrinsicxarch.cpp +++ b/src/coreclr/jit/hwintrinsicxarch.cpp @@ -1341,6 +1341,11 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic, case NI_Vector512_ExtractMostSignificantBits: { +#if defined(TARGET_X86) + // TODO-XARCH-CQ: It may be beneficial to decompose this operation + break; +#endif // TARGET_X86 + if (IsBaselineVector512IsaSupported()) { var_types simdType = getSIMDTypeForSize(simdSize); From 1f6fa3da996d16a7a65e98797910b6c7d938c642 Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 21 Mar 2023 12:06:30 -0700 Subject: [PATCH 14/16] Ensure we don't overwrite 64-byte alignment for SPMI --- src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp index c627a042d4f35..df29582d30b85 100644 --- a/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp +++ b/src/coreclr/tools/superpmi/superpmi/icorjitinfo.cpp @@ -1657,7 +1657,7 @@ void MyICJI::allocMem(AllocMemArgs* pArgs) { roDataAlignment = 64; } - if ((pArgs->flag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0) + else if ((pArgs->flag & CORJIT_ALLOCMEM_FLG_RODATA_32BYTE_ALIGN) != 0) { roDataAlignment = 32; } From b713f69bd3fa6698e480822587eb05f486de6d9c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 21 Mar 2023 12:09:52 -0700 Subject: [PATCH 15/16] Include Vector512 HWIntrinsic tests by default --- src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj | 4 ++-- src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj index 696c3d04a6dd1..4e87ff605efc7 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_r.csproj @@ -21,8 +21,8 @@ - - + + diff --git a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj index e9fd5bcfd4999..25f6749739539 100644 --- a/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj +++ b/src/tests/JIT/HardwareIntrinsics/HardwareIntrinsics_ro.csproj @@ -22,8 +22,8 @@ - - + + From 5dbbc054810cc1f7b92958a411df2b2fa23fb729 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Thu, 23 Mar 2023 23:50:14 -0700 Subject: [PATCH 16/16] WIP --- src/coreclr/inc/clrconfigvalues.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/inc/clrconfigvalues.h b/src/coreclr/inc/clrconfigvalues.h index 9dd74d914ad74..c2c28da395920 100644 --- a/src/coreclr/inc/clrconfigvalues.h +++ b/src/coreclr/inc/clrconfigvalues.h @@ -754,7 +754,7 @@ RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD, W("EnableAVX512CD"), 1 RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512CD_VL, W("EnableAVX512CD_VL"), 1, "Allows AVX512CD_VL+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ, W("EnableAVX512DQ"), 1, "Allows AVX512DQ+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512DQ_VL, W("EnableAVX512DQ_VL"), 1, "Allows AVX512DQ_VL+ hardware intrinsics to be disabled") -RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 1, "Allows AVX512F+ hardware intrinsics to be disabled") +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F, W("EnableAVX512F"), 0, "Allows AVX512F+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVX512F_VL, W("EnableAVX512F_VL"), 1, "Allows AVX512F_VL+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableAVXVNNI, W("EnableAVXVNNI"), 1, "Allows AVX VNNI+ hardware intrinsics to be disabled") RETAIL_CONFIG_DWORD_INFO(EXTERNAL_EnableBMI1, W("EnableBMI1"), 1, "Allows BMI1+ hardware intrinsics to be disabled")