Skip to content

Commit b5948bf

Browse files
authored
AVX10.1 API introduction in JIT (#101938)
* Add AVX10v1 API surface
* Define HWINTRINSIC for AVX10v1, AVX10v1_V256 and AVX10v1_V512
* Setup template testing for AVX10v1 APIs
* Handle AVX10v1 APIs in JIT where equivalent AVX512* APIs are handled
* Merge Avx10v1 and Avx10v1.V256. Rename Avx10.cs to Avx10v1.cs
* Add Avx10v1 to relevant places
* Fix CI errors. Add missing API in Avx10v1.PlatformNotSupported and end line with a newline character
* Changes to be made with latest changes on main. Make appropriate comments. Update tests in template testing for Avx10v1
* Lower AVX10v1 hwintrinsic in lowering and gentree.cpp for simdSize 32/16
* Fix failures on GNR for AVX10v1
* Disable template tests disabled for Avx512
* Distinguish between Avx10v1 and Avx10v1/512, Add appropriate comments and clean up code in lowerCast
* Remove duplicate code and rather use a single if condition
* Use bool instead of compIsa checks where possible
* remove duplication of code in shuffle
* resolve review comments. Make evex encoding checks clear to read and resolve a bug in gtNewSimdCvtNode
* Add FMA and Avx512F.X64 instructions to AVX10v1. Restructure code and compOpportunistic checks
* Combine compOpportunistic checks with Avx10 check using IsAvx10OrIsaSupportedOpportunistically
* Introduce a new internal ISA InstructionSet_EVEX and remove InstructionSet_AVX10v1_V256 to make space for the new ISA. Also change all the internal special intrinsic nodes for Avx512F on x86/x64 arch to evex nodes
* Addressing review comments. resolving errors introduced when merged with main
* fix formatting
* Reorder declaration of InstructionSet_EVEX to proper position. Run formatting and resolve errors introduced when merging with main
1 parent fa1acc6 commit b5948bf

File tree

115 files changed

+11280
-555
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

115 files changed

+11280
-555
lines changed

src/coreclr/inc/corinfoinstructionset.h

+84-80
Large diffs are not rendered by default.

src/coreclr/inc/jiteeversionguid.h

+5-5
Original file line numberDiff line numberDiff line change
@@ -43,11 +43,11 @@ typedef const GUID *LPCGUID;
4343
#define GUID_DEFINED
4444
#endif // !GUID_DEFINED
4545

46-
constexpr GUID JITEEVersionIdentifier = { /* 227e46fa-1be3-4770-b613-4a239e7c28aa */
47-
0x227e46fa,
48-
0x1be3,
49-
0x4770,
50-
{0xb6, 0x13, 0x4a, 0x23, 0x9e, 0x7c, 0x28, 0xaa}
46+
constexpr GUID JITEEVersionIdentifier = { /* 6e0b439f-0d18-4836-a486-4962af0cc948 */
47+
0x6e0b439f,
48+
0x0d18,
49+
0x4836,
50+
{0xa4, 0x86, 0x49, 0x62, 0xaf, 0x0c, 0xc9, 0x48}
5151
};
5252

5353
//////////////////////////////////////////////////////////////////////////////////////////////////////////

src/coreclr/inc/readytoruninstructionset.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -53,8 +53,8 @@ enum ReadyToRunInstructionSet
5353
READYTORUN_INSTRUCTION_Rcpc2=42,
5454
READYTORUN_INSTRUCTION_Sve=43,
5555
READYTORUN_INSTRUCTION_Avx10v1=44,
56-
READYTORUN_INSTRUCTION_Avx10v1_V256=45,
5756
READYTORUN_INSTRUCTION_Avx10v1_V512=46,
57+
READYTORUN_INSTRUCTION_EVEX=47,
5858

5959
};
6060

src/coreclr/jit/assertionprop.cpp

+2-2
Original file line numberDiff line numberDiff line change
@@ -3184,8 +3184,8 @@ bool Compiler::optIsProfitableToSubstitute(GenTree* dest, BasicBlock* destBlock,
31843184
return (simdBaseType == TYP_FLOAT) && vecCon->IsZero();
31853185
}
31863186

3187-
case NI_AVX512F_CompareEqualMask:
3188-
case NI_AVX512F_CompareNotEqualMask:
3187+
case NI_EVEX_CompareEqualMask:
3188+
case NI_EVEX_CompareNotEqualMask:
31893189
{
31903190
// We can optimize when the constant is zero, but only
31913191
// for non floating-point since +0.0 == -0.0

src/coreclr/jit/codegencommon.cpp

+32-2
Original file line numberDiff line numberDiff line change
@@ -1827,7 +1827,22 @@ void CodeGen::genGenerateMachineCode()
18271827
#if defined(TARGET_X86)
18281828
if (compiler->canUseEvexEncoding())
18291829
{
1830-
printf("X86 with AVX512");
1830+
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
1831+
{
1832+
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
1833+
{
1834+
printf("X86 with AVX10/512");
1835+
}
1836+
else
1837+
{
1838+
printf("X86 with AVX10/256");
1839+
}
1840+
}
1841+
else
1842+
{
1843+
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
1844+
printf("X86 with AVX512");
1845+
}
18311846
}
18321847
else if (compiler->canUseVexEncoding())
18331848
{
@@ -1840,7 +1855,22 @@ void CodeGen::genGenerateMachineCode()
18401855
#elif defined(TARGET_AMD64)
18411856
if (compiler->canUseEvexEncoding())
18421857
{
1843-
printf("X64 with AVX512");
1858+
// TARGET_AMD64 branch: every banner printed below must say "X64" (as the removed line did), not "X86".
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
1859+
{
1860+
if (compiler->compOpportunisticallyDependsOn(InstructionSet_AVX10v1_V512))
1861+
{
1862+
printf("X64 with AVX10/512");
1863+
}
1864+
else
1865+
{
1866+
printf("X64 with AVX10/256");
1867+
}
1868+
}
1869+
else
1870+
{
1871+
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
1872+
printf("X64 with AVX512");
1873+
}
18441874
}
18451875
else if (compiler->canUseVexEncoding())
18461876
{

src/coreclr/jit/codegenxarch.cpp

+24-13
Original file line numberDiff line numberDiff line change
@@ -465,7 +465,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
465465
{
466466
if (emitter::isHighSimdReg(targetReg))
467467
{
468-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
468+
assert(compiler->canUseEvexEncodingDebugOnly());
469469
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
470470
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
471471
}
@@ -492,7 +492,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
492492
{
493493
if (emitter::isHighSimdReg(targetReg))
494494
{
495-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
495+
assert(compiler->canUseEvexEncodingDebugOnly());
496496
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
497497
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
498498
}
@@ -521,7 +521,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
521521
{
522522
if (emitter::isHighSimdReg(targetReg))
523523
{
524-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
524+
assert(compiler->canUseEvexEncodingDebugOnly());
525525
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
526526
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
527527
}
@@ -548,7 +548,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, simd_t
548548
{
549549
if (emitter::isHighSimdReg(targetReg))
550550
{
551-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
551+
assert(compiler->canUseEvexEncodingDebugOnly());
552552
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, attr, targetReg, targetReg, targetReg,
553553
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
554554
}
@@ -667,7 +667,7 @@ void CodeGen::genSetRegToConst(regNumber targetReg, var_types targetType, GenTre
667667
{
668668
if (emitter::isHighSimdReg(targetReg))
669669
{
670-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
670+
assert(compiler->canUseEvexEncodingDebugOnly());
671671
emit->emitIns_SIMD_R_R_R_I(INS_vpternlogd, EA_16BYTE, targetReg, targetReg, targetReg,
672672
static_cast<int8_t>(0xFF), INS_OPTS_NONE);
673673
}
@@ -5654,6 +5654,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
56545654
case NI_AVX512F_ExtractVector256:
56555655
case NI_AVX512DQ_ExtractVector128:
56565656
case NI_AVX512DQ_ExtractVector256:
5657+
case NI_AVX10v1_V512_ExtractVector128:
5658+
case NI_AVX10v1_V512_ExtractVector256:
56575659
{
56585660
// These intrinsics are "ins reg/mem, xmm, imm8"
56595661
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
@@ -5682,6 +5684,8 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
56825684
case NI_AVX512F_ConvertToVector256UInt32:
56835685
case NI_AVX512F_VL_ConvertToVector128UInt32:
56845686
case NI_AVX512F_VL_ConvertToVector128UInt32WithSaturation:
5687+
case NI_AVX10v1_ConvertToVector128UInt32:
5688+
case NI_AVX10v1_ConvertToVector128UInt32WithSaturation:
56855689
{
56865690
assert(!varTypeIsFloating(baseType));
56875691
FALLTHROUGH;
@@ -5719,6 +5723,16 @@ void CodeGen::genCodeForStoreInd(GenTreeStoreInd* tree)
57195723
case NI_AVX512BW_VL_ConvertToVector128ByteWithSaturation:
57205724
case NI_AVX512BW_VL_ConvertToVector128SByte:
57215725
case NI_AVX512BW_VL_ConvertToVector128SByteWithSaturation:
5726+
case NI_AVX10v1_ConvertToVector128Byte:
5727+
case NI_AVX10v1_ConvertToVector128ByteWithSaturation:
5728+
case NI_AVX10v1_ConvertToVector128Int16:
5729+
case NI_AVX10v1_ConvertToVector128Int16WithSaturation:
5730+
case NI_AVX10v1_ConvertToVector128Int32:
5731+
case NI_AVX10v1_ConvertToVector128Int32WithSaturation:
5732+
case NI_AVX10v1_ConvertToVector128SByte:
5733+
case NI_AVX10v1_ConvertToVector128SByteWithSaturation:
5734+
case NI_AVX10v1_ConvertToVector128UInt16:
5735+
case NI_AVX10v1_ConvertToVector128UInt16WithSaturation:
57225736
{
57235737
// These intrinsics are "ins reg/mem, xmm"
57245738
ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
@@ -7324,13 +7338,11 @@ void CodeGen::genIntToFloatCast(GenTree* treeNode)
73247338
// Also we don't expect to see uint32 -> float/double and uint64 -> float conversions
73257339
// here since they should have been lowered appropriately.
73267340
noway_assert(srcType != TYP_UINT);
7327-
assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) ||
7328-
compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
7341+
assert((srcType != TYP_ULONG) || (dstType != TYP_FLOAT) || compiler->canUseEvexEncodingDebugOnly());
73297342

7330-
if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) &&
7331-
compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
7343+
if ((srcType == TYP_ULONG) && varTypeIsFloating(dstType) && compiler->canUseEvexEncoding())
73327344
{
7333-
assert(compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
7345+
assert(compiler->canUseEvexEncodingDebugOnly());
73347346
genConsumeOperands(treeNode->AsOp());
73357347
instruction ins = ins_FloatConv(dstType, srcType, emitTypeSize(srcType));
73367348
GetEmitter()->emitInsBinary(ins, emitTypeSize(srcType), treeNode, op1);
@@ -7458,13 +7470,12 @@ void CodeGen::genFloatToIntCast(GenTree* treeNode)
74587470
// into a helper call by either front-end or lowering phase, unless we have AVX512F
74597471
// accelerated conversions.
74607472
assert(!varTypeIsUnsigned(dstType) || (dstSize != EA_ATTR(genTypeSize(TYP_LONG))) ||
7461-
compiler->compIsaSupportedDebugOnly(InstructionSet_AVX512F));
7473+
compiler->canUseEvexEncodingDebugOnly());
74627474

74637475
// If the dstType is TYP_UINT, we have 32-bits to encode the
74647476
// float number. Any of 33rd or above bits can be the sign bit.
74657477
// To achieve it we pretend as if we are converting it to a long.
7466-
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) &&
7467-
!compiler->compOpportunisticallyDependsOn(InstructionSet_AVX512F))
7478+
if (varTypeIsUnsigned(dstType) && (dstSize == EA_ATTR(genTypeSize(TYP_INT))) && !compiler->canUseEvexEncoding())
74687479
{
74697480
dstType = TYP_LONG;
74707481
}

src/coreclr/jit/compiler.cpp

-1
Original file line numberDiff line numberDiff line change
@@ -2307,7 +2307,6 @@ void Compiler::compSetProcessor()
23072307
{
23082308
instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
23092309
}
2310-
23112310
// x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
23122311
// These have been shipped together historically and at the time of this writing
23132312
// there exists no hardware which doesn't support the entire feature set. To simplify

src/coreclr/jit/compiler.h

+93-1
Original file line numberDiff line numberDiff line change
@@ -9554,6 +9554,14 @@ class Compiler
95549554
return opts.compSupportsISA.HasInstructionSet(isa);
95559555
}
95569556

9557+
// Following cases should be taken into consideration when using the below APIs:
9558+
// - InstructionSet_EVEX implies Avx10v1 -or- Avx512F+CD+DQ+BW+VL and can be used for 128-bit or 256-bit EVEX
9559+
//   encoded instructions in these instruction sets.
9560+
// - InstructionSet_Avx10v1_V512 should never be queried directly; it is covered by querying Avx512*.
9561+
// - InstructionSet_Avx512F (and same for BW, CD, DQ) is only queried for 512-bit EVEX encoded instructions.
9562+
// - InstructionSet_Avx10v1 is only queried for cases like 128-bit/256-bit instructions that wouldn't be in
9563+
//   F+CD+DQ+BW+VL (such as VBMI) and should appear with a corresponding query around AVX512*_VL (i.e. AVX512_VBMI_VL).
9564+
95579565
#ifdef DEBUG
95589566
//------------------------------------------------------------------------
95599567
// IsBaselineVector512IsaSupportedDebugOnly - Does isa support exist for Vector512.
@@ -9567,6 +9575,42 @@ class Compiler
95679575
return compIsaSupportedDebugOnly(InstructionSet_AVX512F);
95689576
#else
95699577
return false;
9578+
#endif
9579+
}
9580+
9581+
//------------------------------------------------------------------------
9582+
// canUseEvexEncodingDebugOnly - Answer the question: Is Evex encoding supported on this target.
9583+
//
9584+
// Returns:
9585+
// `true` if Evex encoding is supported, `false` if not.
9586+
//
9587+
bool canUseEvexEncodingDebugOnly() const
9588+
{
9589+
#ifdef TARGET_XARCH
9590+
// The answer comes from the InstructionSet_EVEX query made through compIsaSupportedDebugOnly.
return (compIsaSupportedDebugOnly(InstructionSet_EVEX));
9591+
#else
9592+
// When TARGET_XARCH is not defined the answer is always false.
return false;
9593+
#endif
9594+
}
9595+
9596+
//------------------------------------------------------------------------
9597+
// IsAvx10OrIsaSupportedDebugOnly - Answer the question: Is AVX10v1 or the given ISA supported.
9598+
//
9599+
// Returns:
9600+
// `true` if AVX10v1 or the given ISA is supported, `false` if not.
9601+
//
9602+
bool IsAvx10OrIsaSupportedDebugOnly(CORINFO_InstructionSet isa) const
9603+
{
9604+
#ifdef TARGET_XARCH
9605+
// For the below cases, the evex encoding check should be used instead; isa must differ from ALL of them (&&, as || is always true).
9606+
assert(isa != InstructionSet_AVX512F && isa != InstructionSet_AVX512F_VL && isa != InstructionSet_AVX512BW &&
9607+
isa != InstructionSet_AVX512BW_VL && isa != InstructionSet_AVX512CD &&
9608+
isa != InstructionSet_AVX512CD_VL && isa != InstructionSet_AVX512DQ &&
9609+
isa != InstructionSet_AVX512DQ_VL);
9610+
9611+
return (compIsaSupportedDebugOnly(InstructionSet_AVX10v1) || compIsaSupportedDebugOnly(isa));
9612+
#else
9613+
return false;
95709614
#endif
95719615
}
95729616
#endif // DEBUG
@@ -9586,6 +9630,21 @@ class Compiler
95869630
#endif
95879631
}
95889632

9633+
//------------------------------------------------------------------------
9634+
// IsAvx10OrIsaSupportedOpportunistically - Does opportunistic isa support exist for AVX10v1 or the given ISA.
9635+
//
9636+
// Returns:
9637+
// `true` if AVX10v1 or the given ISA is supported, `false` if not.
9638+
//
9639+
bool IsAvx10OrIsaSupportedOpportunistically(CORINFO_InstructionSet isa) const
9640+
{
9641+
#ifdef TARGET_XARCH
9642+
// True when either AVX10v1 or the caller-supplied isa passes compOpportunisticallyDependsOn.
// The || short-circuits, so isa is only queried when the AVX10v1 query returns false.
return (compOpportunisticallyDependsOn(InstructionSet_AVX10v1) || compOpportunisticallyDependsOn(isa));
9643+
#else
9644+
// When TARGET_XARCH is not defined the answer is always false.
return false;
9645+
#endif
9646+
}
9647+
95899648
bool canUseEmbeddedBroadcast() const
95909649
{
95919650
return JitConfig.EnableEmbeddedBroadcast();
@@ -9598,6 +9657,35 @@ class Compiler
95989657

95999658
#ifdef TARGET_XARCH
96009659
public:
9660+
9661+
//------------------------------------------------------------------------
9662+
// compIsEvexOpportunisticallySupported - Checks for whether AVX10v1 or avx512InstructionSet is supported
9663+
// opportunistically.
9664+
//
9665+
// Returns:
9666+
// returns true if AVX10v1 or avx512InstructionSet is supported opportunistically and
9667+
// sets isV512Supported to true if avx512InstructionSet is supported, false otherwise.
9668+
//
9669+
bool compIsEvexOpportunisticallySupported(bool& isV512Supported,
9670+
CORINFO_InstructionSet avx512InstructionSet = InstructionSet_AVX512F)
9671+
{
9672+
// Only the AVX512 F/BW/CD/DQ/VBMI instruction sets (and their _VL variants) are accepted as avx512InstructionSet.
assert(avx512InstructionSet == InstructionSet_AVX512F || avx512InstructionSet == InstructionSet_AVX512F_VL ||
9673+
avx512InstructionSet == InstructionSet_AVX512BW || avx512InstructionSet == InstructionSet_AVX512BW_VL ||
9674+
avx512InstructionSet == InstructionSet_AVX512CD || avx512InstructionSet == InstructionSet_AVX512CD_VL ||
9675+
avx512InstructionSet == InstructionSet_AVX512DQ || avx512InstructionSet == InstructionSet_AVX512DQ_VL ||
9676+
avx512InstructionSet == InstructionSet_AVX512VBMI ||
9677+
avx512InstructionSet == InstructionSet_AVX512VBMI_VL);
9678+
9679+
// The requested AVX512 instruction set is queried first; when it is available the out-parameter is set to true.
if (compOpportunisticallyDependsOn(avx512InstructionSet))
9680+
{
9681+
isV512Supported = true;
9682+
return true;
9683+
}
9684+
9685+
// Otherwise the out-parameter is false and the result depends solely on the AVX10v1 query.
isV512Supported = false;
9686+
return compOpportunisticallyDependsOn(InstructionSet_AVX10v1);
9687+
}
9688+
96019689
bool canUseVexEncoding() const
96029690
{
96039691
return compOpportunisticallyDependsOn(InstructionSet_AVX);
@@ -9611,7 +9699,7 @@ class Compiler
96119699
//
96129700
bool canUseEvexEncoding() const
96139701
{
9614-
return compOpportunisticallyDependsOn(InstructionSet_AVX512F);
9702+
// The added line answers via InstructionSet_EVEX, replacing the removed InstructionSet_AVX512F query.
return (compOpportunisticallyDependsOn(InstructionSet_EVEX));
96159703
}
96169704

96179705
private:
@@ -9641,6 +9729,10 @@ class Compiler
96419729

96429730
return true;
96439731
}
9732+
else if (JitConfig.JitStressEvexEncoding() && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
9733+
{
9734+
return true;
9735+
}
96449736
#endif // DEBUG
96459737

96469738
return false;

0 commit comments

Comments
 (0)