Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 9bffa42

Browse files
committed Jun 10, 2024
Cleanup some handling around Avx10v1
1 parent c87d73c commit 9bffa42

File tree

11 files changed

+96
-599
lines changed

11 files changed

+96
-599
lines changed
 

‎src/coreclr/jit/compiler.cpp

+39 -30
Original file line number | Diff line number | Diff line change
@@ -2307,37 +2307,46 @@ void Compiler::compSetProcessor()
23072307
{
23082308
instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
23092309
}
2310-
// x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
2311-
// These have been shipped together historically and at the time of this writing
2312-
// there exists no hardware which doesn't support the entire feature set. To simplify
2313-
// the overall JIT implementation, we currently require the entire set of ISAs to be
2314-
// supported and disable AVX512 support otherwise.
2315-
2316-
if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F))
2317-
{
2318-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
2319-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL));
2320-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW));
2321-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL));
2322-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD));
2323-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL));
2324-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ));
2325-
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
2326-
2327-
instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
2328-
2329-
if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING))
2330-
{
2331-
// Some architectures can experience frequency throttling when
2332-
// executing 512-bit width instructions. To account for this we set the
2333-
// default preferred vector width to 256-bits in some scenarios. Power
2334-
// users can override this with `DOTNET_PreferredVectorBitWidth=512` to
2335-
// allow using such instructions where hardware support is available.
2336-
//
2337-
// Do not condition this based on stress mode as it makes the support
2338-
// reported inconsistent across methods and breaks expectations/functionality
23392310

2340-
preferredVectorByteLength = 256 / 8;
2311+
if (instructionSetFlags.HasInstructionSet(InstructionSet_EVEX))
2312+
{
2313+
if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F))
2314+
{
2315+
// x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
2316+
// These have been shipped together historically and at the time of this writing
2317+
// there exists no hardware which doesn't support the entire feature set. To simplify
2318+
// the overall JIT implementation, we currently require the entire set of ISAs to be
2319+
// supported and disable AVX512 support otherwise.
2320+
2321+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
2322+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL));
2323+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW));
2324+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL));
2325+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD));
2326+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL));
2327+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ));
2328+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
2329+
2330+
instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
2331+
2332+
if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING))
2333+
{
2334+
// Some architectures can experience frequency throttling when
2335+
// executing 512-bit width instructions. To account for this we set the
2336+
// default preferred vector width to 256-bits in some scenarios. Power
2337+
// users can override this with `DOTNET_PreferredVectorBitWidth=512` to
2338+
// allow using such instructions where hardware support is available.
2339+
//
2340+
// Do not condition this based on stress mode as it makes the support
2341+
// reported inconsistent across methods and breaks expectations/functionality
2342+
2343+
preferredVectorByteLength = 256 / 8;
2344+
}
2345+
}
2346+
else
2347+
{
2348+
// We shouldn't have EVEX enabled if neither AVX512 nor AVX10v1 are supported
2349+
assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX10v1));
23412350
}
23422351
}
23432352

‎src/coreclr/jit/hwintrinsiclistxarch.h

-10
Original file line number | Diff line number | Diff line change
@@ -1185,18 +1185,8 @@ HARDWARE_INTRINSIC(AVX10v1, GetMantissaScalar,
11851185
HARDWARE_INTRINSIC(AVX10v1, LeadingZeroCount, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
11861186
HARDWARE_INTRINSIC(AVX10v1, Max, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
11871187
HARDWARE_INTRINSIC(AVX10v1, Min, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
1188-
HARDWARE_INTRINSIC(AVX10v1, MultiplyAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
1189-
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
1190-
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
1191-
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
1192-
HARDWARE_INTRINSIC(AVX10v1, MultiplyAddSubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
11931188
HARDWARE_INTRINSIC(AVX10v1, MultiplyLow, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
11941189
HARDWARE_INTRINSIC(AVX10v1, MultiplyScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
1195-
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
1196-
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
1197-
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
1198-
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
1199-
HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
12001190
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
12011191
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16x2, 32, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
12021192
HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x8, 16, 2, false, {INS_vpermb, INS_vpermb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)

‎src/coreclr/jit/hwintrinsicxarch.cpp

+1 -1
Original file line number | Diff line number | Diff line change
@@ -970,8 +970,8 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT
970970
static_assert_no_msg(NI_AVX512F_RotateLeftVariable == (NI_AVX512F_RotateLeft + 1));
971971
static_assert_no_msg(NI_AVX512F_RotateRightVariable == (NI_AVX512F_RotateRight + 1));
972972
static_assert_no_msg(NI_AVX512F_VL_RotateLeftVariable == (NI_AVX512F_VL_RotateLeft + 1));
973-
static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1));
974973
static_assert_no_msg(NI_AVX512F_VL_RotateRightVariable == (NI_AVX512F_VL_RotateRight + 1));
974+
static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1));
975975
static_assert_no_msg(NI_AVX10v1_RotateRightVariable == (NI_AVX10v1_RotateRight + 1));
976976

977977
impSpillSideEffect(true,

‎src/coreclr/jit/importercalls.cpp

+10 -10
Original file line number | Diff line number | Diff line change
@@ -4012,11 +4012,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
40124012
op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, callJitType, 16);
40134013
op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, callJitType, 16);
40144014

4015-
retNode = compOpportunisticallyDependsOn(InstructionSet_AVX10v1)
4016-
? gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_MultiplyAddScalar,
4017-
callJitType, 16)
4018-
: gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_FMA_MultiplyAddScalar,
4019-
callJitType, 16);
4015+
retNode =
4016+
gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_FMA_MultiplyAddScalar, callJitType, 16);
40204017

40214018
retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 16);
40224019
break;
@@ -9298,8 +9295,9 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
92989295
#if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
92999296
if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_SSE2))
93009297
{
9301-
bool needsFixup = false;
9302-
bool canHandle = false;
9298+
bool needsFixup = false;
9299+
bool canHandle = false;
9300+
bool isV512Supported = false;
93039301

93049302
if (isMax)
93059303
{
@@ -9328,7 +9326,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
93289326
needsFixup = cnsNode->IsFloatPositiveZero();
93299327
}
93309328

9331-
if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512F))
9329+
if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported))
93329330
{
93339331
// Given the checks, op1 can safely be the cns and op2 the other node
93349332

@@ -9369,7 +9367,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
93699367
needsFixup = cnsNode->IsFloatNegativeZero();
93709368
}
93719369

9372-
if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512F))
9370+
if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported))
93739371
{
93749372
// Given the checks, op1 can safely be the cns and op2 the other node
93759373

@@ -9453,8 +9451,10 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
94539451
tbl->gtSimdVal.i32[0] = 0x0700;
94549452
}
94559453

9454+
NamedIntrinsic fixupScalarId = isV512Supported ? NI_AVX512F_FixupScalar : NI_AVX10v1_FixupScalar;
9455+
94569456
retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, retNode, op2Clone, tbl, gtNewIconNode(0),
9457-
NI_AVX512F_FixupScalar, callJitType, 16);
9457+
fixupScalarId, callJitType, 16);
94589458
}
94599459

94609460
if (isNumber)

0 commit comments

Comments
 (0)
Please sign in to comment.