diff --git a/src/coreclr/inc/corinfoinstructionset.h b/src/coreclr/inc/corinfoinstructionset.h
index d83f6c0cc94a4e..9a3c0efc709275 100644
--- a/src/coreclr/inc/corinfoinstructionset.h
+++ b/src/coreclr/inc/corinfoinstructionset.h
@@ -584,12 +584,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_SSE41);
     if (resultflags.HasInstructionSet(InstructionSet_SSE42) && !resultflags.HasInstructionSet(InstructionSet_SSE41))
         resultflags.RemoveInstructionSet(InstructionSet_SSE42);
+    if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
+        resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
     if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
         resultflags.RemoveInstructionSet(InstructionSet_AVX);
     if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_AVX))
         resultflags.RemoveInstructionSet(InstructionSet_AVX2);
-    if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
-        resultflags.RemoveInstructionSet(InstructionSet_AES);
     if (resultflags.HasInstructionSet(InstructionSet_BMI1) && !resultflags.HasInstructionSet(InstructionSet_AVX))
         resultflags.RemoveInstructionSet(InstructionSet_BMI1);
     if (resultflags.HasInstructionSet(InstructionSet_BMI2) && !resultflags.HasInstructionSet(InstructionSet_AVX))
@@ -598,22 +598,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_FMA);
     if (resultflags.HasInstructionSet(InstructionSet_LZCNT) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
         resultflags.RemoveInstructionSet(InstructionSet_LZCNT);
-    if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
-        resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ);
-    if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
-        resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
-    if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE))
-        resultflags.RemoveInstructionSet(InstructionSet_Vector128);
-    if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
-        resultflags.RemoveInstructionSet(InstructionSet_Vector256);
-    if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
-        resultflags.RemoveInstructionSet(InstructionSet_Vector512);
-    if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
-        resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
     if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
         resultflags.RemoveInstructionSet(InstructionSet_MOVBE);
-    if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
-        resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
     if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
         resultflags.RemoveInstructionSet(InstructionSet_EVEX);
     if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA))
@@ -646,10 +632,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
     if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
         resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
-    if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
-        resultflags.RemoveInstructionSet(InstructionSet_EVEX);
-    if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA))
-        resultflags.RemoveInstructionSet(InstructionSet_EVEX);
+    if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+        resultflags.RemoveInstructionSet(InstructionSet_AES);
+    if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+        resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ);
+    if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+        resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
+    if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
+        resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
     if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX))
         resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
     if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
@@ -674,18 +664,18 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
     if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL))
         resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+    if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE))
+        resultflags.RemoveInstructionSet(InstructionSet_Vector128);
+    if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
+        resultflags.RemoveInstructionSet(InstructionSet_Vector256);
+    if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+        resultflags.RemoveInstructionSet(InstructionSet_Vector512);
     if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
         resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
     if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
         resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
     if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
         resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
-    if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
-        resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
-    if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
-        resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
-    if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL))
-        resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
     if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
@@ -700,12 +690,12 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_SSE41);
     if (resultflags.HasInstructionSet(InstructionSet_SSE42) && !resultflags.HasInstructionSet(InstructionSet_SSE41))
         resultflags.RemoveInstructionSet(InstructionSet_SSE42);
+    if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
+        resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
     if (resultflags.HasInstructionSet(InstructionSet_AVX) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
         resultflags.RemoveInstructionSet(InstructionSet_AVX);
     if (resultflags.HasInstructionSet(InstructionSet_AVX2) && !resultflags.HasInstructionSet(InstructionSet_AVX))
         resultflags.RemoveInstructionSet(InstructionSet_AVX2);
-    if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
-        resultflags.RemoveInstructionSet(InstructionSet_AES);
     if (resultflags.HasInstructionSet(InstructionSet_BMI1) && !resultflags.HasInstructionSet(InstructionSet_AVX))
         resultflags.RemoveInstructionSet(InstructionSet_BMI1);
     if (resultflags.HasInstructionSet(InstructionSet_BMI2) && !resultflags.HasInstructionSet(InstructionSet_AVX))
@@ -714,22 +704,8 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_FMA);
     if (resultflags.HasInstructionSet(InstructionSet_LZCNT) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
         resultflags.RemoveInstructionSet(InstructionSet_LZCNT);
-    if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
-        resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ);
-    if (resultflags.HasInstructionSet(InstructionSet_POPCNT) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
-        resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
-    if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE))
-        resultflags.RemoveInstructionSet(InstructionSet_Vector128);
-    if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
-        resultflags.RemoveInstructionSet(InstructionSet_Vector256);
-    if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
-        resultflags.RemoveInstructionSet(InstructionSet_Vector512);
-    if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
-        resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
     if (resultflags.HasInstructionSet(InstructionSet_MOVBE) && !resultflags.HasInstructionSet(InstructionSet_SSE42))
         resultflags.RemoveInstructionSet(InstructionSet_MOVBE);
-    if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
-        resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
     if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
         resultflags.RemoveInstructionSet(InstructionSet_EVEX);
     if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA))
@@ -762,10 +738,14 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
     if (resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
         resultflags.RemoveInstructionSet(InstructionSet_AVX512VBMI_VL);
-    if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
-        resultflags.RemoveInstructionSet(InstructionSet_EVEX);
-    if (resultflags.HasInstructionSet(InstructionSet_EVEX) && !resultflags.HasInstructionSet(InstructionSet_FMA))
-        resultflags.RemoveInstructionSet(InstructionSet_EVEX);
+    if (resultflags.HasInstructionSet(InstructionSet_AES) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+        resultflags.RemoveInstructionSet(InstructionSet_AES);
+    if (resultflags.HasInstructionSet(InstructionSet_PCLMULQDQ) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
+        resultflags.RemoveInstructionSet(InstructionSet_PCLMULQDQ);
+    if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
+        resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
+    if (resultflags.HasInstructionSet(InstructionSet_X86Serialize) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
+        resultflags.RemoveInstructionSet(InstructionSet_X86Serialize);
     if (resultflags.HasInstructionSet(InstructionSet_AVX10v1) && !resultflags.HasInstructionSet(InstructionSet_EVEX))
         resultflags.RemoveInstructionSet(InstructionSet_AVX10v1);
     if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX10v1))
@@ -790,18 +770,18 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
         resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
     if (resultflags.HasInstructionSet(InstructionSet_AVX10v1_V512) && !resultflags.HasInstructionSet(InstructionSet_AVX512VBMI_VL))
         resultflags.RemoveInstructionSet(InstructionSet_AVX10v1_V512);
+    if (resultflags.HasInstructionSet(InstructionSet_Vector128) && !resultflags.HasInstructionSet(InstructionSet_SSE))
+        resultflags.RemoveInstructionSet(InstructionSet_Vector128);
+    if (resultflags.HasInstructionSet(InstructionSet_Vector256) && !resultflags.HasInstructionSet(InstructionSet_AVX))
+        resultflags.RemoveInstructionSet(InstructionSet_Vector256);
+    if (resultflags.HasInstructionSet(InstructionSet_Vector512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
+        resultflags.RemoveInstructionSet(InstructionSet_Vector512);
     if (resultflags.HasInstructionSet(InstructionSet_VectorT128) && !resultflags.HasInstructionSet(InstructionSet_SSE2))
         resultflags.RemoveInstructionSet(InstructionSet_VectorT128);
     if (resultflags.HasInstructionSet(InstructionSet_VectorT256) && !resultflags.HasInstructionSet(InstructionSet_AVX2))
         resultflags.RemoveInstructionSet(InstructionSet_VectorT256);
     if (resultflags.HasInstructionSet(InstructionSet_VectorT512) && !resultflags.HasInstructionSet(InstructionSet_AVX512F))
         resultflags.RemoveInstructionSet(InstructionSet_VectorT512);
-    if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512BW_VL))
-        resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
-    if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512CD_VL))
-        resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
-    if (resultflags.HasInstructionSet(InstructionSet_AVX512F) && !resultflags.HasInstructionSet(InstructionSet_AVX512DQ_VL))
-        resultflags.RemoveInstructionSet(InstructionSet_AVX512F);
 #endif // TARGET_X86
     } while (!oldflags.Equals(resultflags));
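EnsureInstructionSetFlagsAreValid strips any ISA whose prerequisite is absent, and the enclosing do/while re-runs the rules until nothing changes, so dependency chains (AVX10v1_V512 -> AVX10v1 -> EVEX -> AVX2) collapse in a few passes regardless of rule order. A minimal standalone sketch of that fixed-point idea, using a hypothetical bitset rather than the real CORINFO_InstructionSetFlags API:

    #include <cstdint>
    #include <cstddef>

    // Each rule says: if 'isa' is present but its prerequisite is not, drop 'isa'.
    struct Rule { uint64_t isa; uint64_t prereq; };

    uint64_t EnsureValid(uint64_t flags, const Rule* rules, size_t count)
    {
        uint64_t old;
        do
        {
            old = flags;
            for (size_t i = 0; i < count; i++)
            {
                if ((flags & rules[i].isa) && !(flags & rules[i].prereq))
                    flags &= ~rules[i].isa; // rerun until no rule fires
            }
        } while (flags != old);
        return flags;
    }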
diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index aefbe6653a51a3..74f564b2bef26e 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -2307,37 +2307,46 @@ void Compiler::compSetProcessor()
     {
         instructionSetFlags.AddInstructionSet(InstructionSet_Vector256);
     }

-    // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
-    // These have been shipped together historically and at the time of this writing
-    // there exists no hardware which doesn't support the entire feature set. To simplify
-    // the overall JIT implementation, we currently require the entire set of ISAs to be
-    // supported and disable AVX512 support otherwise.
-
-    if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F))
-    {
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ));
-        assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
-
-        instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
-
-        if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING))
-        {
-            // Some architectures can experience frequency throttling when
-            // executing 512-bit width instructions. To account for this we set the
-            // default preferred vector width to 256-bits in some scenarios. Power
-            // users can override this with `DOTNET_PreferredVectorBitWidth=512` to
-            // allow using such instructions where hardware support is available.
-            //
-            // Do not condition this based on stress mode as it makes the support
-            // reported inconsistent across methods and breaks expectations/functionality
-            preferredVectorByteLength = 256 / 8;
+    if (instructionSetFlags.HasInstructionSet(InstructionSet_EVEX))
+    {
+        if (instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F))
+        {
+            // x86-64-v4 feature level supports AVX512F, AVX512BW, AVX512CD, AVX512DQ, AVX512VL
+            // These have been shipped together historically and at the time of this writing
+            // there exists no hardware which doesn't support the entire feature set. To simplify
+            // the overall JIT implementation, we currently require the entire set of ISAs to be
+            // supported and disable AVX512 support otherwise.
+
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F));
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512F_VL));
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW));
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512BW_VL));
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD));
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512CD_VL));
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ));
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX512DQ_VL));
+
+            instructionSetFlags.AddInstructionSet(InstructionSet_Vector512);
+
+            if ((preferredVectorByteLength == 0) && jitFlags.IsSet(JitFlags::JIT_FLAG_VECTOR512_THROTTLING))
+            {
+                // Some architectures can experience frequency throttling when
+                // executing 512-bit width instructions. To account for this we set the
+                // default preferred vector width to 256-bits in some scenarios. Power
+                // users can override this with `DOTNET_PreferredVectorBitWidth=512` to
+                // allow using such instructions where hardware support is available.
+                //
+                // Do not condition this based on stress mode as it makes the support
+                // reported inconsistent across methods and breaks expectations/functionality
+
+                preferredVectorByteLength = 256 / 8;
+            }
+        }
+        else
+        {
+            // We shouldn't have EVEX enabled if neither AVX512 nor AVX10v1 are supported
+            assert(instructionSetFlags.HasInstructionSet(InstructionSet_AVX10v1));
         }
     }
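The reworked compSetProcessor check gates the Vector512 decision on EVEX first: the AVX512 asserts only run when the full AVX512 stack is present, and an AVX10.1-only machine now falls into the else arm instead of tripping them. The control flow, reduced to a sketch with hypothetical booleans standing in for the instruction-set queries:

    #include <cassert>

    void SetVectorWidth(bool hasEvex, bool hasAvx512f, bool hasAvx10v1,
                        bool throttling, unsigned& preferredVectorByteLength)
    {
        if (!hasEvex)
            return; // no masked/512-bit encodings available at all

        if (hasAvx512f)
        {
            // Full AVX512 (F/BW/CD/DQ + VL) is implied; Vector512 is usable,
            // but default to 256-bit vectors on hardware that downclocks.
            if ((preferredVectorByteLength == 0) && throttling)
                preferredVectorByteLength = 256 / 8;
        }
        else
        {
            // EVEX without AVX512 is only legal when AVX10.1 provided it.
            assert(hasAvx10v1);
        }
    }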
diff --git a/src/coreclr/jit/hwintrinsiclistxarch.h b/src/coreclr/jit/hwintrinsiclistxarch.h
index f69b4b93d8758b..73be7d1d1666a8 100644
--- a/src/coreclr/jit/hwintrinsiclistxarch.h
+++ b/src/coreclr/jit/hwintrinsiclistxarch.h
@@ -1233,18 +1233,8 @@ HARDWARE_INTRINSIC(AVX10v1, GetMantissaScalar,
 HARDWARE_INTRINSIC(AVX10v1, LeadingZeroCount, -1, 1, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vplzcntd, INS_vplzcntd, INS_vplzcntq, INS_vplzcntq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
 HARDWARE_INTRINSIC(AVX10v1, Max, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmaxsq, INS_vpmaxuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX10v1, Min, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpminsq, INS_vpminuq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(AVX10v1, MultiplyAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX10v1, MultiplyAddNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ps, INS_vfnmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX10v1, MultiplyAddNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmadd213ss, INS_vfnmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX10v1, MultiplyAddScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ss, INS_vfmadd213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX10v1, MultiplyAddSubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmaddsub213ps, INS_vfmaddsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
 HARDWARE_INTRINSIC(AVX10v1, MultiplyLow, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpmullq, INS_vpmullq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
 HARDWARE_INTRINSIC(AVX10v1, MultiplyScalar, 16, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
-HARDWARE_INTRINSIC(AVX10v1, MultiplySubtract, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ps, INS_vfmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractAdd, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsubadd213ps, INS_vfmsubadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegated, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ps, INS_vfnmsub213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
-HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractNegatedScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfnmsub213ss, INS_vfnmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(AVX10v1, MultiplySubtractScalar, 16, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmsub213ss, INS_vfmsub213sd}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(AVX10v1, MultiShift, -1, 2, false, {INS_vpmultishiftqb, INS_vpmultishiftqb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
 HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16, 32, 2, false, {INS_invalid, INS_invalid, INS_vpermw, INS_vpermw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbMaskingCompatible)
 HARDWARE_INTRINSIC(AVX10v1, PermuteVar16x16x2, 32, 3, false, {INS_invalid, INS_invalid, INS_vpermt2w, INS_vpermt2w, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_PermuteVar2x|HW_Flag_RmwIntrinsic|HW_Flag_EmbMaskingCompatible)
@@ -1463,37 +1453,37 @@ HARDWARE_INTRINSIC(SSE2, UCOMISD,
 HARDWARE_INTRINSIC(SSE41, PTEST, 16, 2, false, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoRMWSemantics|HW_Flag_NoEvexSemantics)
 HARDWARE_INTRINSIC(AVX, PTEST, 0, 2, false, {INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_ptest, INS_vtestps, INS_vtestpd}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
 HARDWARE_INTRINSIC(EVEX, KORTEST, 0, -1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(EVEX, KTEST, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(EVEX, PTESTM, 0, 2, false, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, PTESTNM, 0, 2, false, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, KTEST, 0, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(EVEX, PTESTM, 0, 2, false, {INS_vptestmb, INS_vptestmb, INS_vptestmw, INS_vptestmw, INS_vptestmd, INS_vptestmd, INS_vptestmq, INS_vptestmq, INS_vptestmd, INS_vptestmq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, PTESTNM, 0, 2, false, {INS_vptestnmb, INS_vptestnmb, INS_vptestnmw, INS_vptestnmw, INS_vptestnmd, INS_vptestnmd, INS_vptestnmq, INS_vptestnmq, INS_vptestnmd, INS_vptestnmq}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, AddMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
-HARDWARE_INTRINSIC(EVEX, AndMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
-HARDWARE_INTRINSIC(EVEX, AndNotMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
-HARDWARE_INTRINSIC(EVEX, BlendVariableMask, -1, 3, true, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareMask, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareEqualMask, -1, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
-HARDWARE_INTRINSIC(EVEX, CompareGreaterThanMask, -1, 2, true, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareNotEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
-HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareNotLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareNotLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareOrderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, CompareUnorderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
-HARDWARE_INTRINSIC(EVEX, ConvertMaskToVector, -1, 1, true, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask)
-HARDWARE_INTRINSIC(EVEX, ConvertVectorToMask, -1, 1, true, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask)
-HARDWARE_INTRINSIC(EVEX, MoveMask, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
-HARDWARE_INTRINSIC(EVEX, NotMask, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask)
-HARDWARE_INTRINSIC(EVEX, op_EqualityMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(EVEX, op_InequalityMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative)
-HARDWARE_INTRINSIC(EVEX, OrMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
-HARDWARE_INTRINSIC(EVEX, ShiftLeftMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(EVEX, ShiftRightMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(EVEX, XorMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, AddMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, AndMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, AndNotMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, BlendVariableMask, -1, 3, true, {INS_vpblendmb, INS_vpblendmb, INS_vpblendmw, INS_vpblendmw, INS_vpblendmd, INS_vpblendmd, INS_vpblendmq, INS_vpblendmq, INS_vblendmps, INS_vblendmpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareMask, -1, 3, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_IMM, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareEqualMask, -1, 2, true, {INS_vpcmpeqb, INS_vpcmpeqb, INS_vpcmpeqw, INS_vpcmpeqw, INS_vpcmpeqd, INS_vpcmpeqd, INS_vpcmpeqq, INS_vpcmpeqq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(EVEX, CompareGreaterThanMask, -1, 2, true, {INS_vpcmpgtb, INS_vpcmpub, INS_vpcmpgtw, INS_vpcmpuw, INS_vpcmpgtd, INS_vpcmpud, INS_vpcmpgtq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareNotEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible|HW_Flag_Commutative|HW_Flag_CanBenefitFromConstantProp)
+HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareNotGreaterThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareNotLessThanMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareNotLessThanOrEqualMask, -1, 2, true, {INS_vpcmpb, INS_vpcmpub, INS_vpcmpw, INS_vpcmpuw, INS_vpcmpd, INS_vpcmpud, INS_vpcmpq, INS_vpcmpuq, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareOrderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, CompareUnorderedMask, -1, 2, true, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vcmpps, INS_vcmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_EmbBroadcastCompatible)
+HARDWARE_INTRINSIC(EVEX, ConvertMaskToVector, -1, 1, true, {INS_vpmovm2b, INS_vpmovm2b, INS_vpmovm2w, INS_vpmovm2w, INS_vpmovm2d, INS_vpmovm2d, INS_vpmovm2q, INS_vpmovm2q, INS_vpmovm2d, INS_vpmovm2q}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, ConvertVectorToMask, -1, 1, true, {INS_vpmovb2m, INS_vpmovb2m, INS_vpmovw2m, INS_vpmovw2m, INS_vpmovd2m, INS_vpmovd2m, INS_vpmovq2m, INS_vpmovq2m, INS_vpmovd2m, INS_vpmovq2m}, HW_Category_SimpleSIMD, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, MoveMask, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(EVEX, NotMask, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, op_EqualityMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(EVEX, op_InequalityMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative)
+HARDWARE_INTRINSIC(EVEX, OrMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
+HARDWARE_INTRINSIC(EVEX, ShiftLeftMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(EVEX, ShiftRightMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(EVEX, XorMask, -1, 2, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoContainment|HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask)
 #endif // FEATURE_HW_INTRINSIC
diff --git a/src/coreclr/jit/hwintrinsicxarch.cpp b/src/coreclr/jit/hwintrinsicxarch.cpp
index 61a7890e5051f4..1dfda9f2b1daf4 100644
--- a/src/coreclr/jit/hwintrinsicxarch.cpp
+++ b/src/coreclr/jit/hwintrinsicxarch.cpp
@@ -970,8 +970,8 @@ GenTree* Compiler::impNonConstFallback(NamedIntrinsic intrinsic, var_types simdT
             static_assert_no_msg(NI_AVX512F_RotateLeftVariable == (NI_AVX512F_RotateLeft + 1));
             static_assert_no_msg(NI_AVX512F_RotateRightVariable == (NI_AVX512F_RotateRight + 1));
             static_assert_no_msg(NI_AVX512F_VL_RotateLeftVariable == (NI_AVX512F_VL_RotateLeft + 1));
-            static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1));
             static_assert_no_msg(NI_AVX512F_VL_RotateRightVariable == (NI_AVX512F_VL_RotateRight + 1));
+            static_assert_no_msg(NI_AVX10v1_RotateLeftVariable == (NI_AVX10v1_RotateLeft + 1));
             static_assert_no_msg(NI_AVX10v1_RotateRightVariable == (NI_AVX10v1_RotateRight + 1));

            impSpillSideEffect(true,
diff --git a/src/coreclr/jit/importercalls.cpp b/src/coreclr/jit/importercalls.cpp
index f52fe739f11c00..4160f53a3a1410 100644
--- a/src/coreclr/jit/importercalls.cpp
+++ b/src/coreclr/jit/importercalls.cpp
@@ -4012,11 +4012,8 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
                     op2 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op2, callJitType, 16);
                     op1 = gtNewSimdCreateScalarUnsafeNode(TYP_SIMD16, op1, callJitType, 16);

-                    retNode = compOpportunisticallyDependsOn(InstructionSet_AVX10v1)
-                                  ? gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_AVX10v1_MultiplyAddScalar,
-                                                             callJitType, 16)
-                                  : gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_FMA_MultiplyAddScalar,
-                                                             callJitType, 16);
+                    retNode =
+                        gtNewSimdHWIntrinsicNode(TYP_SIMD16, op1, op2, op3, NI_FMA_MultiplyAddScalar, callJitType, 16);

                     retNode = gtNewSimdToScalarNode(callType, retNode, callJitType, 16);
                     break;
@@ -9298,8 +9295,9 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
 #if defined(FEATURE_HW_INTRINSICS) && defined(TARGET_XARCH)
     if (!isMagnitude && compOpportunisticallyDependsOn(InstructionSet_SSE2))
     {
-        bool needsFixup = false;
-        bool canHandle  = false;
+        bool needsFixup      = false;
+        bool canHandle       = false;
+        bool isV512Supported = false;

         if (isMax)
         {
@@ -9328,7 +9326,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
                 needsFixup = cnsNode->IsFloatPositiveZero();
             }

-            if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+            if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported))
             {
                 // Given the checks, op1 can safely be the cns and op2 the other node

@@ -9369,7 +9367,7 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
                 needsFixup = cnsNode->IsFloatNegativeZero();
             }

-            if (!needsFixup || compOpportunisticallyDependsOn(InstructionSet_AVX512F))
+            if (!needsFixup || compIsEvexOpportunisticallySupported(isV512Supported))
             {
                 // Given the checks, op1 can safely be the cns and op2 the other node

@@ -9453,8 +9451,10 @@ GenTree* Compiler::impMinMaxIntrinsic(CORINFO_METHOD_HANDLE method,
                     tbl->gtSimdVal.i32[0] = 0x0700;
                 }

+                NamedIntrinsic fixupScalarId = isV512Supported ? NI_AVX512F_FixupScalar : NI_AVX10v1_FixupScalar;
+
                 retNode = gtNewSimdHWIntrinsicNode(TYP_SIMD16, retNode, op2Clone, tbl, gtNewIconNode(0),
-                                                   NI_AVX512F_FixupScalar, callJitType, 16);
+                                                   fixupScalarId, callJitType, 16);
             }

             if (isNumber)
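The importer now asks one question, "is any EVEX-capable ISA available?", and remembers which flavor answered, so the follow-up FixupScalar node can pick the matching intrinsic id. A self-contained sketch of that selection, with hypothetical stand-ins for the JIT's queries (assuming the out-parameter means "the full AVX512 stack is present" as opposed to AVX10.1-only hardware):

    enum FixupId { AVX512F_FixupScalar, AVX10v1_FixupScalar, None };

    FixupId ChooseFixupScalar(bool evexSupported, bool isV512Supported)
    {
        if (!evexSupported)
            return None; // the needsFixup path is skipped entirely

        return isV512Supported ? AVX512F_FixupScalar   // full AVX512 stack
                               : AVX10v1_FixupScalar;  // AVX10.1-only hardware
    }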
diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp
index 2675303454bebe..409b662825de74 100644
--- a/src/coreclr/jit/lowerxarch.cpp
+++ b/src/coreclr/jit/lowerxarch.cpp
@@ -1303,7 +1303,7 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn
 }

 //----------------------------------------------------------------------------------------------
-// LowerFusedMultiplyAdd: Changes NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar produced
+// LowerFusedMultiplyAdd: Changes NI_FMA_MultiplyAddScalar produced
 // by Math(F).FusedMultiplyAdd to a better FMA intrinsics if there are GT_NEG around in order
 // to eliminate them.
 //
@@ -1311,22 +1311,21 @@ void Lowering::LowerHWIntrinsicCC(GenTreeHWIntrinsic* node, NamedIntrinsic newIn
 //     node - The hardware intrinsic node
 //
 //  Notes:
-//     Math(F).FusedMultiplyAdd is expanded into NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar and
+//     Math(F).FusedMultiplyAdd is expanded into NI_FMA_MultiplyAddScalar and
 //     depending on additional GT_NEG nodes around it can be:
 //
-//      x *  y + z -> NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar
-//      x * -y + z -> NI_FMA_MultiplyAddNegatedScalar / NI_AVX10v1_MultiplyAddNegatedScalar
-//     -x *  y + z -> NI_FMA_MultiplyAddNegatedScalar / NI_AVX10v1_MultiplyAddNegatedScalar
-//     -x * -y + z -> NI_FMA_MultiplyAddScalar / NI_AVX10v1_MultiplyAddScalar
-//      x *  y - z -> NI_FMA_MultiplySubtractScalar / NI_AVX10v1_MultiplySubtractScalar
-//      x * -y - z -> NI_FMA_MultiplySubtractNegatedScalar / NI_AVX10v1_MultiplySubtractNegatedScalar
-//     -x *  y - z -> NI_FMA_MultiplySubtractNegatedScalar / NI_AVX10v1_MultiplySubtractNegatedScalar
-//     -x * -y - z -> NI_FMA_MultiplySubtractScalar / NI_AVX10v1_MultiplySubtractScalar
+//      x *  y + z -> NI_FMA_MultiplyAddScalar
+//      x * -y + z -> NI_FMA_MultiplyAddNegatedScalar
+//     -x *  y + z -> NI_FMA_MultiplyAddNegatedScalar
+//     -x * -y + z -> NI_FMA_MultiplyAddScalar
+//      x *  y - z -> NI_FMA_MultiplySubtractScalar
+//      x * -y - z -> NI_FMA_MultiplySubtractNegatedScalar
+//     -x *  y - z -> NI_FMA_MultiplySubtractNegatedScalar
+//     -x * -y - z -> NI_FMA_MultiplySubtractScalar
 //
 void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node)
 {
-    assert((node->GetHWIntrinsicId() == NI_FMA_MultiplyAddScalar) ||
-           (node->GetHWIntrinsicId() == NI_AVX10v1_MultiplyAddScalar));
+    assert(node->GetHWIntrinsicId() == NI_FMA_MultiplyAddScalar);
     GenTreeHWIntrinsic* createScalarOps[3];

     for (size_t i = 1; i <= 3; i++)
@@ -1370,26 +1369,11 @@ void Lowering::LowerFusedMultiplyAdd(GenTreeHWIntrinsic* node)
         createScalarOps[2]->Op(1)->ClearContained();
         ContainCheckHWIntrinsic(createScalarOps[2]);

-        if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
-        {
-            node->ChangeHWIntrinsicId(negMul ? NI_AVX10v1_MultiplySubtractNegatedScalar
-                                             : NI_AVX10v1_MultiplySubtractScalar);
-        }
-        else
-        {
-            node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplySubtractNegatedScalar : NI_FMA_MultiplySubtractScalar);
-        }
+        node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplySubtractNegatedScalar : NI_FMA_MultiplySubtractScalar);
     }
     else
     {
-        if (comp->compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
-        {
-            node->ChangeHWIntrinsicId(negMul ? NI_AVX10v1_MultiplyAddNegatedScalar : NI_AVX10v1_MultiplyAddScalar);
-        }
-        else
-        {
-            node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplyAddNegatedScalar : NI_FMA_MultiplyAddScalar);
-        }
+        node->ChangeHWIntrinsicId(negMul ? NI_FMA_MultiplyAddNegatedScalar : NI_FMA_MultiplyAddScalar);
     }
 }

@@ -2150,7 +2134,6 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
             break;

         case NI_FMA_MultiplyAddScalar:
-        case NI_AVX10v1_MultiplyAddScalar:
             LowerFusedMultiplyAdd(node);
             break;

@@ -4907,7 +4890,7 @@ GenTree* Lowering::LowerHWIntrinsicGetElement(GenTreeHWIntrinsic* node)
             NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128;

-            if ((genTypeSize(simdBaseType) == 8) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+            if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
             {
                 extractIntrinsicId = NI_AVX512DQ_ExtractVector128;
             }
@@ -5191,7 +5174,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
             NamedIntrinsic extractIntrinsicId = NI_AVX512F_ExtractVector128;

-            if ((genTypeSize(simdBaseType) == 8) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+            if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
             {
                 extractIntrinsicId = NI_AVX512DQ_ExtractVector128;
             }
@@ -5211,7 +5194,7 @@ GenTree* Lowering::LowerHWIntrinsicWithElement(GenTreeHWIntrinsic* node)
             NamedIntrinsic insertIntrinsicId = NI_AVX512F_InsertVector128;

-            if ((genTypeSize(simdBaseType) == 8) && !comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
+            if ((genTypeSize(simdBaseType) == 8) && comp->compOpportunisticallyDependsOn(InstructionSet_AVX512DQ))
             {
                 insertIntrinsicId = NI_AVX512DQ_InsertVector128;
             }
@@ -8708,9 +8691,9 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
         case NI_AVX10v1_RotateRight:
         case NI_AVX10v1_RoundScale:
         case NI_AVX10v1_ShiftRightArithmetic:
+        case NI_AVX10v1_Shuffle2x128:
         case NI_AVX10v1_SumAbsoluteDifferencesInBlock32:
         case NI_AVX10v1_TernaryLogic:
-        case NI_AVX10v1_Shuffle2x128:
         case NI_AVX10v1_V512_Range:
         case NI_AVX10v1_V512_Reduce:
         {
@@ -8847,6 +8830,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
         case NI_AVX512F_RoundScaleScalar:
         case NI_AVX512DQ_RangeScalar:
         case NI_AVX512DQ_ReduceScalar:
+        case NI_AVX10v1_FixupScalar:
         case NI_AVX10v1_GetMantissaScalar:
         case NI_AVX10v1_RangeScalar:
         case NI_AVX10v1_ReduceScalar:
@@ -8937,6 +8921,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* parentNode, GenTre
         case NI_AVX512F_X64_ConvertScalarToVector128Single:
         case NI_AVX10v1_X64_ConvertScalarToVector128Double:
         case NI_AVX10v1_X64_ConvertScalarToVector128Single:
+        case NI_AVX10v1_ConvertScalarToVector128Double:
         case NI_AVX10v1_ConvertScalarToVector128Single:
         {
             if (!varTypeIsIntegral(childNode->TypeGet()))
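With the AVX10.1 duplicates gone, the sign-folding table above is a pure function of which operands arrive under a GT_NEG. A compact restatement of the same mapping, using a hypothetical enum in place of the NamedIntrinsic ids:

    enum FmaId { MultiplyAddScalar, MultiplyAddNegatedScalar,
                 MultiplySubtractScalar, MultiplySubtractNegatedScalar };

    // The multiplicands' negations cancel pairwise, so only the XOR of their
    // signs matters (negMul); a negated z selects the Subtract forms.
    FmaId Fold(bool negX, bool negY, bool negZ)
    {
        bool negMul = (negX != negY);
        if (negZ)
            return negMul ? MultiplySubtractNegatedScalar : MultiplySubtractScalar;
        return negMul ? MultiplyAddNegatedScalar : MultiplyAddScalar;
    }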
diff --git a/src/coreclr/jit/lsraxarch.cpp b/src/coreclr/jit/lsraxarch.cpp
index a834eb7ad9acea..b037aa1507d659 100644
--- a/src/coreclr/jit/lsraxarch.cpp
+++ b/src/coreclr/jit/lsraxarch.cpp
@@ -2455,16 +2455,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCou
         case NI_AVX10v1_FusedMultiplyAddScalar:
         case NI_AVX10v1_FusedMultiplySubtractNegatedScalar:
         case NI_AVX10v1_FusedMultiplySubtractScalar:
-        case NI_AVX10v1_MultiplyAdd:
-        case NI_AVX10v1_MultiplyAddNegated:
-        case NI_AVX10v1_MultiplyAddNegatedScalar:
-        case NI_AVX10v1_MultiplyAddScalar:
-        case NI_AVX10v1_MultiplyAddSubtract:
-        case NI_AVX10v1_MultiplySubtract:
-        case NI_AVX10v1_MultiplySubtractAdd:
-        case NI_AVX10v1_MultiplySubtractNegated:
-        case NI_AVX10v1_MultiplySubtractNegatedScalar:
-        case NI_AVX10v1_MultiplySubtractScalar:
        {
             assert((numArgs == 3) || (intrinsicTree->OperIsEmbRoundingEnabled()));
             assert(isRMW);
diff --git a/src/coreclr/jit/simdashwintrinsic.cpp b/src/coreclr/jit/simdashwintrinsic.cpp
index 2bf103c2ba2f79..44a92f1479a4a3 100644
--- a/src/coreclr/jit/simdashwintrinsic.cpp
+++ b/src/coreclr/jit/simdashwintrinsic.cpp
@@ -1084,7 +1084,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
             assert(sig->numArgs == 1);
             assert(varTypeIsLong(simdBaseType));
             NamedIntrinsic intrinsic = NI_Illegal;
-            if (compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
+            if ((simdSize != 64) && compOpportunisticallyDependsOn(InstructionSet_AVX10v1))
             {
                 if (simdSize == 32)
                 {
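The added (simdSize != 64) guard keeps 512-bit operands off the AVX10.1 intrinsics, which here only cover 128- and 256-bit vectors; the 512-bit case must take the AVX512 path. The shape of that selection, as a sketch with hypothetical ids (the real long-element operation lives in impSimdAsHWIntrinsicSpecial):

    enum LongOpId { AVX10v1_128_256, AVX512_512, AVX512_VL_128_256, Illegal };

    LongOpId ChooseLongOp(unsigned simdSize, bool hasAvx10v1, bool hasAvx512)
    {
        if ((simdSize != 64) && hasAvx10v1)
            return AVX10v1_128_256;            // AVX10.1 covers only 128/256-bit here
        if (hasAvx512)
            return (simdSize == 64) ? AVX512_512 : AVX512_VL_128_256;
        return Illegal;                        // caller falls back to software expansion
    }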
diff --git a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
index c2b304996debc5..a5f498ebc57a2c 100644
--- a/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
+++ b/src/coreclr/tools/Common/Compiler/HardwareIntrinsicHelpers.cs
@@ -70,20 +70,12 @@ private static class XArchIntrinsicConstants
             public const int Lzcnt = 0x1000;
             public const int AvxVnni = 0x2000;
             public const int Movbe = 0x4000;
-            public const int Avx512f = 0x8000;
-            public const int Avx512f_vl = 0x10000;
-            public const int Avx512bw = 0x20000;
-            public const int Avx512bw_vl = 0x40000;
-            public const int Avx512cd = 0x80000;
-            public const int Avx512cd_vl = 0x100000;
-            public const int Avx512dq = 0x200000;
-            public const int Avx512dq_vl = 0x400000;
-            public const int Avx512Vbmi = 0x800000;
-            public const int Avx512Vbmi_vl = 0x1000000;
-            public const int Serialize = 0x2000000;
-            public const int Avx10v1 = 0x4000000;
-            public const int Avx10v1_v512 = 0x8000000;
-            public const int Evex = 0x10000000;
+            public const int Avx512 = 0x8000;
+            public const int Avx512Vbmi = 0x10000;
+            public const int Serialize = 0x20000;
+            public const int Avx10v1 = 0x40000;
+            public const int Avx10v1_v512 = 0x80000;
+            public const int Evex = 0x100000;

             public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
             {
@@ -117,26 +109,22 @@ public static void AddToBuilder(InstructionSetSupportBuilder builder, int flags)
                 if ((flags & AvxVnni) != 0)
                     builder.AddSupportedInstructionSet("avxvnni");
                 if ((flags & Movbe) != 0)
                     builder.AddSupportedInstructionSet("movbe");
-                if ((flags & Avx512f) != 0)
+                if ((flags & Avx512) != 0)
+                {
                     builder.AddSupportedInstructionSet("avx512f");
-                if ((flags & Avx512f_vl) != 0)
                     builder.AddSupportedInstructionSet("avx512f_vl");
-                if ((flags & Avx512bw) != 0)
                     builder.AddSupportedInstructionSet("avx512bw");
-                if ((flags & Avx512bw_vl) != 0)
                     builder.AddSupportedInstructionSet("avx512bw_vl");
-                if ((flags & Avx512cd) != 0)
                     builder.AddSupportedInstructionSet("avx512cd");
-                if ((flags & Avx512cd_vl) != 0)
                     builder.AddSupportedInstructionSet("avx512cd_vl");
-                if ((flags & Avx512dq) != 0)
                     builder.AddSupportedInstructionSet("avx512dq");
-                if ((flags & Avx512dq_vl) != 0)
                     builder.AddSupportedInstructionSet("avx512dq_vl");
+                }
                 if ((flags & Avx512Vbmi) != 0)
+                {
                     builder.AddSupportedInstructionSet("avx512vbmi");
-                if ((flags & Avx512Vbmi_vl) != 0)
                     builder.AddSupportedInstructionSet("avx512vbmi_vl");
+                }
                 if ((flags & Serialize) != 0)
                     builder.AddSupportedInstructionSet("serialize");
                 if ((flags & Avx10v1) != 0)
@@ -186,26 +174,26 @@ public static int FromInstructionSet(InstructionSet instructionSet)
                     InstructionSet.X64_AVXVNNI_X64 => AvxVnni,
                     InstructionSet.X64_MOVBE => Movbe,
                     InstructionSet.X64_MOVBE_X64 => Movbe,
-                    InstructionSet.X64_AVX512F => Avx512f,
-                    InstructionSet.X64_AVX512F_X64 => Avx512f,
-                    InstructionSet.X64_AVX512F_VL => Avx512f_vl,
-                    InstructionSet.X64_AVX512F_VL_X64 => Avx512f_vl,
-                    InstructionSet.X64_AVX512BW => Avx512bw,
-                    InstructionSet.X64_AVX512BW_X64 => Avx512bw,
-                    InstructionSet.X64_AVX512BW_VL => Avx512bw_vl,
-                    InstructionSet.X64_AVX512BW_VL_X64 => Avx512bw_vl,
-                    InstructionSet.X64_AVX512CD => Avx512cd,
-                    InstructionSet.X64_AVX512CD_X64 => Avx512cd,
-                    InstructionSet.X64_AVX512CD_VL => Avx512cd_vl,
-                    InstructionSet.X64_AVX512CD_VL_X64 => Avx512cd_vl,
-                    InstructionSet.X64_AVX512DQ => Avx512dq,
-                    InstructionSet.X64_AVX512DQ_X64 => Avx512dq,
-                    InstructionSet.X64_AVX512DQ_VL => Avx512dq_vl,
-                    InstructionSet.X64_AVX512DQ_VL_X64 => Avx512dq_vl,
+                    InstructionSet.X64_AVX512F => Avx512,
+                    InstructionSet.X64_AVX512F_X64 => Avx512,
+                    InstructionSet.X64_AVX512F_VL => Avx512,
+                    InstructionSet.X64_AVX512F_VL_X64 => Avx512,
+                    InstructionSet.X64_AVX512BW => Avx512,
+                    InstructionSet.X64_AVX512BW_X64 => Avx512,
+                    InstructionSet.X64_AVX512BW_VL => Avx512,
+                    InstructionSet.X64_AVX512BW_VL_X64 => Avx512,
+                    InstructionSet.X64_AVX512CD => Avx512,
+                    InstructionSet.X64_AVX512CD_X64 => Avx512,
+                    InstructionSet.X64_AVX512CD_VL => Avx512,
+                    InstructionSet.X64_AVX512CD_VL_X64 => Avx512,
+                    InstructionSet.X64_AVX512DQ => Avx512,
+                    InstructionSet.X64_AVX512DQ_X64 => Avx512,
+                    InstructionSet.X64_AVX512DQ_VL => Avx512,
+                    InstructionSet.X64_AVX512DQ_VL_X64 => Avx512,
                     InstructionSet.X64_AVX512VBMI => Avx512Vbmi,
                     InstructionSet.X64_AVX512VBMI_X64 => Avx512Vbmi,
-                    InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi_vl,
-                    InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi_vl,
+                    InstructionSet.X64_AVX512VBMI_VL => Avx512Vbmi,
+                    InstructionSet.X64_AVX512VBMI_VL_X64 => Avx512Vbmi,
                     InstructionSet.X64_X86Serialize => Serialize,
                     InstructionSet.X64_X86Serialize_X64 => Serialize,
                     InstructionSet.X64_AVX10v1 => Avx10v1,
@@ -227,7 +215,7 @@ public static int FromInstructionSet(InstructionSet instructionSet)
                     // Vector Sizes
                     InstructionSet.X64_VectorT128 => 0,
                     InstructionSet.X64_VectorT256 => Avx2,
-                    InstructionSet.X64_VectorT512 => Avx512f,
+                    InstructionSet.X64_VectorT512 => Avx512,

                     _ => throw new NotSupportedException(((InstructionSet_X64)instructionSet).ToString())
                 };
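Collapsing the eight AVX512 bits into one Avx512 constant is sound because the runtime only ever reports that family as a unit (see the compiler.cpp change above); the single flag fans back out to the full ISA group when the supported set is rebuilt. A sketch of that expansion direction, reusing the names and value from the change (the free function is hypothetical):

    #include <initializer_list>
    #include <string>
    #include <vector>

    const int Avx512 = 0x8000; // one bit stands for the whole F/BW/CD/DQ (+VL) group

    std::vector<std::string> ExpandAvx512(int flags)
    {
        std::vector<std::string> isas;
        if ((flags & Avx512) != 0)
        {
            for (const char* name : {"avx512f", "avx512f_vl", "avx512bw", "avx512bw_vl",
                                     "avx512cd", "avx512cd_vl", "avx512dq", "avx512dq_vl"})
                isas.push_back(name);
        }
        return isas;
    }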
diff --git a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
index facc1d70acc6f9..9971accb07818d 100644
--- a/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
+++ b/src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
@@ -722,12 +722,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X64_SSSE3);
                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
                    resultflags.AddInstructionSet(InstructionSet.X64_SSE41);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT))
+                    resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                    resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AES))
-                    resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
                if (resultflags.HasInstructionSet(InstructionSet.X64_BMI1))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                if (resultflags.HasInstructionSet(InstructionSet.X64_BMI2))
@@ -736,22 +736,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                if (resultflags.HasInstructionSet(InstructionSet.X64_LZCNT))
                    resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
-                    resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT))
-                    resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128))
-                    resultflags.AddInstructionSet(InstructionSet.X64_SSE);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_Vector512))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
                if (resultflags.HasInstructionSet(InstructionSet.X64_MOVBE))
                    resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize))
-                    resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
                if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
@@ -784,10 +770,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
+                if (resultflags.HasInstructionSet(InstructionSet.X64_AES))
+                    resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_PCLMULQDQ))
+                    resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
-                    resultflags.AddInstructionSet(InstructionSet.X64_FMA);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_X86Serialize))
+                    resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
                    resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
@@ -812,18 +802,18 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1_V512))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_Vector128))
+                    resultflags.AddInstructionSet(InstructionSet.X64_SSE);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_Vector256))
+                    resultflags.AddInstructionSet(InstructionSet.X64_AVX);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_Vector512))
+                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
                if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT128))
                    resultflags.AddInstructionSet(InstructionSet.X64_SSE2);
                if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT256))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
                if (resultflags.HasInstructionSet(InstructionSet.X64_VectorT512))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512BW_VL);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512CD_VL);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512DQ_VL);
                break;

            case TargetArchitecture.X86:
@@ -839,12 +829,12 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X86_SSSE3);
                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
                    resultflags.AddInstructionSet(InstructionSet.X86_SSE41);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_POPCNT))
+                    resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                    resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AES))
-                    resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
                if (resultflags.HasInstructionSet(InstructionSet.X86_BMI1))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX);
                if (resultflags.HasInstructionSet(InstructionSet.X86_BMI2))
@@ -853,22 +843,8 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX);
                if (resultflags.HasInstructionSet(InstructionSet.X86_LZCNT))
                    resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ))
-                    resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_POPCNT))
-                    resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128))
-                    resultflags.AddInstructionSet(InstructionSet.X86_SSE);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_Vector512))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
                if (resultflags.HasInstructionSet(InstructionSet.X86_MOVBE))
                    resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize))
-                    resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
                if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
@@ -901,10 +877,14 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
+                if (resultflags.HasInstructionSet(InstructionSet.X86_AES))
+                    resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_PCLMULQDQ))
+                    resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_AVXVNNI))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
-                    resultflags.AddInstructionSet(InstructionSet.X86_FMA);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_X86Serialize))
+                    resultflags.AddInstructionSet(InstructionSet.X86_X86Base);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
                    resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
@@ -929,18 +909,18 @@ public static InstructionSetFlags ExpandInstructionSetByImplicationHelper(Target
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1_V512))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_Vector128))
+                    resultflags.AddInstructionSet(InstructionSet.X86_SSE);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_Vector256))
+                    resultflags.AddInstructionSet(InstructionSet.X86_AVX);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_Vector512))
+                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
                if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT128))
                    resultflags.AddInstructionSet(InstructionSet.X86_SSE2);
                if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT256))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
                if (resultflags.HasInstructionSet(InstructionSet.X86_VectorT512))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512BW_VL);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512CD_VL);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512DQ_VL);
                break;
            }
        } while (!oldflags.Equals(resultflags));
@@ -1083,12 +1063,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X64_SSE41);
                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE41))
                    resultflags.AddInstructionSet(InstructionSet.X64_SSE42);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
+                    resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX2);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AES);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
                    resultflags.AddInstructionSet(InstructionSet.X64_BMI1);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
@@ -1097,22 +1077,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X64_FMA);
                if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                    resultflags.AddInstructionSet(InstructionSet.X64_LZCNT);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
-                    resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
-                    resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
-                    resultflags.AddInstructionSet(InstructionSet.X64_Vector128);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
-                    resultflags.AddInstructionSet(InstructionSet.X64_Vector256);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X64_Vector512);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE42))
                    resultflags.AddInstructionSet(InstructionSet.X64_MOVBE);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
-                    resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
                    resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
                if (resultflags.HasInstructionSet(InstructionSet.X64_FMA))
@@ -1145,10 +1111,14 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512VBMI_VL);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
+                    resultflags.AddInstructionSet(InstructionSet.X64_AES);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
+                    resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
-                    resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_FMA))
-                    resultflags.AddInstructionSet(InstructionSet.X64_EVEX);
+                    resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
+                    resultflags.AddInstructionSet(InstructionSet.X64_X86Serialize);
                if (resultflags.HasInstructionSet(InstructionSet.X64_EVEX))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX10v1))
@@ -1173,18 +1143,18 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512VBMI_VL))
                    resultflags.AddInstructionSet(InstructionSet.X64_AVX10v1_V512);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
+                    resultflags.AddInstructionSet(InstructionSet.X64_Vector128);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX))
+                    resultflags.AddInstructionSet(InstructionSet.X64_Vector256);
+                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
+                    resultflags.AddInstructionSet(InstructionSet.X64_Vector512);
                if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
                    resultflags.AddInstructionSet(InstructionSet.X64_VectorT128);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX2))
                    resultflags.AddInstructionSet(InstructionSet.X64_VectorT256);
                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512F))
                    resultflags.AddInstructionSet(InstructionSet.X64_VectorT512);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512BW_VL))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512CD_VL))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X64_AVX512DQ_VL))
-                    resultflags.AddInstructionSet(InstructionSet.X64_AVX512F);
                break;

            case TargetArchitecture.X86:
@@ -1200,12 +1170,12 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X86_SSE41);
                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE41))
                    resultflags.AddInstructionSet(InstructionSet.X86_SSE42);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
+                    resultflags.AddInstructionSet(InstructionSet.X86_POPCNT);
                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX2);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AES);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
                    resultflags.AddInstructionSet(InstructionSet.X86_BMI1);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
@@ -1214,22 +1184,8 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X86_FMA);
                if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
                    resultflags.AddInstructionSet(InstructionSet.X86_LZCNT);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
-                    resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
-                    resultflags.AddInstructionSet(InstructionSet.X86_POPCNT);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE))
-                    resultflags.AddInstructionSet(InstructionSet.X86_Vector128);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
-                    resultflags.AddInstructionSet(InstructionSet.X86_Vector256);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
-                    resultflags.AddInstructionSet(InstructionSet.X86_Vector512);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI);
                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE42))
                    resultflags.AddInstructionSet(InstructionSet.X86_MOVBE);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
-                    resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
                    resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
                if (resultflags.HasInstructionSet(InstructionSet.X86_FMA))
@@ -1262,10 +1218,14 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512VBMI_VL);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
+                    resultflags.AddInstructionSet(InstructionSet.X86_AES);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
+                    resultflags.AddInstructionSet(InstructionSet.X86_PCLMULQDQ);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
-                    resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_FMA))
-                    resultflags.AddInstructionSet(InstructionSet.X86_EVEX);
+                    resultflags.AddInstructionSet(InstructionSet.X86_AVXVNNI);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_X86Base))
+                    resultflags.AddInstructionSet(InstructionSet.X86_X86Serialize);
                if (resultflags.HasInstructionSet(InstructionSet.X86_EVEX))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX10v1))
@@ -1290,18 +1250,18 @@ private static InstructionSetFlags ExpandInstructionSetByReverseImplicationHelpe
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512VBMI_VL))
                    resultflags.AddInstructionSet(InstructionSet.X86_AVX10v1_V512);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE))
+                    resultflags.AddInstructionSet(InstructionSet.X86_Vector128);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX))
+                    resultflags.AddInstructionSet(InstructionSet.X86_Vector256);
+                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
+                    resultflags.AddInstructionSet(InstructionSet.X86_Vector512);
                if (resultflags.HasInstructionSet(InstructionSet.X86_SSE2))
                    resultflags.AddInstructionSet(InstructionSet.X86_VectorT128);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX2))
                    resultflags.AddInstructionSet(InstructionSet.X86_VectorT256);
                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512F))
                    resultflags.AddInstructionSet(InstructionSet.X86_VectorT512);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512BW_VL))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512CD_VL))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
-                if (resultflags.HasInstructionSet(InstructionSet.X86_AVX512DQ_VL))
-                    resultflags.AddInstructionSet(InstructionSet.X86_AVX512F);
                break;
            }
        } while (!oldflags.Equals(resultflags));
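Both the forward and reverse helpers above run their implication edges inside a `do { ... } while (!oldflags.Equals(resultflags));` loop, i.e. they compute a fixed point. A hypothetical mini-model of that mechanism (types and names are made up for illustration; the generated code works on bit flags rather than strings):

```csharp
using System.Collections.Generic;

static class ImplicationClosure // illustrative model only
{
    // Apply "From implies To" edges until the set stops growing, mirroring
    // the do/while over resultflags in the generated helpers.
    public static HashSet<string> Expand(HashSet<string> isas,
                                         IEnumerable<(string From, string To)> edges)
    {
        bool changed;
        do
        {
            changed = false;
            foreach (var (from, to) in edges)
            {
                if (isas.Contains(from) && isas.Add(to))
                    changed = true;
            }
        } while (changed);
        return isas;
    }
}
```

Because the loop reruns to a fixed point, edge order mostly affects how many passes are needed; the comment added to InstructionSetDesc.txt below explains why the synthetic `Vector128/256/512` edges are nevertheless kept after the core ISA edges for the disable direction.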
diff --git a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
index c75cff32f35a2e..dbb8e6efd20adf 100644
--- a/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
+++ b/src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
@@ -101,45 +101,55 @@
 vectorinstructionset,X86 ,Vector128
 vectorinstructionset,X86 ,Vector256
 vectorinstructionset,X86 ,Vector512

+; x86-64-v1
+
 implication ,X86 ,SSE ,X86Base
 implication ,X86 ,SSE2 ,SSE
+
+; x86-64-v2
+
 implication ,X86 ,SSE3 ,SSE2
 implication ,X86 ,SSSE3 ,SSE3
 implication ,X86 ,SSE41 ,SSSE3
 implication ,X86 ,SSE42 ,SSE41
+implication ,X86 ,POPCNT ,SSE42
+
+; x86-64-v3
+
 implication ,X86 ,AVX ,SSE42
 implication ,X86 ,AVX2 ,AVX
-implication ,X86 ,AES ,SSE2
 implication ,X86 ,BMI1 ,AVX
 implication ,X86 ,BMI2 ,AVX
 implication ,X86 ,FMA ,AVX
 implication ,X86 ,LZCNT ,X86Base
-implication ,X86 ,PCLMULQDQ ,SSE2
-implication ,X86 ,POPCNT ,SSE42
-implication ,X86 ,Vector128 ,SSE
-implication ,X86 ,Vector256 ,AVX
-implication ,X86 ,Vector512 ,AVX512F
-implication ,X86 ,AVXVNNI ,AVX2
 implication ,X86 ,MOVBE ,SSE42
-implication ,X86 ,X86Serialize ,X86Base
+
+; x86-64-v4
+
 implication ,X86 ,EVEX ,AVX2
 implication ,X86 ,EVEX ,FMA
 implication ,X86 ,AVX512F ,EVEX
 implication ,X86 ,AVX512F_VL ,AVX512F
-implication ,X86 ,AVX512CD ,AVX512F
-implication ,X86 ,AVX512CD_VL ,AVX512CD
-implication ,X86 ,AVX512CD_VL ,AVX512F_VL
 implication ,X86 ,AVX512BW ,AVX512F
 implication ,X86 ,AVX512BW_VL ,AVX512BW
 implication ,X86 ,AVX512BW_VL ,AVX512F_VL
+implication ,X86 ,AVX512CD ,AVX512F
+implication ,X86 ,AVX512CD_VL ,AVX512CD
+implication ,X86 ,AVX512CD_VL ,AVX512F_VL
 implication ,X86 ,AVX512DQ ,AVX512F
 implication ,X86 ,AVX512DQ_VL ,AVX512DQ
 implication ,X86 ,AVX512DQ_VL ,AVX512F_VL
 implication ,X86 ,AVX512VBMI ,AVX512BW
 implication ,X86 ,AVX512VBMI_VL ,AVX512VBMI
 implication ,X86 ,AVX512VBMI_VL ,AVX512BW_VL
-implication ,X86 ,EVEX ,AVX2
-implication ,X86 ,EVEX ,FMA
+
+; Unversioned
+
+implication ,X86 ,AES ,SSE2
+implication ,X86 ,PCLMULQDQ ,SSE2
+implication ,X86 ,AVXVNNI ,AVX2
+implication ,X86 ,X86Serialize ,X86Base
+
 implication ,X86 ,AVX10v1 ,EVEX
 implication ,X86 ,AVX10v1_V512 ,AVX10v1
 implication ,X86 ,AVX10v1_V512 ,AVX512F
@@ -152,20 +162,19 @@ implication ,X86 ,AVX10v1_V512 ,AVX512DQ
 implication ,X86 ,AVX10v1_V512 ,AVX512DQ_VL
 implication ,X86 ,AVX10v1_V512 ,AVX512VBMI
 implication ,X86 ,AVX10v1_V512 ,AVX512VBMI_VL
+
+; These synthetic ISAs need to appear after the core ISAs
+; as they depend on the other implications being correct first
+; otherwise they may not be disabled if the required isa is disabled
+
+implication ,X86 ,Vector128 ,SSE
+implication ,X86 ,Vector256 ,AVX
+implication ,X86 ,Vector512 ,AVX512F
+
 implication ,X86 ,VectorT128 ,SSE2
 implication ,X86 ,VectorT256 ,AVX2
 implication ,X86 ,VectorT512 ,AVX512F

-; While the AVX-512 ISAs can be individually lit-up, they really
-; need F, BW, CD, DQ, and VL to be fully functional without adding
-; significant complexity into the JIT. Additionally, unlike AVX/AVX2
-; there was never really any hardware that didn't provide all 5 at
-; once, with the notable exception being Knight's Landing which
-; provided a similar but not quite the same feature.
-implication ,X86 ,AVX512F ,AVX512BW_VL
-implication ,X86 ,AVX512F ,AVX512CD_VL
-implication ,X86 ,AVX512F ,AVX512DQ_VL
-
 ; Definition of X64 instruction sets
 definearch ,X64 ,64Bit ,X64, X64
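The new section comments group the implication table by the x86-64 psABI microarchitecture levels. For orientation, a rough summary of which ISAs each level name covers (illustrative data, following the psABI definitions rather than this file's exact edge list):

```csharp
using System.Collections.Generic;

static class X64Levels // illustrative summary only
{
    public static readonly Dictionary<string, string[]> IsasByLevel = new()
    {
        ["x86-64-v1"] = new[] { "SSE", "SSE2" },
        ["x86-64-v2"] = new[] { "SSE3", "SSSE3", "SSE4.1", "SSE4.2", "POPCNT" },
        ["x86-64-v3"] = new[] { "AVX", "AVX2", "BMI1", "BMI2", "FMA", "LZCNT", "MOVBE" },
        ["x86-64-v4"] = new[] { "AVX512F", "AVX512BW", "AVX512CD", "AVX512DQ", "AVX512VL" },
    };
}
```

ISAs such as AES, PCLMULQDQ, AVXVNNI, and X86Serialize are not part of any level, which is why the diff moves them under the new `; Unversioned` heading.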
diff --git a/src/coreclr/vm/codeman.cpp b/src/coreclr/vm/codeman.cpp
index 70d1836da124e1..7c5fe1a920a3e3 100644
--- a/src/coreclr/vm/codeman.cpp
+++ b/src/coreclr/vm/codeman.cpp
@@ -1270,12 +1270,14 @@ void EEJitManager::SetCpuInfo()
         CPUCompileFlags.Set(InstructionSet_VectorT256);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && (maxVectorTBitWidth >= 512))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0) && (maxVectorTBitWidth >= 512))
     {
         // We require 512-bit Vector<T> to be opt-in
         CPUCompileFlags.Set(InstructionSet_VectorT512);
     }

+    // x86-64-v1
+
     if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableHWIntrinsic))
     {
         CPUCompileFlags.Set(InstructionSet_X86Base);
@@ -1291,74 +1293,47 @@ void EEJitManager::SetCpuInfo()
         CPUCompileFlags.Set(InstructionSet_SSE2);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Aes) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAES))
-    {
-        CPUCompileFlags.Set(InstructionSet_AES);
-    }
-
-    if (((cpuFeatures & XArchIntrinsicConstants_Avx) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX))
-    {
-        CPUCompileFlags.Set(InstructionSet_AVX);
-    }
-
-    if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX2))
-    {
-        CPUCompileFlags.Set(InstructionSet_AVX2);
-    }
-
-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512f) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F))
-    {
-        CPUCompileFlags.Set(InstructionSet_AVX512F);
-    }
-
-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512f_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL))
-    {
-        CPUCompileFlags.Set(InstructionSet_AVX512F_VL);
-    }
-
-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW))
-    {
-        CPUCompileFlags.Set(InstructionSet_AVX512BW);
-    }
+    // x86-64-v2

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512bw_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Sse3) != 0) &&
+        CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) &&
+        CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4))
     {
-        CPUCompileFlags.Set(InstructionSet_AVX512BW_VL);
+        // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that
+        // is a preexisting config flag that controls the SSE3+ ISAs
+        CPUCompileFlags.Set(InstructionSet_SSE3);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD))
+    if (((cpuFeatures & XArchIntrinsicConstants_Ssse3) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSSE3))
     {
-        CPUCompileFlags.Set(InstructionSet_AVX512CD);
+        CPUCompileFlags.Set(InstructionSet_SSSE3);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512cd_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Sse41) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE41))
     {
-        CPUCompileFlags.Set(InstructionSet_AVX512CD_VL);
+        CPUCompileFlags.Set(InstructionSet_SSE41);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ))
+    if (((cpuFeatures & XArchIntrinsicConstants_Sse42) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE42))
     {
-        CPUCompileFlags.Set(InstructionSet_AVX512DQ);
+        CPUCompileFlags.Set(InstructionSet_SSE42);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512dq_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Popcnt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePOPCNT))
     {
-        CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL);
+        CPUCompileFlags.Set(InstructionSet_POPCNT);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI))
-    {
-        CPUCompileFlags.Set(InstructionSet_AVX512VBMI);
-    }
+    // x86-64-v3

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi_vl) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX))
     {
-        CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL);
+        CPUCompileFlags.Set(InstructionSet_AVX);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_AvxVnni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI))
+    if (((cpuFeatures & XArchIntrinsicConstants_Avx2) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX2))
     {
-        CPUCompileFlags.Set(InstructionSet_AVXVNNI);
+        CPUCompileFlags.Set(InstructionSet_AVX2);
     }

     if (((cpuFeatures & XArchIntrinsicConstants_Bmi1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableBMI1))
@@ -1381,43 +1356,69 @@ void EEJitManager::SetCpuInfo()
         CPUCompileFlags.Set(InstructionSet_LZCNT);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Pclmulqdq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePCLMULQDQ))
-    {
-        CPUCompileFlags.Set(InstructionSet_PCLMULQDQ);
-    }
-
     if (((cpuFeatures & XArchIntrinsicConstants_Movbe) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableMOVBE))
     {
         CPUCompileFlags.Set(InstructionSet_MOVBE);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Popcnt) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePOPCNT))
+    // x86-64-v4
+
+    if (((cpuFeatures & XArchIntrinsicConstants_Evex) != 0) &&
+        ((cpuFeatures & XArchIntrinsicConstants_Avx512) != 0))
     {
-        CPUCompileFlags.Set(InstructionSet_POPCNT);
+        // While the AVX-512 ISAs can be individually lit-up, they really
+        // need F, BW, CD, DQ, and VL to be fully functional without adding
+        // significant complexity into the JIT. Additionally, unlike AVX/AVX2
+        // there was never really any hardware that didn't provide all 5 at
+        // once, with the notable exception being Knight's Landing which
+        // provided a similar but not quite the same feature.
+
+        if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512F_VL) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512BW_VL) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512CD_VL) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512DQ_VL))
+        {
+            CPUCompileFlags.Set(InstructionSet_EVEX);
+            CPUCompileFlags.Set(InstructionSet_AVX512F);
+            CPUCompileFlags.Set(InstructionSet_AVX512F_VL);
+            CPUCompileFlags.Set(InstructionSet_AVX512BW);
+            CPUCompileFlags.Set(InstructionSet_AVX512BW_VL);
+            CPUCompileFlags.Set(InstructionSet_AVX512CD);
+            CPUCompileFlags.Set(InstructionSet_AVX512CD_VL);
+            CPUCompileFlags.Set(InstructionSet_AVX512DQ);
+            CPUCompileFlags.Set(InstructionSet_AVX512DQ_VL);
+        }
     }

-    // We need to additionally check that EXTERNAL_EnableSSE3_4 is set, as that
-    // is a prexisting config flag that controls the SSE3+ ISAs
-    if (((cpuFeatures & XArchIntrinsicConstants_Sse3) != 0) &&
-        CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3) &&
-        CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE3_4))
+    if ((cpuFeatures & XArchIntrinsicConstants_Avx512Vbmi) != 0)
     {
-        CPUCompileFlags.Set(InstructionSet_SSE3);
+        if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI) &&
+            CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX512VBMI_VL))
+        {
+            CPUCompileFlags.Set(InstructionSet_AVX512VBMI);
+            CPUCompileFlags.Set(InstructionSet_AVX512VBMI_VL);
+        }
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Sse41) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE41))
+    // Unversioned
+
+    if (((cpuFeatures & XArchIntrinsicConstants_Aes) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAES))
     {
-        CPUCompileFlags.Set(InstructionSet_SSE41);
+        CPUCompileFlags.Set(InstructionSet_AES);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Sse42) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSE42))
+    if (((cpuFeatures & XArchIntrinsicConstants_Pclmulqdq) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnablePCLMULQDQ))
     {
-        CPUCompileFlags.Set(InstructionSet_SSE42);
+        CPUCompileFlags.Set(InstructionSet_PCLMULQDQ);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Ssse3) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableSSSE3))
+    if (((cpuFeatures & XArchIntrinsicConstants_AvxVnni) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVXVNNI))
     {
-        CPUCompileFlags.Set(InstructionSet_SSSE3);
+        CPUCompileFlags.Set(InstructionSet_AVXVNNI);
     }

     if (((cpuFeatures & XArchIntrinsicConstants_Serialize) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableX86Serialize))
@@ -1425,22 +1426,17 @@ void EEJitManager::SetCpuInfo()
         CPUCompileFlags.Set(InstructionSet_X86Serialize);
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Evex) != 0) && (CPUCompileFlags.IsSet(InstructionSet_AVX512F) || CPUCompileFlags.IsSet(InstructionSet_AVX10v1)))
+    if (((cpuFeatures & XArchIntrinsicConstants_Evex) != 0) &&
+        ((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0))
     {
-        CPUCompileFlags.Set(InstructionSet_EVEX);
-    }
-
-    // As Avx10v1_V512 could imply Avx10v1,
-    // then the flag check here can be conducted for only once, and let
-    // `EnusreValidInstructionSetSupport` to handle the illegal combination.
-    // To ensure `EnusreValidInstructionSetSupport` handle the dependency correctly, the implication
-    // defined in InstructionSetDesc.txt should be explicit, no transitive implication should be assumed.
-    if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1) != 0) && CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1))
-    {
-        CPUCompileFlags.Set(InstructionSet_AVX10v1);
+        if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_EnableAVX10v1))
+        {
+            CPUCompileFlags.Set(InstructionSet_EVEX);
+            CPUCompileFlags.Set(InstructionSet_AVX10v1);
+        }
     }

-    if (((cpuFeatures & XArchIntrinsicConstants_Avx10v1_V512) != 0))
+    if ((cpuFeatures & XArchIntrinsicConstants_Avx10v1_V512) != 0)
     {
         CPUCompileFlags.Set(InstructionSet_AVX10v1_V512);
     }
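The codeman.cpp change above makes AVX512 all-or-nothing: the hardware bits and every per-ISA config switch must agree before any of the family's flags light up. A C# paraphrase of that gate, under the assumption that the names mirror the constants and config switches in the diff (this is a sketch, not the runtime's code):

```csharp
using System;
using System.Linq;

static class Avx512Gate // illustrative paraphrase only
{
    public static bool ShouldEnableAvx512(int cpuFeatures, Func<string, bool> configEnabled)
    {
        const int Evex = 0x100000;  // XArchIntrinsicConstants_Evex
        const int Avx512 = 0x8000;  // XArchIntrinsicConstants_Avx512

        string[] switches =
        {
            "EnableAVX512F", "EnableAVX512F_VL", "EnableAVX512BW", "EnableAVX512BW_VL",
            "EnableAVX512CD", "EnableAVX512CD_VL", "EnableAVX512DQ", "EnableAVX512DQ_VL",
        };

        // All eight switches plus both hardware bits must agree; otherwise
        // none of the F/BW/CD/DQ (+VL) instruction sets are enabled.
        return (cpuFeatures & Evex) != 0
            && (cpuFeatures & Avx512) != 0
            && switches.All(configEnabled);
    }
}
```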
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.PlatformNotSupported.cs
index e65968465db49f..11ec731a685bf9 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.PlatformNotSupported.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.PlatformNotSupported.cs
@@ -1616,118 +1616,6 @@ internal Avx10v1() { }
        /// </summary>
        public static Vector256 Min(Vector256 left, Vector256 right) { throw new PlatformNotSupportedException(); }

-        /// <summary>
-        /// __m128 _mm_fmadd_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMADDPS xmm1, xmm2, xmm3/m128
-        /// VFMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplyAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fmadd_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMADDPD xmm1, xmm2, xmm3/m128
-        /// VFMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplyAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256 _mm256_fmadd_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMADDPS ymm1, ymm2, ymm3/m256
-        /// VFMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplyAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256d _mm256_fmadd_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMADDPD ymm1, ymm2, ymm3/m256
-        /// VFMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplyAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
-        /// VFNMADDPS xmm1, xmm2, xmm3/m128
-        /// VFNMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplyAddNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fnmadd_pd (__m128d a, __m128d b, __m128d c)
-        /// VFNMADDPD xmm1, xmm2, xmm3/m128
-        /// VFNMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplyAddNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256 _mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c)
-        /// VFNMADDPS ymm1, ymm2, ymm3/m256
-        /// VFNMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplyAddNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256d _mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c)
-        /// VFNMADDPD ymm1, ymm2, ymm3/m256
-        /// VFNMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplyAddNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fnmadd_ss (__m128 a, __m128 b, __m128 c)
-        /// VFNMADDSS xmm1, xmm2, xmm3/m32
-        /// VFNMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplyAddNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fnmadd_sd (__m128d a, __m128d b, __m128d c)
-        /// VFNMADDSD xmm1, xmm2, xmm3/m64
-        /// VFNMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplyAddNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fmadd_ss (__m128 a, __m128 b, __m128 c)
-        /// VFMADDSS xmm1, xmm2, xmm3/m32
-        /// VFMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplyAddScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fmadd_sd (__m128d a, __m128d b, __m128d c)
-        /// VFMADDSD xmm1, xmm2, xmm3/m64
-        /// VFMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplyAddScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMADDSUBPS xmm1, xmm2, xmm3/m128
-        /// VFMADDSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplyAddSubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMADDSUBPD xmm1, xmm2, xmm3/m128
-        /// VFMADDSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplyAddSubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256 _mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMADDSUBPS ymm1, ymm2, ymm3/m256
-        /// VFMADDSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplyAddSubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256d _mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMADDSUBPD ymm1, ymm2, ymm3/m256
-        /// VFMADDSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplyAddSubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
-
        /// <summary>
        /// __m128i _mm_mullo_epi64 (__m128i a, __m128i b)
        /// VPMULLQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
@@ -1764,118 +1652,6 @@ internal Avx10v1() { }
        /// </summary>
        public static Vector128 MultiplyScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) { throw new PlatformNotSupportedException(); }

-        /// <summary>
-        /// __m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMSUBPS xmm1, xmm2, xmm3/m128
-        /// VFMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplySubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fmsub_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMSUBPD xmm1, xmm2, xmm3/m128
-        /// VFMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplySubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256 _mm256_fmsub_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMSUBPS ymm1, ymm2, ymm3/m256
-        /// VFMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplySubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256d _mm256_fmsub_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMSUBPD ymm1, ymm2, ymm3/m256
-        /// VFMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplySubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMSUBADDPS xmm1, xmm2, xmm3/m128
-        /// VFMSUBADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplySubtractAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMSUBADDPD xmm1, xmm2, xmm3/m128
-        /// VFMSUBADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplySubtractAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256 _mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMSUBADDPS ymm1, ymm2, ymm3/m256
-        /// VFMSUBADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplySubtractAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256d _mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMSUBADDPD ymm1, ymm2, ymm3/m256
-        /// VFMSUBADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplySubtractAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
-        /// VFNMSUBPS xmm1, xmm2, xmm3/m128
-        /// VFNMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplySubtractNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fnmsub_pd (__m128d a, __m128d b, __m128d c)
-        /// VFNMSUBPD xmm1, xmm2, xmm3/m128
-        /// VFNMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplySubtractNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256 _mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c)
-        /// VFNMSUBPS ymm1, ymm2, ymm3/m256
-        /// VFNMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplySubtractNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m256d _mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c)
-        /// VFNMSUBPD ymm1, ymm2, ymm3/m256
-        /// VFNMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplySubtractNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fnmsub_ss (__m128 a, __m128 b, __m128 c)
-        /// VFNMSUBSS xmm1, xmm2, xmm3/m32
-        /// VFNMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplySubtractNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fnmsub_sd (__m128d a, __m128d b, __m128d c)
-        /// VFNMSUBSD xmm1, xmm2, xmm3/m64
-        /// VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplySubtractNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128d _mm_fmsub_sd (__m128d a, __m128d b, __m128d c)
-        /// VFMSUBSD xmm1, xmm2, xmm3/m64
-        /// VFMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplySubtractScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) { throw new PlatformNotSupportedException(); }
-
-        /// <summary>
-        /// __m128 _mm_fmsub_ss (__m128 a, __m128 b, __m128 c)
-        /// VFMSUBSS xmm1, xmm2, xmm3/m32
-        /// VFMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) { throw new PlatformNotSupportedException(); }
-
        /// <summary>
        /// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
        /// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.cs
index f4b18202006445..f1dd488826ecb0 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/Avx10v1.cs
@@ -1615,118 +1615,6 @@ internal Avx10v1() { }
        /// </summary>
        public static Vector256 Min(Vector256 left, Vector256 right) => Min(left, right);

-        /// <summary>
-        /// __m128 _mm_fmadd_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMADDPS xmm1, xmm2, xmm3/m128
-        /// VFMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplyAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAdd(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fmadd_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMADDPD xmm1, xmm2, xmm3/m128
-        /// VFMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplyAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAdd(a, b, c);
-
-        /// <summary>
-        /// __m256 _mm256_fmadd_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMADDPS ymm1, ymm2, ymm3/m256
-        /// VFMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplyAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplyAdd(a, b, c);
-
-        /// <summary>
-        /// __m256d _mm256_fmadd_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMADDPD ymm1, ymm2, ymm3/m256
-        /// VFMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplyAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplyAdd(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fnmadd_ps (__m128 a, __m128 b, __m128 c)
-        /// VFNMADDPS xmm1, xmm2, xmm3/m128
-        /// VFNMADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplyAddNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddNegated(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fnmadd_pd (__m128d a, __m128d b, __m128d c)
-        /// VFNMADDPD xmm1, xmm2, xmm3/m128
-        /// VFNMADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplyAddNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddNegated(a, b, c);
-
-        /// <summary>
-        /// __m256 _mm256_fnmadd_ps (__m256 a, __m256 b, __m256 c)
-        /// VFNMADDPS ymm1, ymm2, ymm3/m256
-        /// VFNMADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplyAddNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplyAddNegated(a, b, c);
-
-        /// <summary>
-        /// __m256d _mm256_fnmadd_pd (__m256d a, __m256d b, __m256d c)
-        /// VFNMADDPD ymm1, ymm2, ymm3/m256
-        /// VFNMADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplyAddNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplyAddNegated(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fnmadd_ss (__m128 a, __m128 b, __m128 c)
-        /// VFNMADDSS xmm1, xmm2, xmm3/m32
-        /// VFNMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplyAddNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddNegatedScalar(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fnmadd_sd (__m128d a, __m128d b, __m128d c)
-        /// VFNMADDSD xmm1, xmm2, xmm3/m64
-        /// VFNMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplyAddNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddNegatedScalar(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fmadd_ss (__m128 a, __m128 b, __m128 c)
-        /// VFMADDSS xmm1, xmm2, xmm3/m32
-        /// VFMADDSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplyAddScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddScalar(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fmadd_sd (__m128d a, __m128d b, __m128d c)
-        /// VFMADDSD xmm1, xmm2, xmm3/m64
-        /// VFMADDSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplyAddScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddScalar(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fmaddsub_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMADDSUBPS xmm1, xmm2, xmm3/m128
-        /// VFMADDSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplyAddSubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplyAddSubtract(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fmaddsub_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMADDSUBPD xmm1, xmm2, xmm3/m128
-        /// VFMADDSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplyAddSubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplyAddSubtract(a, b, c);
-
-        /// <summary>
-        /// __m256 _mm256_fmaddsub_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMADDSUBPS ymm1, ymm2, ymm3/m256
-        /// VFMADDSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplyAddSubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplyAddSubtract(a, b, c);
-
-        /// <summary>
-        /// __m256d _mm256_fmaddsub_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMADDSUBPD ymm1, ymm2, ymm3/m256
-        /// VFMADDSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<double> MultiplyAddSubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplyAddSubtract(a, b, c);
-
        /// <summary>
        /// __m128i _mm_mullo_epi64 (__m128i a, __m128i b)
        /// VPMULLQ xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
@@ -1763,118 +1651,6 @@ internal Avx10v1() { }
        /// </summary>
        public static Vector128 MultiplyScalar(Vector128 left, Vector128 right, [ConstantExpected(Max = FloatRoundingMode.ToZero)] FloatRoundingMode mode) => MultiplyScalar(left, right, mode);

-        /// <summary>
-        /// __m128 _mm_fmsub_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMSUBPS xmm1, xmm2, xmm3/m128
-        /// VFMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplySubtract(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtract(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fmsub_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMSUBPD xmm1, xmm2, xmm3/m128
-        /// VFMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplySubtract(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtract(a, b, c);
-
-        /// <summary>
-        /// __m256 _mm256_fmsub_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMSUBPS ymm1, ymm2, ymm3/m256
-        /// VFMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplySubtract(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplySubtract(a, b, c);
-
-        /// <summary>
-        /// __m256d _mm256_fmsub_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMSUBPD ymm1, ymm2, ymm3/m256
-        /// VFMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplySubtract(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplySubtract(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fmsubadd_ps (__m128 a, __m128 b, __m128 c)
-        /// VFMSUBADDPS xmm1, xmm2, xmm3/m128
-        /// VFMSUBADDPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplySubtractAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractAdd(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fmsubadd_pd (__m128d a, __m128d b, __m128d c)
-        /// VFMSUBADDPD xmm1, xmm2, xmm3/m128
-        /// VFMSUBADDPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplySubtractAdd(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractAdd(a, b, c);
-
-        /// <summary>
-        /// __m256 _mm256_fmsubadd_ps (__m256 a, __m256 b, __m256 c)
-        /// VFMSUBADDPS ymm1, ymm2, ymm3/m256
-        /// VFMSUBADDPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplySubtractAdd(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplySubtractAdd(a, b, c);
-
-        /// <summary>
-        /// __m256d _mm256_fmsubadd_pd (__m256d a, __m256d b, __m256d c)
-        /// VFMSUBADDPD ymm1, ymm2, ymm3/m256
-        /// VFMSUBADDPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplySubtractAdd(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplySubtractAdd(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fnmsub_ps (__m128 a, __m128 b, __m128 c)
-        /// VFNMSUBPS xmm1, xmm2, xmm3/m128
-        /// VFNMSUBPS xmm1 {k1}{z}, xmm2, xmm3/m128/m32bcst
-        /// </summary>
-        public static Vector128<float> MultiplySubtractNegated(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractNegated(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fnmsub_pd (__m128d a, __m128d b, __m128d c)
-        /// VFNMSUBPD xmm1, xmm2, xmm3/m128
-        /// VFNMSUBPD xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
-        /// </summary>
-        public static Vector128<double> MultiplySubtractNegated(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractNegated(a, b, c);
-
-        /// <summary>
-        /// __m256 _mm256_fnmsub_ps (__m256 a, __m256 b, __m256 c)
-        /// VFNMSUBPS ymm1, ymm2, ymm3/m256
-        /// VFNMSUBPS ymm1 {k1}{z}, ymm2, ymm3/m256/m32bcst
-        /// </summary>
-        public static Vector256<float> MultiplySubtractNegated(Vector256<float> a, Vector256<float> b, Vector256<float> c) => MultiplySubtractNegated(a, b, c);
-
-        /// <summary>
-        /// __m256d _mm256_fnmsub_pd (__m256d a, __m256d b, __m256d c)
-        /// VFNMSUBPD ymm1, ymm2, ymm3/m256
-        /// VFNMSUBPD ymm1 {k1}{z}, ymm2, ymm3/m256/m64bcst
-        /// </summary>
-        public static Vector256<double> MultiplySubtractNegated(Vector256<double> a, Vector256<double> b, Vector256<double> c) => MultiplySubtractNegated(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fnmsub_ss (__m128 a, __m128 b, __m128 c)
-        /// VFNMSUBSS xmm1, xmm2, xmm3/m32
-        /// VFNMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplySubtractNegatedScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractNegatedScalar(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fnmsub_sd (__m128d a, __m128d b, __m128d c)
-        /// VFNMSUBSD xmm1, xmm2, xmm3/m64
-        /// VFNMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplySubtractNegatedScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractNegatedScalar(a, b, c);
-
-        /// <summary>
-        /// __m128d _mm_fmsub_sd (__m128d a, __m128d b, __m128d c)
-        /// VFMSUBSD xmm1, xmm2, xmm3/m64
-        /// VFMSUBSD xmm1 {k1}{z}, xmm2, xmm3/m64{er}
-        /// </summary>
-        public static Vector128<double> MultiplySubtractScalar(Vector128<double> a, Vector128<double> b, Vector128<double> c) => MultiplySubtractScalar(a, b, c);
-
-        /// <summary>
-        /// __m128 _mm_fmsub_ss (__m128 a, __m128 b, __m128 c)
-        /// VFMSUBSS xmm1, xmm2, xmm3/m32
-        /// VFMSUBSS xmm1 {k1}{z}, xmm2, xmm3/m32{er}
-        /// </summary>
-        public static Vector128<float> MultiplySubtractScalar(Vector128<float> a, Vector128<float> b, Vector128<float> c) => MultiplySubtractScalar(a, b, c);
-
        /// <summary>
        /// __m128i _mm_multishift_epi64_epi8(__m128i a, __m128i b)
        /// VPMULTISHIFTQB xmm1 {k1}{z}, xmm2, xmm3/m128/m64bcst
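The FMA-family members deleted from `Avx10v1` above duplicated entry points that already exist on the `Fma` class; with this PR's implication tables making EVEX imply FMA, any AVX10.1-capable machine reports `Fma.IsSupported == true`, so (presumably — this is my reading, not stated in the patch) callers lose nothing. A short usage sketch of the surviving path:

```csharp
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

static class FmaCaller // illustrative only
{
    // On AVX10.1 hardware Fma.IsSupported is true (EVEX implies FMA per the
    // implication tables in this PR), so the existing entry point suffices.
    public static Vector128<float> MulAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c)
        => Fma.MultiplyAdd(a, b, c); // previously also exposed as Avx10v1.MultiplyAdd
}
```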
System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplyAdd(System.Numerics.Vector addend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } - + public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } public static System.Numerics.Vector MultiplyBySelectedScalar(System.Numerics.Vector left, System.Numerics.Vector right, [ConstantExpected] byte rightIndex) { throw null; } - + public static System.Numerics.Vector MultiplyExtended(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplyExtended(System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } - + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } @@ -4611,7 +4611,7 @@ internal Arm64() { } public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } - public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } + public static System.Numerics.Vector MultiplySubtract(System.Numerics.Vector minuend, System.Numerics.Vector left, System.Numerics.Vector right) { throw null; } public static System.Numerics.Vector Negate(System.Numerics.Vector value) { throw null; } public static System.Numerics.Vector Negate(System.Numerics.Vector value) { throw null; } @@ -5931,44 +5931,12 @@ internal Avx10v1() { } public static System.Runtime.Intrinsics.Vector128 Min(System.Runtime.Intrinsics.Vector128 left, System.Runtime.Intrinsics.Vector128 right) { throw null; } public static System.Runtime.Intrinsics.Vector256 Min(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } public static System.Runtime.Intrinsics.Vector256 Min(System.Runtime.Intrinsics.Vector256 left, System.Runtime.Intrinsics.Vector256 right) { throw null; } - public static System.Runtime.Intrinsics.Vector128 MultiplyAdd(System.Runtime.Intrinsics.Vector128 a, System.Runtime.Intrinsics.Vector128 b, System.Runtime.Intrinsics.Vector128 c) { throw null; } - public static System.Runtime.Intrinsics.Vector128 MultiplyAdd(System.Runtime.Intrinsics.Vector128 a, System.Runtime.Intrinsics.Vector128 b, System.Runtime.Intrinsics.Vector128 c) { throw null; } - public static System.Runtime.Intrinsics.Vector256 MultiplyAdd(System.Runtime.Intrinsics.Vector256 a, System.Runtime.Intrinsics.Vector256 b, System.Runtime.Intrinsics.Vector256 c) { throw null; } - public static System.Runtime.Intrinsics.Vector256 
@@ -5931,44 +5931,12 @@ internal Avx10v1() { }
         public static System.Runtime.Intrinsics.Vector128<ulong> Min(System.Runtime.Intrinsics.Vector128<ulong> left, System.Runtime.Intrinsics.Vector128<ulong> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<long> Min(System.Runtime.Intrinsics.Vector256<long> left, System.Runtime.Intrinsics.Vector256<long> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<ulong> Min(System.Runtime.Intrinsics.Vector256<ulong> left, System.Runtime.Intrinsics.Vector256<ulong> right) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplyAdd(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplyAdd(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<double> MultiplyAdd(System.Runtime.Intrinsics.Vector256<double> a, System.Runtime.Intrinsics.Vector256<double> b, System.Runtime.Intrinsics.Vector256<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<float> MultiplyAdd(System.Runtime.Intrinsics.Vector256<float> a, System.Runtime.Intrinsics.Vector256<float> b, System.Runtime.Intrinsics.Vector256<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplyAddNegated(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplyAddNegated(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<double> MultiplyAddNegated(System.Runtime.Intrinsics.Vector256<double> a, System.Runtime.Intrinsics.Vector256<double> b, System.Runtime.Intrinsics.Vector256<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<float> MultiplyAddNegated(System.Runtime.Intrinsics.Vector256<float> a, System.Runtime.Intrinsics.Vector256<float> b, System.Runtime.Intrinsics.Vector256<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplyAddNegatedScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplyAddNegatedScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplyAddScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplyAddScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplyAddSubtract(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplyAddSubtract(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<double> MultiplyAddSubtract(System.Runtime.Intrinsics.Vector256<double> a, System.Runtime.Intrinsics.Vector256<double> b, System.Runtime.Intrinsics.Vector256<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<float> MultiplyAddSubtract(System.Runtime.Intrinsics.Vector256<float> a, System.Runtime.Intrinsics.Vector256<float> b, System.Runtime.Intrinsics.Vector256<float> c) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<long> MultiplyLow(System.Runtime.Intrinsics.Vector128<long> left, System.Runtime.Intrinsics.Vector128<long> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<ulong> MultiplyLow(System.Runtime.Intrinsics.Vector128<ulong> left, System.Runtime.Intrinsics.Vector128<ulong> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<long> MultiplyLow(System.Runtime.Intrinsics.Vector256<long> left, System.Runtime.Intrinsics.Vector256<long> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<ulong> MultiplyLow(System.Runtime.Intrinsics.Vector256<ulong> left, System.Runtime.Intrinsics.Vector256<ulong> right) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> MultiplyScalar(System.Runtime.Intrinsics.Vector128<double> left, System.Runtime.Intrinsics.Vector128<double> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> MultiplyScalar(System.Runtime.Intrinsics.Vector128<float> left, System.Runtime.Intrinsics.Vector128<float> right, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplySubtract(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplySubtract(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<double> MultiplySubtract(System.Runtime.Intrinsics.Vector256<double> a, System.Runtime.Intrinsics.Vector256<double> b, System.Runtime.Intrinsics.Vector256<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<float> MultiplySubtract(System.Runtime.Intrinsics.Vector256<float> a, System.Runtime.Intrinsics.Vector256<float> b, System.Runtime.Intrinsics.Vector256<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplySubtractAdd(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplySubtractAdd(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<double> MultiplySubtractAdd(System.Runtime.Intrinsics.Vector256<double> a, System.Runtime.Intrinsics.Vector256<double> b, System.Runtime.Intrinsics.Vector256<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<float> MultiplySubtractAdd(System.Runtime.Intrinsics.Vector256<float> a, System.Runtime.Intrinsics.Vector256<float> b, System.Runtime.Intrinsics.Vector256<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplySubtractNegated(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplySubtractNegated(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<double> MultiplySubtractNegated(System.Runtime.Intrinsics.Vector256<double> a, System.Runtime.Intrinsics.Vector256<double> b, System.Runtime.Intrinsics.Vector256<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector256<float> MultiplySubtractNegated(System.Runtime.Intrinsics.Vector256<float> a, System.Runtime.Intrinsics.Vector256<float> b, System.Runtime.Intrinsics.Vector256<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplySubtractNegatedScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplySubtractNegatedScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<double> MultiplySubtractScalar(System.Runtime.Intrinsics.Vector128<double> a, System.Runtime.Intrinsics.Vector128<double> b, System.Runtime.Intrinsics.Vector128<double> c) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> MultiplySubtractScalar(System.Runtime.Intrinsics.Vector128<float> a, System.Runtime.Intrinsics.Vector128<float> b, System.Runtime.Intrinsics.Vector128<float> c) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<byte> MultiShift(System.Runtime.Intrinsics.Vector128<byte> control, System.Runtime.Intrinsics.Vector128<ulong> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<sbyte> MultiShift(System.Runtime.Intrinsics.Vector128<sbyte> control, System.Runtime.Intrinsics.Vector128<long> value) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<byte> MultiShift(System.Runtime.Intrinsics.Vector256<byte> control, System.Runtime.Intrinsics.Vector256<ulong> value) { throw null; }
@@ -6135,7 +6103,7 @@ internal Avx10v1() { }
         internal X64() { }
         public static new bool IsSupported { get { throw null; } }
         public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, ulong value) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, ulong value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, long value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
@@ -7368,7 +7336,7 @@ internal VL() { }
         internal X64() { }
         public static new bool IsSupported { get { throw null; } }
         public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, ulong value) { throw null; }
-        public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, ulong value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<float> ConvertScalarToVector128Single(System.Runtime.Intrinsics.Vector128<float> upper, ulong value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<double> ConvertScalarToVector128Double(System.Runtime.Intrinsics.Vector128<double> upper, long value, [System.Diagnostics.CodeAnalysis.ConstantExpected(Max = System.Runtime.Intrinsics.X86.FloatRoundingMode.ToZero)] System.Runtime.Intrinsics.X86.FloatRoundingMode mode) { throw null; }
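Note: the Avx10v1 overloads removed above duplicated surface that the Fma class already exposes. Because AVX10v1 requires EVEX and EVEX requires FMA under the dependency rules in corinfoinstructionset.h, Fma.MultiplyAdd remains usable on every machine the removed overloads served. A minimal C# sketch of the migration (the FmaCompat wrapper and its scalar fallback are illustrative, not part of this change):

using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

internal static class FmaCompat
{
    // Computes (a * b) + c per element, as the removed Avx10v1.MultiplyAdd did.
    public static Vector128<float> MultiplyAdd(Vector128<float> a, Vector128<float> b, Vector128<float> c)
    {
        if (Fma.IsSupported)
        {
            // Avx10v1.IsSupported implies Fma.IsSupported per the ISA dependency
            // rules, so this path covers all hardware the removed overload served.
            return Fma.MultiplyAdd(a, b, c);
        }

        // Portable scalar fallback, illustrative only.
        Vector128<float> result = default;
        for (int i = 0; i < Vector128<float>.Count; i++)
        {
            result = result.WithElement(i, float.FusedMultiplyAdd(a.GetElement(i), b.GetElement(i), c.GetElement(i)));
        }
        return result;
    }
}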
diff --git a/src/native/minipal/cpufeatures.c b/src/native/minipal/cpufeatures.c
index 45271beed86374..47327255e40e22 100644
--- a/src/native/minipal/cpufeatures.c
+++ b/src/native/minipal/cpufeatures.c
@@ -199,11 +199,11 @@ int minipal_getcpufeatures(void)
     }
 
     const int requiredAvxEcxFlags = (1 << 27) // OSXSAVE
-                                  | (1 << 28);  // AVX
+                                  | (1 << 28); // AVX
 
     if ((cpuidInfo[CPUID_ECX] & requiredAvxEcxFlags) == requiredAvxEcxFlags)
     {
-        if (IsAvxEnabled() && (xmmYmmStateSupport() == 1))    // XGETBV == 11
+        if (IsAvxEnabled() && (xmmYmmStateSupport() == 1)) // XGETBV == 11
         {
             result |= XArchIntrinsicConstants_Avx;
@@ -220,54 +220,27 @@ int minipal_getcpufeatures(void)
         {
             result |= XArchIntrinsicConstants_Avx2;
 
-            if (IsAvx512Enabled() && (avx512StateSupport() == 1))    // XGETBV XRC0[7:5] == 111
+            if (IsAvx512Enabled() && (avx512StateSupport() == 1)) // XGETBV XRC0[7:5] == 111
             {
-                if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0)    // AVX512F
+                if (((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) && // AVX512F
+                    ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0) && // AVX512BW
+                    ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0) && // AVX512CD
+                    ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0) && // AVX512DQ
+                    ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0))   // AVX512VL
                 {
-                    result |= XArchIntrinsicConstants_Avx512f;
-                    result |= XArchIntrinsicConstants_Evex;
-
-                    bool isAVX512_VLSupported = false;
-                    if ((cpuidInfo[CPUID_EBX] & (1 << 31)) != 0)    // AVX512VL
-                    {
-                        result |= XArchIntrinsicConstants_Avx512f_vl;
-                        isAVX512_VLSupported = true;
-                    }
-
-                    if ((cpuidInfo[CPUID_EBX] & (1 << 30)) != 0)    // AVX512BW
-                    {
-                        result |= XArchIntrinsicConstants_Avx512bw;
-                        if (isAVX512_VLSupported)    // AVX512BW_VL
-                        {
-                            result |= XArchIntrinsicConstants_Avx512bw_vl;
-                        }
-                    }
-
-                    if ((cpuidInfo[CPUID_EBX] & (1 << 28)) != 0)    // AVX512CD
-                    {
-                        result |= XArchIntrinsicConstants_Avx512cd;
-                        if (isAVX512_VLSupported)    // AVX512CD_VL
-                        {
-                            result |= XArchIntrinsicConstants_Avx512cd_vl;
-                        }
-                    }
+                    // While the AVX-512 ISAs can be individually lit-up, they really
+                    // need F, BW, CD, DQ, and VL to be fully functional without adding
+                    // significant complexity into the JIT. Additionally, unlike AVX/AVX2
+                    // there was never really any hardware that didn't provide all 5 at
+                    // once, with the notable exception being Knight's Landing which
+                    // provided a similar but not quite the same feature.
 
-                    if ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0)    // AVX512DQ
-                    {
-                        result |= XArchIntrinsicConstants_Avx512dq;
-                        if (isAVX512_VLSupported)    // AVX512DQ_VL
-                        {
-                            result |= XArchIntrinsicConstants_Avx512dq_vl;
-                        }
-                    }
+                    result |= XArchIntrinsicConstants_Evex;
+                    result |= XArchIntrinsicConstants_Avx512;
 
                     if ((cpuidInfo[CPUID_ECX] & (1 << 1)) != 0)    // AVX512VBMI
                     {
                         result |= XArchIntrinsicConstants_Avx512Vbmi;
-                        if (isAVX512_VLSupported)    // AVX512VBMI_VL
-                        {
-                            result |= XArchIntrinsicConstants_Avx512Vbmi_vl;
-                        }
                     }
                 }
             }
@@ -282,15 +255,18 @@ int minipal_getcpufeatures(void)
         if ((cpuidInfo[CPUID_EDX] & (1 << 19)) != 0)    // Avx10
         {
             __cpuidex(cpuidInfo, 0x00000024, 0x00000000);
-            if((cpuidInfo[CPUID_EBX] & 0xFF) >= 1)    // Avx10v1 - CPUID.(EAX=24H, ECX=00H):EBX[7:0] >= 1
+            uint8_t avx10Version = (uint8_t)(cpuidInfo[CPUID_EBX] & 0xFF);
+
+            if((avx10Version >= 1) &&
+               ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0) && // Avx10/V128
+               ((cpuidInfo[CPUID_EBX] & (1 << 17)) != 0))   // Avx10/V256
             {
-                if ((cpuidInfo[CPUID_EBX] & (1 << 16)) != 0)
-                {
-                    result |= XArchIntrinsicConstants_Avx10v1;
-                    result |= XArchIntrinsicConstants_Evex;
-                }
+                result |= XArchIntrinsicConstants_Evex;
+                result |= XArchIntrinsicConstants_Avx10v1;
+
+                bool isV512Supported = (cpuidInfo[CPUID_EBX] & (1 << 18)) != 0; // Avx10/V512
 
-                if ((cpuidInfo[CPUID_EBX] & (1 << 18)) != 0)
+                if (isV512Supported)
                 {
                     result |= XArchIntrinsicConstants_Avx10v1_V512;
                 }
diff --git a/src/native/minipal/cpufeatures.h b/src/native/minipal/cpufeatures.h
index 11f72a90b15b5a..52527a4565036a 100644
--- a/src/native/minipal/cpufeatures.h
+++ b/src/native/minipal/cpufeatures.h
@@ -26,20 +26,12 @@ enum XArchIntrinsicConstants
     XArchIntrinsicConstants_Lzcnt = 0x1000,
     XArchIntrinsicConstants_AvxVnni = 0x2000,
     XArchIntrinsicConstants_Movbe = 0x4000,
-    XArchIntrinsicConstants_Avx512f = 0x8000,
-    XArchIntrinsicConstants_Avx512f_vl = 0x10000,
-    XArchIntrinsicConstants_Avx512bw = 0x20000,
-    XArchIntrinsicConstants_Avx512bw_vl = 0x40000,
-    XArchIntrinsicConstants_Avx512cd = 0x80000,
-    XArchIntrinsicConstants_Avx512cd_vl = 0x100000,
-    XArchIntrinsicConstants_Avx512dq = 0x200000,
-    XArchIntrinsicConstants_Avx512dq_vl = 0x400000,
-    XArchIntrinsicConstants_Avx512Vbmi = 0x800000,
-    XArchIntrinsicConstants_Avx512Vbmi_vl = 0x1000000,
-    XArchIntrinsicConstants_Serialize = 0x2000000,
-    XArchIntrinsicConstants_Avx10v1 = 0x4000000,
-    XArchIntrinsicConstants_Avx10v1_V512 = 0x8000000,
-    XArchIntrinsicConstants_Evex = 0x10000000,
+    XArchIntrinsicConstants_Avx512 = 0x8000,
+    XArchIntrinsicConstants_Avx512Vbmi = 0x10000,
+    XArchIntrinsicConstants_Serialize = 0x20000,
+    XArchIntrinsicConstants_Avx10v1 = 0x40000,
+    XArchIntrinsicConstants_Avx10v1_V512 = 0x80000,
+    XArchIntrinsicConstants_Evex = 0x100000,
 };
 
 #endif // HOST_X86 || HOST_AMD64
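For tooling that consumes the bitmask returned by minipal_getcpufeatures(), the consolidated layout can be mirrored as a managed flags enum. The values below are taken verbatim from the cpufeatures.h hunk above; the enum name and helper are a hypothetical sketch, not runtime API:

using System;

[Flags]
internal enum XArchIntrinsicConstantsManaged : uint
{
    // Mirrors src/native/minipal/cpufeatures.h after this change.
    Avx512       = 0x8000,   // one flag now implies F, BW, CD, DQ, and VL together
    Avx512Vbmi   = 0x10000,
    Serialize    = 0x20000,
    Avx10v1      = 0x40000,
    Avx10v1_V512 = 0x80000,
    Evex         = 0x100000,
}

internal static class CpuFeatureChecks
{
    // A single test now answers what previously required checking the five
    // separate Avx512f/bw/cd/dq flags plus their _vl companions.
    public static bool HasFullAvx512(uint features)
        => (features & (uint)XArchIntrinsicConstantsManaged.Avx512) != 0;
}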