Skip to content

Commit

Permalink
New X86 features and detections
Browse files Browse the repository at this point in the history
  • Loading branch information
yuyichao committed Jul 6, 2020
1 parent 9f83eaf commit 9ad9daa
Show file tree
Hide file tree
Showing 2 changed files with 75 additions and 8 deletions.
27 changes: 27 additions & 0 deletions src/features_x86.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,31 @@ JL_FEATURE_DEF(avx512vl, 32 * 2 + 31, 0)
JL_FEATURE_DEF(prefetchwt1, 32 * 3 + 0, 0)
JL_FEATURE_DEF(avx512vbmi, 32 * 3 + 1, 0)
JL_FEATURE_DEF(pku, 32 * 3 + 4, 0) // ospke
JL_FEATURE_DEF(waitpkg, 32 * 3 + 5, 0)
JL_FEATURE_DEF(avx512vbmi2, 32 * 3 + 6, 0)
JL_FEATURE_DEF(shstk, 32 * 3 + 7, 0)
JL_FEATURE_DEF(gfni, 32 * 3 + 8, 0)
JL_FEATURE_DEF(vaes, 32 * 3 + 9, 0)
JL_FEATURE_DEF(vpclmulqdq, 32 * 3 + 10, 0)
JL_FEATURE_DEF(avx512vnni, 32 * 3 + 11, 0)
JL_FEATURE_DEF(avx512bitalg, 32 * 3 + 12, 0)
JL_FEATURE_DEF(avx512vpopcntdq, 32 * 3 + 14, 0)
JL_FEATURE_DEF(rdpid, 32 * 3 + 22, 0)
JL_FEATURE_DEF(cldemote, 32 * 3 + 25, 0)
JL_FEATURE_DEF(movdiri, 32 * 3 + 27, 0)
JL_FEATURE_DEF(movdir64b, 32 * 3 + 28, 0)
JL_FEATURE_DEF(enqcmd, 32 * 3 + 29, 90000)

// EAX=7,ECX=0: EDX
// Entries in this section live in word 4 of the feature bit array: the second
// macro argument is written as `32 * <word index> + <bit within that word>`.
// The third argument becomes the `llvmver` field of the feature table entry
// (presumably the minimum LLVM version that knows the feature, encoded like
// JL_LLVM_VERSION, with 0 meaning no restriction -- TODO confirm).
// The two entries below are left disabled; their LLVM version is still a
// `?????` placeholder.
// JL_FEATURE_DEF(avx5124vnniw, 32 * 4 + 2, ?????)
// JL_FEATURE_DEF(avx5124fmaps, 32 * 4 + 3, ?????)
JL_FEATURE_DEF(avx512vp2intersect, 32 * 4 + 8, 90000)
JL_FEATURE_DEF(serialize, 32 * 4 + 14, 110000)
JL_FEATURE_DEF(tsxldtrk, 32 * 4 + 16, 110000)
JL_FEATURE_DEF(pconfig, 32 * 4 + 18, 0)
// The AMX entries use JL_FEATURE_DEF_NAME because the feature-name string
// contains a '-', which cannot appear in the identifier passed as `name`.
JL_FEATURE_DEF_NAME(amx_bf16, 32 * 4 + 22, 110000, "amx-bf16")
JL_FEATURE_DEF_NAME(amx_tile, 32 * 4 + 24, 110000, "amx-tile")
JL_FEATURE_DEF_NAME(amx_int8, 32 * 4 + 25, 110000, "amx-int8")

// EAX=0x80000001: ECX
// ignore sahf on 32bit x86 since it is required
Expand All @@ -85,5 +105,12 @@ JL_FEATURE_DEF(xsaves, 32 * 7 + 3, 0)

// EAX=0x80000008: EBX
JL_FEATURE_DEF(clzero, 32 * 8 + 0, 0)
JL_FEATURE_DEF(wbnoinvd, 32 * 8 + 9, 0)

// EAX=7,ECX=1: EAX
// Word 9 of the feature array. The host detection code fills it from the
// first output register of `jl_cpuidex(info, 7, 1)` when maxleaf >= 7.
JL_FEATURE_DEF(avx512bf16, 32 * 9 + 5, 90000)

// EAX=0x14,ECX=0: EBX
// Word 10 of the feature array. The host detection code fills it from the
// second output register of `jl_cpuidex(info, 0x14, 0)` when maxleaf >= 0x14.
// Adding words here requires `feature_sz` in processor_x86.cpp to cover them.
JL_FEATURE_DEF(ptwrite, 32 * 10 + 4, 0)

#undef JL_X86_64ONLY_VER
56 changes: 48 additions & 8 deletions src/processor_x86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ enum class CPU : uint32_t {
amd_znver1,
};

static constexpr size_t feature_sz = 9;
static constexpr size_t feature_sz = 11;
static constexpr FeatureName feature_names[] = {
#define JL_FEATURE_DEF(name, bit, llvmver) {#name, bit, llvmver},
#define JL_FEATURE_DEF_NAME(name, bit, llvmver, str) {str, bit, llvmver},
Expand Down Expand Up @@ -130,16 +130,27 @@ static constexpr FeatureDep deps[] = {
{avx, sse42},
{f16c, avx},
{avx2, avx},
{vaes, avx},
{vaes, aes},
{vpclmulqdq, avx},
{vpclmulqdq, pclmul},
{avx512f, avx2},
{avx512dq, avx512f},
{avx512ifma, avx512f},
{avx512pf, avx512f},
{avx512er, avx512f},
{avx512cd, avx512f},
{avx512bw, avx512f},
{avx512bf16, avx512bw},
{avx512bitalg, avx512bw},
{avx512vl, avx512f},
{avx512vbmi, avx512bw},
{avx512vbmi2, avx512bw},
{avx512vnni, avx512f},
{avx512vp2intersect, avx512f},
{avx512vpopcntdq, avx512f},
{amx_int8, amx_tile},
{amx_bf16, amx_tile},
{sse4a, sse3},
{xop, fma4},
{fma4, avx},
Expand Down Expand Up @@ -470,15 +481,23 @@ static inline void features_disable_avx512(T &features)
{
using namespace Feature;
unset_bits(features, avx512f, avx512dq, avx512ifma, avx512pf, avx512er, avx512cd,
avx512bw, avx512vl, avx512vbmi);
avx512bw, avx512vl, avx512vbmi, avx512vpopcntdq, avx512vbmi2, avx512vnni,
avx512bitalg, avx512vp2intersect, avx512bf16);
}

// Clears the `avx` feature bit together with every other feature bit named in
// the unset_bits() call below. The host detection code calls this when its
// `hasavx` check fails.
//
// `features` is passed by reference and modified in place via unset_bits().
// `fma` is spelled `Feature::fma` even though the namespace is already pulled
// in -- presumably to avoid a clash with the C math function fma(); confirm.
//
// NOTE(review): this listing comes from a commit diff view without +/- markers.
// The two consecutive `xsaveopt, xsavec, xsaves...` lines appear to be the
// removed and the added version of the same line (the added one appends
// `vaes` and `vpclmulqdq`). Only the second should be present in the compiled
// file -- confirm against the repository.
template<typename T>
static inline void features_disable_avx(T &features)
{
using namespace Feature;
unset_bits(features, avx, Feature::fma, f16c, xsave, avx2, xop, fma4,
xsaveopt, xsavec, xsaves);
xsaveopt, xsavec, xsaves, vaes, vpclmulqdq);
}

template<typename T>
static inline void features_disable_amx(T &features)
{
using namespace Feature;
unset_bits(features, amx_bf16, amx_tile, amx_int8);
}

static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
Expand Down Expand Up @@ -535,15 +554,25 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
jl_cpuidex(infoex8, 0x80000008, 0);
features[8] = infoex8[1];
}
if (maxleaf >= 7) {
int32_t info7[4];
jl_cpuidex(info7, 7, 1);
features[9] = info7[0];
}
if (maxleaf >= 0x14) {
int32_t info14[4];
jl_cpuidex(info14, 0x14, 0);
features[10] = info14[1];
}

// Fix up AVX bits to account for OS support and match LLVM model
uint64_t xcr0 = 0;
const uint32_t avx_mask = (1 << 27) | (1 << 28);
bool hasavx = test_all_bits(features[0], avx_mask);
if (hasavx) {
bool hasxsave = test_all_bits(features[0], 1 << 27);
if (hasxsave) {
xcr0 = get_xcr0();
hasavx = test_all_bits(xcr0, 0x6);
hasxsave = test_all_bits(xcr0, 0x6);
}
bool hasavx = hasxsave && test_all_bits(features[0], 1 << 28);
unset_bits(features, 32 + 27);
if (!hasavx)
features_disable_avx(features);
Expand All @@ -557,6 +586,10 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu(void)
#endif
if (!hasavx512save)
features_disable_avx512(features);
// AMX requires additional context to be saved by the OS.
bool hasamxsave = hasxsave && test_all_bits(xcr0, (1 << 17) | (1 << 18));
if (!hasamxsave)
features_disable_amx(features);
// Ignore feature bits that we are not interested in.
mask_features(feature_masks, &features[0]);

Expand Down Expand Up @@ -788,12 +821,16 @@ static void ensure_jit_target(bool imaging)
static constexpr uint32_t clone_simd[] = {Feature::sse3, Feature::ssse3,
Feature::sse41, Feature::sse42,
Feature::avx, Feature::avx2,
Feature::vaes, Feature::vpclmulqdq,
Feature::sse4a, Feature::avx512f,
Feature::avx512dq, Feature::avx512ifma,
Feature::avx512pf, Feature::avx512er,
Feature::avx512cd, Feature::avx512bw,
Feature::avx512vl, Feature::avx512vbmi,
Feature::avx512vpopcntdq};
Feature::avx512vpopcntdq,
Feature::avx512vbmi2, Feature::avx512vnni,
Feature::avx512bitalg, Feature::avx512bf16,
Feature::avx512vp2intersect};
for (auto fe: clone_math) {
if (!test_nbit(features0, fe) && test_nbit(t.en.features, fe)) {
t.en.flags |= JL_TARGET_CLONE_MATH;
Expand Down Expand Up @@ -847,6 +884,9 @@ get_llvm_target_noext(const TargetData<feature_sz> &data)
// returns a value that may not have 64bit support.
// This can happen with virtualization.
features.push_back("+64bit");
#endif
#if JL_LLVM_VERSION >= 90000
features.push_back("+cx8");
#endif
return std::make_pair(std::move(name), std::move(features));
}
Expand Down

0 comments on commit 9ad9daa

Please sign in to comment.