From c21c893589ff94bbb116c8ff4c3cdd52e300672e Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Tue, 15 Mar 2022 14:25:38 +0300 Subject: [PATCH 1/4] `-mtune=native` CPU autodetection for AMD Zen 3 CPU --- src/Target.cpp | 71 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) diff --git a/src/Target.cpp b/src/Target.cpp index 7e529ef924ee..c46bbc015c88 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -62,6 +62,63 @@ static void cpuid(int info[4], int infoType, int extra) { #endif #endif +#if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER) + +enum class VendorSignatures { + UNKNOWN, + GENUINE_INTEL, + AUTHENTIC_AMD, +}; + +VendorSignatures get_vendor_signature() { + int info[4]; + cpuid(info, 0, 0); + + if (info[0] < 1) { + return VendorSignatures::UNKNOWN; + } + + // "Genu ineI ntel" + if (info[1] == 0x756e6547 && info[3] == 0x49656e69 && info[2] == 0x6c65746e) { + return VendorSignatures::GENUINE_INTEL; + } + + // "Auth enti cAMD" + if (info[1] == 0x68747541 && info[3] == 0x69746e65 && info[2] == 0x444d4163) { + return VendorSignatures::AUTHENTIC_AMD; + } + + return VendorSignatures::UNKNOWN; +} + +void detect_family_and_model(unsigned info0, unsigned &family, + unsigned &model) { + family = (info0 >> 8) & 0xF; // Bits 8..11 + model = (info0 >> 4) & 0xF; // Bits 4..7 + if (family == 0x6 || family == 0xF) { + if (family == 0xF) { + // Examine extended family ID if family ID is 0xF. + family += (info0 >> 20) & 0xFf; // Bits 20..27 + } + // Examine extended model ID if family ID is 0x6 or 0xF. + model += ((info0 >> 16) & 0xF) << 4; // Bits 16..19 + } +} + +Target::Processor get_amd_processor(unsigned family, unsigned model) { + switch (family) { + case 0x19: + // FIXME: do we need to check model number? + return Target::Processor::ZnVer3; + default: + break; // Unknown AMD CPU. + } + + return Target::Processor::ProcessorGeneric; +} + +#endif // defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER) + Target calculate_host_target() { Target::OS os = Target::OSUnknown; #ifdef __linux__ @@ -111,8 +168,18 @@ Target calculate_host_target() { #else Target::Arch arch = Target::X86; + VendorSignatures vendor_signature = get_vendor_signature(); + int info[4]; cpuid(info, 1, 0); + + unsigned family = 0, model = 0; + detect_family_and_model(info[0], family, model); + + if (vendor_signature == VendorSignatures::AUTHENTIC_AMD) { + processor = get_amd_processor(family, model); + } + bool have_sse41 = (info[2] & (1 << 19)) != 0; bool have_sse2 = (info[3] & (1 << 26)) != 0; bool have_avx = (info[2] & (1 << 28)) != 0; @@ -165,12 +232,15 @@ Target calculate_host_target() { } if ((info2[1] & avx512) == avx512) { initial_features.push_back(Target::AVX512); + // FIXME: port to family/model -based detection. if ((info2[1] & avx512_knl) == avx512_knl) { initial_features.push_back(Target::AVX512_KNL); } + // FIXME: port to family/model -based detection. if ((info2[1] & avx512_skylake) == avx512_skylake) { initial_features.push_back(Target::AVX512_Skylake); } + // FIXME: port to family/model -based detection. if ((info2[1] & avx512_cannonlake) == avx512_cannonlake) { initial_features.push_back(Target::AVX512_Cannonlake); @@ -178,6 +248,7 @@ Target calculate_host_target() { const uint32_t avx512bf16 = 1U << 5; // bf16 result in eax, with cpuid(eax=7, ecx=1) int info3[4]; cpuid(info3, 7, 1); + // FIXME: port to family/model -based detection. if ((info2[2] & avx512vnni) == avx512vnni && (info3[0] & avx512bf16) == avx512bf16) { initial_features.push_back(Target::AVX512_SapphireRapids); From 887b93c7459c9a17a236399f15c66faf5ab45cc5 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 6 Apr 2022 00:38:56 +0300 Subject: [PATCH 2/4] Address review notes. --- src/Target.cpp | 45 +++++++++++++++++++++++---------------------- 1 file changed, 23 insertions(+), 22 deletions(-) diff --git a/src/Target.cpp b/src/Target.cpp index c46bbc015c88..4f09a808189b 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -32,7 +32,7 @@ using std::vector; namespace { #ifdef _MSC_VER -static void cpuid(int info[4], int infoType, int extra) { +static void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { __cpuidex(info, infoType, extra); } #else @@ -42,14 +42,14 @@ static void cpuid(int info[4], int infoType, int extra) { // (https://github.com/ispc/ispc/blob/master/builtins/dispatch.ll) #ifdef _LP64 -void cpuid(int info[4], int infoType, int extra) { +void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { __asm__ __volatile__( "cpuid \n\t" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "0"(infoType), "2"(extra)); } #else -static void cpuid(int info[4], int infoType, int extra) { +static void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { // We save %ebx in case it's the PIC register __asm__ __volatile__( "mov{l}\t{%%}ebx, %1 \n\t" @@ -65,30 +65,30 @@ static void cpuid(int info[4], int infoType, int extra) { #if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER) enum class VendorSignatures { - UNKNOWN, - GENUINE_INTEL, - AUTHENTIC_AMD, + Unknown, + GenuineIntel, + AuthenticAMD, }; VendorSignatures get_vendor_signature() { - int info[4]; + unsigned info[4]; cpuid(info, 0, 0); if (info[0] < 1) { - return VendorSignatures::UNKNOWN; + return VendorSignatures::Unknown; } // "Genu ineI ntel" if (info[1] == 0x756e6547 && info[3] == 0x49656e69 && info[2] == 0x6c65746e) { - return VendorSignatures::GENUINE_INTEL; + return VendorSignatures::GenuineIntel; } // "Auth enti cAMD" if (info[1] == 0x68747541 && info[3] == 0x69746e65 && info[2] == 0x444d4163) { - return VendorSignatures::AUTHENTIC_AMD; + return VendorSignatures::AuthenticAMD; } - return VendorSignatures::UNKNOWN; + return VendorSignatures::Unknown; } void detect_family_and_model(unsigned info0, unsigned &family, @@ -107,9 +107,10 @@ void detect_family_and_model(unsigned info0, unsigned &family, Target::Processor get_amd_processor(unsigned family, unsigned model) { switch (family) { - case 0x19: - // FIXME: do we need to check model number? - return Target::Processor::ZnVer3; + case 0x19: // AMD Family 19h + if (model <= 0x0f || model == 0x21) { + return Target::Processor::ZnVer3; // 00h-0Fh, 21h: Zen3 + } default: break; // Unknown AMD CPU. } @@ -170,13 +171,13 @@ Target calculate_host_target() { VendorSignatures vendor_signature = get_vendor_signature(); - int info[4]; + unsigned info[4]; cpuid(info, 1, 0); unsigned family = 0, model = 0; detect_family_and_model(info[0], family, model); - if (vendor_signature == VendorSignatures::AUTHENTIC_AMD) { + if (vendor_signature == VendorSignatures::AuthenticAMD) { processor = get_amd_processor(family, model); } @@ -212,7 +213,7 @@ Target calculate_host_target() { if (use_64_bits && have_avx && have_f16c && have_rdrand) { // So far, so good. AVX2/512? // Call cpuid with eax=7, ecx=0 - int info2[4]; + unsigned info2[4]; cpuid(info2, 7, 0); const uint32_t avx2 = 1U << 5; const uint32_t avx512f = 1U << 16; @@ -232,23 +233,23 @@ Target calculate_host_target() { } if ((info2[1] & avx512) == avx512) { initial_features.push_back(Target::AVX512); - // FIXME: port to family/model -based detection. + // TODO: port to family/model -based detection. if ((info2[1] & avx512_knl) == avx512_knl) { initial_features.push_back(Target::AVX512_KNL); } - // FIXME: port to family/model -based detection. + // TODO: port to family/model -based detection. if ((info2[1] & avx512_skylake) == avx512_skylake) { initial_features.push_back(Target::AVX512_Skylake); } - // FIXME: port to family/model -based detection. + // TODO: port to family/model -based detection. if ((info2[1] & avx512_cannonlake) == avx512_cannonlake) { initial_features.push_back(Target::AVX512_Cannonlake); const uint32_t avx512vnni = 1U << 11; // vnni result in ecx const uint32_t avx512bf16 = 1U << 5; // bf16 result in eax, with cpuid(eax=7, ecx=1) - int info3[4]; + unsigned info3[4]; cpuid(info3, 7, 1); - // FIXME: port to family/model -based detection. + // TODO: port to family/model -based detection. if ((info2[2] & avx512vnni) == avx512vnni && (info3[0] & avx512bf16) == avx512bf16) { initial_features.push_back(Target::AVX512_SapphireRapids); From 504570c0a6da547471503de92bcb546eb5ecb892 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 6 Apr 2022 00:51:02 +0300 Subject: [PATCH 3/4] Fix MSVC build --- src/Target.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/Target.cpp b/src/Target.cpp index 4f09a808189b..c21d2b37b865 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -33,7 +33,9 @@ namespace { #ifdef _MSC_VER static void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { - __cpuidex(info, infoType, extra); + int info_signed[4]; + __cpuidex(info_signed, infoType, extra); + std::copy_n(info_signed, 4, info); } #else From 2eec0fc7a88a235a6c96933b293a929caaac2f9b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 6 Apr 2022 02:44:38 +0300 Subject: [PATCH 4/4] Address review notes --- src/Target.cpp | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/src/Target.cpp b/src/Target.cpp index c21d2b37b865..c7df681c52f0 100644 --- a/src/Target.cpp +++ b/src/Target.cpp @@ -32,10 +32,8 @@ using std::vector; namespace { #ifdef _MSC_VER -static void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { - int info_signed[4]; - __cpuidex(info_signed, infoType, extra); - std::copy_n(info_signed, 4, info); +static void cpuid(int info[4], int infoType, int extra) { + __cpuidex(info, infoType, extra); } #else @@ -44,14 +42,14 @@ static void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { // (https://github.com/ispc/ispc/blob/master/builtins/dispatch.ll) #ifdef _LP64 -void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { +void cpuid(int info[4], int infoType, int extra) { __asm__ __volatile__( "cpuid \n\t" : "=a"(info[0]), "=b"(info[1]), "=c"(info[2]), "=d"(info[3]) : "0"(infoType), "2"(extra)); } #else -static void cpuid(unsigned info[4], unsigned infoType, unsigned extra) { +static void cpuid(int info[4], int infoType, int extra) { // We save %ebx in case it's the PIC register __asm__ __volatile__( "mov{l}\t{%%}ebx, %1 \n\t" @@ -73,7 +71,7 @@ enum class VendorSignatures { }; VendorSignatures get_vendor_signature() { - unsigned info[4]; + int info[4]; cpuid(info, 0, 0); if (info[0] < 1) { @@ -93,8 +91,7 @@ VendorSignatures get_vendor_signature() { return VendorSignatures::Unknown; } -void detect_family_and_model(unsigned info0, unsigned &family, - unsigned &model) { +void detect_family_and_model(int info0, unsigned &family, unsigned &model) { family = (info0 >> 8) & 0xF; // Bits 8..11 model = (info0 >> 4) & 0xF; // Bits 4..7 if (family == 0x6 || family == 0xF) { @@ -173,7 +170,7 @@ Target calculate_host_target() { VendorSignatures vendor_signature = get_vendor_signature(); - unsigned info[4]; + int info[4]; cpuid(info, 1, 0); unsigned family = 0, model = 0; @@ -215,7 +212,7 @@ Target calculate_host_target() { if (use_64_bits && have_avx && have_f16c && have_rdrand) { // So far, so good. AVX2/512? // Call cpuid with eax=7, ecx=0 - unsigned info2[4]; + int info2[4]; cpuid(info2, 7, 0); const uint32_t avx2 = 1U << 5; const uint32_t avx512f = 1U << 16; @@ -249,7 +246,7 @@ Target calculate_host_target() { const uint32_t avx512vnni = 1U << 11; // vnni result in ecx const uint32_t avx512bf16 = 1U << 5; // bf16 result in eax, with cpuid(eax=7, ecx=1) - unsigned info3[4]; + int info3[4]; cpuid(info3, 7, 1); // TODO: port to family/model -based detection. if ((info2[2] & avx512vnni) == avx512vnni &&