Skip to content

Commit

Permalink
-mtune=native CPU autodetection for AMD Zen 3 CPU
Browse files Browse the repository at this point in the history
  • Loading branch information
LebedevRI committed Mar 31, 2022
1 parent 40f895d commit c21c893
Showing 1 changed file with 71 additions and 0 deletions.
71 changes: 71 additions & 0 deletions src/Target.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,63 @@ static void cpuid(int info[4], int infoType, int extra) {
#endif
#endif

#if defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER)

/// CPU vendor, as classified by get_vendor_signature() from the CPUID
/// vendor string.
enum class VendorSignatures {
    UNKNOWN = 0,        ///< Vendor string was not recognised.
    GENUINE_INTEL = 1,  ///< Vendor string is "GenuineIntel".
    AUTHENTIC_AMD = 2,  ///< Vendor string is "AuthenticAMD".
};

/// Classify the host CPU vendor from the result of cpuid(info, 0, 0).
///
/// The 12-character vendor string is compared four characters at a time,
/// taking the words in the order info[1], info[3], info[2].
///
/// @return GENUINE_INTEL or AUTHENTIC_AMD when the string matches, otherwise
///         UNKNOWN. UNKNOWN is also returned when info[0] is below 1
///         (presumably meaning CPUID leaf 1 is unavailable -- confirm
///         against the CPUID specification).
VendorSignatures get_vendor_signature() {
    int regs[4];
    cpuid(regs, 0, 0);

    const int leaf_limit = regs[0];
    if (leaf_limit < 1) {
        return VendorSignatures::UNKNOWN;
    }

    // Vendor string chunks, in reading order.
    const int chunk0 = regs[1];
    const int chunk1 = regs[3];
    const int chunk2 = regs[2];

    // "Genu" "ineI" "ntel"
    const bool is_intel =
        chunk0 == 0x756e6547 && chunk1 == 0x49656e69 && chunk2 == 0x6c65746e;
    // "Auth" "enti" "cAMD"
    const bool is_amd =
        chunk0 == 0x68747541 && chunk1 == 0x69746e65 && chunk2 == 0x444d4163;

    if (is_intel) {
        return VendorSignatures::GENUINE_INTEL;
    }
    if (is_amd) {
        return VendorSignatures::AUTHENTIC_AMD;
    }
    return VendorSignatures::UNKNOWN;
}

/// Decode the CPU family and model numbers from a packed signature word.
///
/// @param info0   Packed signature; the caller passes info[0] obtained from
///                cpuid(info, 1, 0).
/// @param family  Receives bits 8..11. When that field is 0xF, the extended
///                family (bits 20..27) is added to it.
/// @param model   Receives bits 4..7. When the base family is 0x6 or 0xF, the
///                extended model (bits 16..19) supplies the upper four bits.
void detect_family_and_model(unsigned info0, unsigned &family,
                             unsigned &model) {
    const unsigned base_model = (info0 >> 4) & 0xF;        // Bits 4..7
    const unsigned base_family = (info0 >> 8) & 0xF;       // Bits 8..11
    const unsigned extended_model = (info0 >> 16) & 0xF;   // Bits 16..19
    const unsigned extended_family = (info0 >> 20) & 0xFF; // Bits 20..27

    family = base_family;
    model = base_model;

    // Only base families 0x6 and 0xF make use of the extended fields.
    if (family != 0x6 && family != 0xF) {
        return;
    }

    // The extended family ID only applies when the base family ID is 0xF.
    if (family == 0xF) {
        family += extended_family;
    }
    // The extended model ID forms the high nibble of the model number.
    model += extended_model << 4;
}

/// Map an AMD CPU's family/model numbers to a Target::Processor value.
///
/// @param family  CPU family number, as produced by detect_family_and_model().
/// @param model   CPU model number; currently not consulted.
/// @return Target::Processor::ZnVer3 for family 0x19, otherwise
///         Target::Processor::ProcessorGeneric.
Target::Processor get_amd_processor(unsigned family, unsigned model) {
    // FIXME: do we need to check model number?
    // NOTE(review): other AMD parts may share family 0x19 -- confirm whether
    // the model needs to be examined to tell them apart.
    if (family == 0x19) {
        return Target::Processor::ZnVer3;
    }

    // Unknown AMD CPU.
    return Target::Processor::ProcessorGeneric;
}

#endif // defined(__x86_64__) || defined(__i386__) || defined(_MSC_VER)

Target calculate_host_target() {
Target::OS os = Target::OSUnknown;
#ifdef __linux__
Expand Down Expand Up @@ -111,8 +168,18 @@ Target calculate_host_target() {
#else
Target::Arch arch = Target::X86;

VendorSignatures vendor_signature = get_vendor_signature();

int info[4];
cpuid(info, 1, 0);

unsigned family = 0, model = 0;
detect_family_and_model(info[0], family, model);

if (vendor_signature == VendorSignatures::AUTHENTIC_AMD) {
processor = get_amd_processor(family, model);
}

bool have_sse41 = (info[2] & (1 << 19)) != 0;
bool have_sse2 = (info[3] & (1 << 26)) != 0;
bool have_avx = (info[2] & (1 << 28)) != 0;
Expand Down Expand Up @@ -165,19 +232,23 @@ Target calculate_host_target() {
}
if ((info2[1] & avx512) == avx512) {
initial_features.push_back(Target::AVX512);
// FIXME: port to family/model -based detection.
if ((info2[1] & avx512_knl) == avx512_knl) {
initial_features.push_back(Target::AVX512_KNL);
}
// FIXME: port to family/model -based detection.
if ((info2[1] & avx512_skylake) == avx512_skylake) {
initial_features.push_back(Target::AVX512_Skylake);
}
// FIXME: port to family/model -based detection.
if ((info2[1] & avx512_cannonlake) == avx512_cannonlake) {
initial_features.push_back(Target::AVX512_Cannonlake);

const uint32_t avx512vnni = 1U << 11; // vnni result in ecx
const uint32_t avx512bf16 = 1U << 5; // bf16 result in eax, with cpuid(eax=7, ecx=1)
int info3[4];
cpuid(info3, 7, 1);
// FIXME: port to family/model -based detection.
if ((info2[2] & avx512vnni) == avx512vnni &&
(info3[0] & avx512bf16) == avx512bf16) {
initial_features.push_back(Target::AVX512_SapphireRapids);
Expand Down

0 comments on commit c21c893

Please sign in to comment.