Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add feature detection for ARM/MacOS #41924

Merged
merged 22 commits into from
Feb 15, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
78 changes: 59 additions & 19 deletions src/processor_arm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
# undef USE_DYN_GETAUXVAL
# include <sys/auxv.h>
# endif
#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_
#include <sys/sysctl.h>
#include <string.h>
#endif

namespace ARM {
Expand Down Expand Up @@ -160,6 +163,8 @@ enum class CPU : uint32_t {
apple_a11,
apple_a12,
apple_a13,
apple_a14,
apple_m1,
apple_s4,
apple_s5,

Expand Down Expand Up @@ -240,6 +245,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2);
constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm);
constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2);
constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint);
constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2);
constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16);

// For ARM cores, the features required can be found in the technical reference manual
Expand Down Expand Up @@ -342,6 +348,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm);
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16);
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3);
constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3);
// Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def
// and sysctl -a hw.optional
constexpr auto apple_s4 = apple_a12;
constexpr auto apple_s5 = apple_a12;

Expand Down Expand Up @@ -420,6 +430,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11},
{"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12},
{"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13},
{"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14},
{"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1},
{"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4},
{"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5},
{"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000,
Expand Down Expand Up @@ -662,13 +674,47 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = {
{"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2},
{"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3},
{"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4},
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000,
Feature::samsung_exynos_m5},
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5},
{"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7},
};
#endif
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]);

// Look up a CPU by its numeric id in this file's `cpus` table.
// Forwards to the global-namespace ::find_cpu, passing the table and its
// element count (ncpu_names); the result on a miss is whatever ::find_cpu
// yields (not visible from here).
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
{
    const CPUSpec<CPU,feature_sz> *spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}

// Look up a CPU by name in this file's `cpus` table.
// Forwards to the global-namespace ::find_cpu, passing the table and its
// element count (ncpu_names); the result on a miss is whatever ::find_cpu
// yields (not visible from here).
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
{
    const CPUSpec<CPU,feature_sz> *spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}

static inline const char *find_cpu_name(uint32_t cpu)
{
return ::find_cpu_name(cpu, cpus, ncpu_names);
}

#if defined _CPU_AARCH64_ && defined _OS_DARWIN_

static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
{
char buffer[128];
size_t bufferlen = 128;
sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0);
Copy link
Contributor

@yuyichao yuyichao Dec 5, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a good function to keep for future reference, but the fallback should be CPU::apple_m1. As the code is currently written, the next generation of chip will be detected as generic, and I don't think that's desired. The Linux version gets around this as much as possible by doing full feature detection (so the only thing missing would be the scheduling model, which we can't do much about...), but there's nothing like that here. I highly doubt Apple will release a new processor for the Mac that has fewer userspace CPU features than the M1, so it should be safe to assume so. And it seems to be what other projects assume as well.


if(strcmp(buffer,"Apple M1") == 0)
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
else if(strcmp(buffer,"Apple M1 Max") == 0)
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
else if(strcmp(buffer,"Apple M1 Pro") == 0)
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
else
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1);
Comment on lines +706 to +713
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm, while I appreciate the intent to keep everything explicit, I wonder whether the repetition doesn't just add more clutter than necessary for now. Also, note that Apple themselves recommend parsing hw.optional to detect CPU features, although I suppose the number of desktop(-ish) Apple CPUs will remain manageable for a number of years to come.

Copy link
Member Author

@gbaraldi gbaraldi Jan 28, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Parsing it is probably more complicated than just hardcoding things IMO, especially because the effort to add new features to an eventual parser is probably the same as just adding a new CPU to the list.

}

#else

// auxval reader

#ifndef AT_HWCAP
Expand Down Expand Up @@ -974,7 +1020,7 @@ static CPU get_cpu_name(CPUID cpuid)
default: return CPU::generic;
}
case 0x61: // 'a': Apple
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html
// https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html
switch (cpuid.part) {
case 0x0: // Swift
return CPU::apple_swift;
Expand Down Expand Up @@ -1002,6 +1048,12 @@ static CPU get_cpu_name(CPUID cpuid)
case 0x12: // Lightning
case 0x13: // Thunder
return CPU::apple_a13;
case 0x20: // Icestorm
case 0x21: // Firestorm
return CPU::apple_a14;
case 0x22: // Icestorm m1
case 0x23: // Firestorm m1
return CPU::apple_m1;
default: return CPU::generic;
}
case 0x68: // 'h': Huaxintong Semiconductor
Expand All @@ -1019,6 +1071,9 @@ static CPU get_cpu_name(CPUID cpuid)
}
}




namespace {

struct arm_arch {
Expand Down Expand Up @@ -1062,21 +1117,6 @@ static arm_arch get_elf_arch(void)
#endif
}

// Look up a CPU by its numeric id in this file's `cpus` table.
// Forwards to the global-namespace ::find_cpu, passing the table and its
// element count (ncpu_names); the result on a miss is whatever ::find_cpu
// yields (not visible from here).
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
{
    const CPUSpec<CPU,feature_sz> *spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}

// Look up a CPU by name in this file's `cpus` table.
// Forwards to the global-namespace ::find_cpu, passing the table and its
// element count (ncpu_names); the result on a miss is whatever ::find_cpu
// yields (not visible from here).
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
{
    const CPUSpec<CPU,feature_sz> *spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}

static inline const char *find_cpu_name(uint32_t cpu)
{
return ::find_cpu_name(cpu, cpus, ncpu_names);
}

static arm_arch feature_arch_version(const FeatureList<feature_sz> &feature)
{
#ifdef _CPU_AARCH64_
Expand Down Expand Up @@ -1303,9 +1343,9 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu()
}
// Ignore feature bits that we are not interested in.
mask_features(feature_masks, &features[0]);

return std::make_pair(cpu, features);
}
#endif

static inline const std::pair<uint32_t,FeatureList<feature_sz>> &get_host_cpu()
{
Expand Down