-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add feature detection for ARM/MacOS #41924
Changes from all commits
223ae7e
5f731c5
5c4e7f4
0b165f0
efe722c
8295ffb
399d22c
2fddfba
ed30203
9e3ffe8
d96e2ed
a3f5904
4ecb98d
498bac7
b44c8e2
6271618
aec01a6
98c9fa6
b41771d
ee77b8a
0053128
71d21f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,9 @@ | |
# undef USE_DYN_GETAUXVAL | ||
# include <sys/auxv.h> | ||
# endif | ||
#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_ | ||
#include <sys/sysctl.h> | ||
#include <string.h> | ||
#endif | ||
|
||
namespace ARM { | ||
|
@@ -160,6 +163,8 @@ enum class CPU : uint32_t { | |
apple_a11, | ||
apple_a12, | ||
apple_a13, | ||
apple_a14, | ||
apple_m1, | ||
apple_s4, | ||
apple_s5, | ||
|
||
|
@@ -240,6 +245,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2); | |
constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm); | ||
constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2); | ||
constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint); | ||
constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2); | ||
constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16); | ||
|
||
// For ARM cores, the features required can be found in the technical reference manual | ||
|
@@ -342,6 +348,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm); | |
constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); | ||
constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); | ||
constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); | ||
constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); | ||
constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); | ||
// Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def | ||
// and sysctl -a hw.optional | ||
constexpr auto apple_s4 = apple_a12; | ||
constexpr auto apple_s5 = apple_a12; | ||
|
||
|
@@ -420,6 +430,8 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = { | |
{"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11}, | ||
{"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12}, | ||
{"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13}, | ||
{"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14}, | ||
{"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1}, | ||
{"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4}, | ||
{"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5}, | ||
{"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000, | ||
|
@@ -662,13 +674,47 @@ static constexpr CPUSpec<CPU, feature_sz> cpus[] = { | |
{"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2}, | ||
{"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3}, | ||
{"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4}, | ||
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, | ||
Feature::samsung_exynos_m5}, | ||
{"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5}, | ||
{"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7}, | ||
}; | ||
#endif | ||
static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]); | ||
|
||
// Resolve a numeric CPU id against this file's `cpus` table.
// Forwards to the global-scope ::find_cpu helper, supplying the table and its
// element count (`ncpu_names`), and hands back whatever entry pointer it yields.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
{
    const CPUSpec<CPU,feature_sz> *const spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}
|
||
// Resolve a CPU name against this file's `cpus` table.
// Forwards to the global-scope ::find_cpu helper, supplying the table and its
// element count (`ncpu_names`), and hands back whatever entry pointer it yields.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
{
    const CPUSpec<CPU,feature_sz> *const spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}
|
||
static inline const char *find_cpu_name(uint32_t cpu) | ||
{ | ||
return ::find_cpu_name(cpu, cpus, ncpu_names); | ||
} | ||
|
||
#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ | ||
|
||
static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu() | ||
{ | ||
char buffer[128]; | ||
size_t bufferlen = 128; | ||
sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); | ||
|
||
if(strcmp(buffer,"Apple M1") == 0) | ||
vchuravy marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); | ||
else if(strcmp(buffer,"Apple M1 Max") == 0) | ||
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); | ||
else if(strcmp(buffer,"Apple M1 Pro") == 0) | ||
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); | ||
else | ||
return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); | ||
Comment on lines +706 to +713
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Hmm, while I appreciate the intent to keep everything explicit, I wonder whether the repetition doesn't just add more clutter than necessary for now. Also, note that Apple themselves recommend parsing There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Parsing it is probably more complicated than just hardcoding things IMO, especially because the effort to add new features to an eventual parser is probably the same as just adding a new CPU to the list. |
||
} | ||
|
||
#else | ||
|
||
// auxval reader | ||
|
||
#ifndef AT_HWCAP | ||
|
@@ -974,7 +1020,7 @@ static CPU get_cpu_name(CPUID cpuid) | |
default: return CPU::generic; | ||
} | ||
case 0x61: // 'a': Apple | ||
// https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html | ||
// https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html | ||
switch (cpuid.part) { | ||
case 0x0: // Swift | ||
return CPU::apple_swift; | ||
|
@@ -1002,6 +1048,12 @@ static CPU get_cpu_name(CPUID cpuid) | |
case 0x12: // Lightning | ||
case 0x13: // Thunder | ||
return CPU::apple_a13; | ||
case 0x20: // Icestorm | ||
case 0x21: // Firestorm | ||
return CPU::apple_a14; | ||
case 0x22: // Icestorm m1 | ||
case 0x23: // Firestorm m1 | ||
return CPU::apple_m1; | ||
default: return CPU::generic; | ||
} | ||
case 0x68: // 'h': Huaxintong Semiconductor | ||
|
@@ -1019,6 +1071,9 @@ static CPU get_cpu_name(CPUID cpuid) | |
} | ||
} | ||
|
||
|
||
|
||
|
||
namespace { | ||
|
||
struct arm_arch { | ||
|
@@ -1062,21 +1117,6 @@ static arm_arch get_elf_arch(void) | |
#endif | ||
} | ||
|
||
// Resolve a numeric CPU id against this file's `cpus` table.
// Forwards to the global-scope ::find_cpu helper, supplying the table and its
// element count (`ncpu_names`), and hands back whatever entry pointer it yields.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(uint32_t cpu)
{
    const CPUSpec<CPU,feature_sz> *const spec = ::find_cpu(cpu, cpus, ncpu_names);
    return spec;
}
|
||
// Resolve a CPU name against this file's `cpus` table.
// Forwards to the global-scope ::find_cpu helper, supplying the table and its
// element count (`ncpu_names`), and hands back whatever entry pointer it yields.
static inline const CPUSpec<CPU,feature_sz> *find_cpu(llvm::StringRef name)
{
    const CPUSpec<CPU,feature_sz> *const spec = ::find_cpu(name, cpus, ncpu_names);
    return spec;
}
|
||
static inline const char *find_cpu_name(uint32_t cpu) | ||
{ | ||
return ::find_cpu_name(cpu, cpus, ncpu_names); | ||
} | ||
|
||
static arm_arch feature_arch_version(const FeatureList<feature_sz> &feature) | ||
{ | ||
#ifdef _CPU_AARCH64_ | ||
|
@@ -1303,9 +1343,9 @@ static NOINLINE std::pair<uint32_t,FeatureList<feature_sz>> _get_host_cpu() | |
} | ||
// Ignore feature bits that we are not interested in. | ||
mask_features(feature_masks, &features[0]); | ||
|
||
return std::make_pair(cpu, features); | ||
} | ||
#endif | ||
|
||
static inline const std::pair<uint32_t,FeatureList<feature_sz>> &get_host_cpu() | ||
{ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good function to keep for future reference, but the fallback should be `CPU::apple_m1`. As the code is currently written, the next generation of chip will be detected as generic, and I don't think that's desired. The Linux version gets around this as much as possible by doing a full feature detection (so the only thing missing would be the scheduling model, which we can't do that much about...), but there's nothing like that here. I highly doubt Apple will release a new processor for Mac that has fewer userspace CPU features than M1, so it should be safe to assume so. And it seems to be what other projects assume as well.