-
-
Notifications
You must be signed in to change notification settings - Fork 5.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Improve arch/cpu detection/selection on ARM and AArch64 #18100
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -87,6 +87,7 @@ | |
|
||
#if defined(_CPU_ARM_) || defined(_CPU_AARCH64_) | ||
# include <llvm/IR/InlineAsm.h> | ||
# include <sys/utsname.h> | ||
#endif | ||
#if defined(USE_POLLY) | ||
#include <polly/RegisterPasses.h> | ||
|
@@ -5541,10 +5542,68 @@ static void init_julia_llvm_env(Module *m) | |
addOptimizationPasses(jl_globalPM); | ||
} | ||
|
||
static inline std::string getNativeTarget() | ||
{ | ||
std::string cpu = sys::getHostCPUName(); | ||
#if defined(_CPU_ARM_) | ||
// Try slightly harder than LLVM at determine the CPU architecture. | ||
if (cpu == "generic") { | ||
// This is the most reliable way I can find | ||
// `/proc/cpuinfo` changes between kernel versions | ||
struct utsname name; | ||
if (uname(&name) >= 0) { | ||
// name.machine is the elf_platform in the kernel. | ||
if (strcmp(name.machine, "armv6l") == 0) { | ||
return "armv6"; | ||
} | ||
if (strcmp(name.machine, "armv7l") == 0) { | ||
return "armv7"; | ||
} | ||
if (strcmp(name.machine, "armv7ml") == 0) { | ||
// Thumb | ||
return "armv7-m"; | ||
} | ||
if (strcmp(name.machine, "armv8l") == 0 || | ||
strcmp(name.machine, "aarch64") == 0) { | ||
return "armv8"; | ||
} | ||
} | ||
} | ||
#endif | ||
return cpu; | ||
} | ||
|
||
#if defined(_CPU_ARM_) || defined(_CPU_AARCH64_) | ||
// Check if the cpu name is an ARM/AArch64 arch name and return a | ||
// string that can be used as LLVM feature name | ||
// Translate an `armv<N>...` architecture name into the string used as an
// LLVM feature name.
//
// Returns an empty string when `cpu` is not of the form "armv" followed by
// a digit in the range 1-9 (optionally followed by more characters).
// Otherwise:
//  - on ARM builds (`_CPU_ARM_`), "armv7" and "armv8" become "v7" and "v8"
//    and every other name is returned as is;
//  - on other builds the leading "arm" is dropped (e.g. "armv8.1a" gives
//    "v8.1a").
static inline std::string checkARMArchFeature(const std::string &cpu)
{
    static const char arch_prefix[] = "armv";
    const size_t prefix_len = sizeof(arch_prefix) - 1;

    // Must be strictly longer than the prefix so a version character exists.
    if (cpu.size() <= prefix_len)
        return std::string();
    if (cpu.compare(0, prefix_len, arch_prefix) != 0)
        return std::string();

    const char version = cpu[prefix_len];
    const bool is_version_digit = (version >= '1' && version <= '9');
    if (!is_version_digit)
        return std::string();

#if defined(_CPU_ARM_)
    // "v7" and "v8" are not available in the form of `armv*`
    // in the feature list
    if (cpu == "armv7")
        return "v7";
    if (cpu == "armv8")
        return "v8";
    return cpu;
#else
    // Strip the leading "arm", keeping the "v..." part.
    return cpu.substr(prefix_len - 1);
#endif
}
#endif | ||
|
||
// Helper to figure out what features to set for the LLVM target | ||
// If the user specifies native (or does not specify) we default | ||
// to using the API provided by LLVM | ||
static inline SmallVector<std::string,10> getTargetFeatures() | ||
static inline SmallVector<std::string,10> getTargetFeatures(std::string &cpu) | ||
{ | ||
StringMap<bool> HostFeatures; | ||
if (!strcmp(jl_options.cpu_target,"native")) { | ||
|
@@ -5573,16 +5632,63 @@ static inline SmallVector<std::string,10> getTargetFeatures() | |
#endif | ||
|
||
// Figure out if we know the cpu_target | ||
std::string cpu = strcmp(jl_options.cpu_target,"native") ? jl_options.cpu_target : sys::getHostCPUName(); | ||
if (cpu.empty() || cpu == "generic") { | ||
jl_printf(JL_STDERR, "WARNING: unable to determine host cpu name.\n"); | ||
#if defined(_CPU_ARM_) && defined(__ARM_PCS_VFP) | ||
// Check if this is required when you have read the features directly from the processor | ||
// This affects the platform calling convention. | ||
// TODO: enable vfp3 for ARMv7+ (but adapt the ABI) | ||
HostFeatures["vfp2"] = true; | ||
#endif | ||
cpu = (strcmp(jl_options.cpu_target,"native") ? jl_options.cpu_target : | ||
getNativeTarget()); | ||
#if defined(_CPU_ARM_) | ||
// Figure out what we are compiling against from the C defines. | ||
// This might affect ABI but is fine since | ||
// 1. We define the C ABI explicitly. | ||
// 2. This does not change when running the same binary on different | ||
// machines. | ||
// This shouldn't affect making generic binaries since that requires a | ||
// generic C -march anyway. | ||
HostFeatures["vfp2"] = true; | ||
|
||
// Arch version | ||
#if __ARM_ARCH >= 8 | ||
HostFeatures["v8"] = true; | ||
#elif __ARM_ARCH >= 7 | ||
HostFeatures["v7"] = true; | ||
#else | ||
// minimum requirement | ||
HostFeatures["v6"] = true; | ||
#endif | ||
|
||
// ARM profile | ||
// Only do this on ARM and not AArch64 since LLVM aarch64 backend | ||
// doesn't support setting profiles. | ||
// AFAIK there's currently no 64bit R and M profile either | ||
// (v8r and v8m are both 32bit) | ||
#if defined(__ARM_ARCH_PROFILE) | ||
# if __ARM_ARCH_PROFILE == 'A' | ||
HostFeatures["aclass"] = true; | ||
# elif __ARM_ARCH_PROFILE == 'R' | ||
HostFeatures["rclass"] = true; | ||
# elif __ARM_ARCH_PROFILE == 'M' | ||
// Thumb | ||
HostFeatures["mclass"] = true; | ||
# endif | ||
#endif | ||
#endif // _CPU_ARM_ | ||
|
||
// On ARM and AArch64, allow using cpu_target to specify a CPU architecture | ||
// which is specified in the feature set in LLVM. | ||
#if defined(_CPU_ARM_) || defined(_CPU_AARCH64_) | ||
// Supported ARM arch names on LLVM 3.8: | ||
// armv6, armv6-m, armv6j, armv6k, armv6kz, armv6s-m, armv6t2, | ||
// armv7, armv7-a, armv7-m, armv7-r, armv7e-m, armv7k, armv7s, | ||
// armv8, armv8-a, armv8.1-a, armv8.2-a | ||
// Additional ARM arch names on LLVM 3.9: | ||
// armv8-m.base, armv8-m.main | ||
// | ||
// Supported AArch64 arch names on LLVM 3.8: | ||
// armv8.1a, armv8.2a | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. it's too bad llvm felt ProcDesc needed to be private: http://llvm.org/docs/doxygen/html/MCSubtargetInfo_8h_source.html#l00033 or we could have auto-generated this list for the user on-demand and provided useful help messages. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The list is auto-generated with llc and is just listed here for better reference. We also support generating this list with There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OTOH, not having access to this list at runtime (other than letting LLVM print a help message) means that some of the logic above has to be hard coded instead of going through a fallback list and printing the cpu/feature not-recognized warning once instead of once every codegen.... There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I didn't realize that worked. In llvm 3.3, it also called exit, but now it continues, so I guess we could intentionally create a |
||
std::string arm_arch = checkARMArchFeature(cpu); | ||
if (!arm_arch.empty()) { | ||
HostFeatures[arm_arch] = true; | ||
cpu = "generic"; | ||
} | ||
#endif | ||
|
||
SmallVector<std::string,10> attr; | ||
for (StringMap<bool>::const_iterator it = HostFeatures.begin(); it != HostFeatures.end(); it++) { | ||
|
@@ -5699,8 +5805,8 @@ extern "C" void jl_init_codegen(void) | |
TheTriple.setEnvironment(Triple::ELF); | ||
#endif | ||
#endif | ||
std::string TheCPU = strcmp(jl_options.cpu_target,"native") ? jl_options.cpu_target : sys::getHostCPUName(); | ||
SmallVector<std::string, 10> targetFeatures = getTargetFeatures( ); | ||
std::string TheCPU; | ||
SmallVector<std::string, 10> targetFeatures = getTargetFeatures(TheCPU); | ||
jl_TargetMachine = eb.selectTarget( | ||
TheTriple, | ||
"", | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
should we suggest this to llvm too?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Potentially by including this in the feature detection function. The LLVM cpu/feature separation seems to be inconsistent with other targets since the cpu list doesn't include any generic target (other than the "generic" target itself). This is possibly related to the difficulty of determining the base instruction set. I'll probably open an LLVM issue asking about this.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
https://llvm.org/bugs/show_bug.cgi?id=29030