From 223ae7efca8d0853850b094eb1b30dc386fd128d Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Wed, 18 Aug 2021 19:26:59 -0300 Subject: [PATCH 01/16] Add feature detection for ARM/MacOS --- .../md5 | 1 + .../sha512 | 1 + src/processor_arm.cpp | 28 +++++++++++++++++-- 3 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 create mode 100644 deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 diff --git a/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 new file mode 100644 index 0000000000000..1adeeda0d1f23 --- /dev/null +++ b/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 @@ -0,0 +1 @@ +949a6cd6ac04e41f89cef401d7e5ca74 diff --git a/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 new file mode 100644 index 0000000000000..9a2bec6f14a58 --- /dev/null +++ b/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 @@ -0,0 +1 @@ +60612bd0bfeb1e00a452aea8024454110838eb20889be3f82fa6ee25884f791d7fd21c156aaca68e94561f084ab83b44f71c7740486d8233c5451ff267e85d9a diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index a411314e34e9d..2b5694281bb38 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -160,6 +160,8 @@ enum class CPU : uint32_t { apple_a11, apple_a12, apple_a13, + apple_a14, + apple_m1, apple_s4, apple_s5, @@ -342,7 +344,10 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm); constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); -constexpr auto apple_s4 = apple_a12; +constexpr auto apple_a14 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); +constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Support/AArch64TargetParser.def + // and sysctl -a hw.optional +constexpr auto apple_s4 = apple_a12; constexpr auto apple_s5 = apple_a12; } @@ -420,6 +425,8 @@ static constexpr CPUSpec cpus[] = { {"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11}, {"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12}, {"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13}, + {"apple-a14", CPU::apple_a14, CPU::generic, 100000, Feature::apple_a14}, + {"apple-m1", CPU::apple_m1, CPU::generic, 100000, Feature::apple_m1}, {"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4}, {"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5}, {"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000, @@ -1002,6 +1009,10 @@ static CPU get_cpu_name(CPUID cpuid) case 0x12: // Lightning case 0x13: // Thunder return CPU::apple_a13; + case 0x20: // Icestorm + case 0x21: // Firestorm + return CPU::apple_a14; + // return CPU::apple_m1; //LLVM doesn't have support for this name yet default: return CPU::generic; } case 0x68: // 'h': Huaxintong Semiconductor @@ -1187,6 +1198,18 @@ static NOINLINE std::pair> _get_host_cpu() // Here we assume that only the lower 32bit are used on aarch64 // Change the cast here when that's not the case anymore (and when there's features in the // high bits that we want to detect). +#ifdef _CPU_AARCH64_ && _OS_DARWIN_ + CPUID info = { + uint8_t(0x61), + uint8_t(0), + uint16_t(0x21) + }; // Hardcoded Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html + std::vector> list; + auto name = (uint32_t)get_cpu_name(info); + auto arch = get_elf_arch(); + features = find_cpu(name)->features; + list.emplace_back(name, info); +#else features[0] = (uint32_t)jl_getauxval(AT_HWCAP); features[1] = (uint32_t)jl_getauxval(AT_HWCAP2); #ifdef _CPU_AARCH64_ @@ -1290,6 +1313,7 @@ static NOINLINE std::pair> _get_host_cpu() CPU::arm_cortex_a17 }; shrink_big_little(list, v7order, sizeof(v7order) / sizeof(CPU)); +#endif #endif uint32_t cpu = 0; if (list.empty()) { @@ -1301,9 +1325,9 @@ static NOINLINE std::pair> _get_host_cpu() // one... cpu = list[0].first; } + // Ignore feature bits that we are not interested in. mask_features(feature_masks, &features[0]); - return std::make_pair(cpu, features); } From 5f731c57c567734e99bf49686a3cdb1a4701b6a7 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 18 Aug 2021 19:57:51 -0300 Subject: [PATCH 02/16] Delete md5 --- .../SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 diff --git a/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 b/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 deleted file mode 100644 index 1adeeda0d1f23..0000000000000 --- a/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/md5 +++ /dev/null @@ -1 +0,0 @@ -949a6cd6ac04e41f89cef401d7e5ca74 From 5c4e7f46b9c8d668551680b89e4c8c95753cf26e Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 18 Aug 2021 19:58:05 -0300 Subject: [PATCH 03/16] Delete sha512 --- .../SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 | 1 - 1 file changed, 1 deletion(-) delete mode 100644 deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 diff --git a/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 b/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 deleted file mode 100644 index 9a2bec6f14a58..0000000000000 --- a/deps/checksums/SuiteSparse.v5.8.1+2.aarch64-apple-darwin.tar.gz/sha512 +++ /dev/null @@ -1 +0,0 @@ -60612bd0bfeb1e00a452aea8024454110838eb20889be3f82fa6ee25884f791d7fd21c156aaca68e94561f084ab83b44f71c7740486d8233c5451ff267e85d9a From 0b165f0d793f95b3653964f771f68f7d8e40a05f Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 18 Aug 2021 19:59:40 -0300 Subject: [PATCH 04/16] Change to Permalink --- src/processor_arm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 2b5694281bb38..a16cd7849d6d5 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -345,7 +345,7 @@ constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); constexpr auto apple_a14 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); -constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Support/AArch64TargetParser.def +constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def // and sysctl -a hw.optional constexpr auto apple_s4 = apple_a12; constexpr auto apple_s5 = apple_a12; From efe722c327f2524504da477c62ad4e266120a4b5 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 18 Aug 2021 19:59:40 -0300 Subject: [PATCH 05/16] Change to Permalink --- src/processor_arm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 2b5694281bb38..e29549471a5ad 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -345,7 +345,7 @@ constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); constexpr auto apple_a14 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); -constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/main/llvm/include/llvm/Support/AArch64TargetParser.def +constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def // and sysctl -a hw.optional constexpr auto apple_s4 = apple_a12; constexpr auto apple_s5 = apple_a12; From 8295ffb34c6108ed6b6e9d3b8efc45d910a99834 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Thu, 19 Aug 2021 09:04:00 -0300 Subject: [PATCH 06/16] Fixing whitespaces --- src/processor_arm.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index e29549471a5ad..dd21d8d1b8bb2 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -345,9 +345,8 @@ constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); constexpr auto apple_a14 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); -constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def - // and sysctl -a hw.optional -constexpr auto apple_s4 = apple_a12; +constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def and sysctl -a hw.optional +constexpr auto apple_s4 = apple_a12; constexpr auto apple_s5 = apple_a12; } @@ -1325,7 +1324,6 @@ static NOINLINE std::pair> _get_host_cpu() // one... cpu = list[0].first; } - // Ignore feature bits that we are not interested in. mask_features(feature_masks, &features[0]); return std::make_pair(cpu, features); From d96e2edd4836771664fd6d2a0bfd45ad0e76a906 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sat, 4 Dec 2021 13:00:04 -0300 Subject: [PATCH 07/16] Add review changes --- src/processor_arm.cpp | 42 +++++++++++++++++++++++++++++++----------- 1 file changed, 31 insertions(+), 11 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 43183bbec015c..2476448c2a338 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -242,6 +242,7 @@ constexpr auto armv8_3a_crypto = armv8_3a | get_feature_masks(aes, sha2); constexpr auto armv8_4a = armv8_3a | get_feature_masks(v8_4a, dit, rcpc_immo, flagm); constexpr auto armv8_4a_crypto = armv8_4a | get_feature_masks(aes, sha2); constexpr auto armv8_5a = armv8_4a | get_feature_masks(v8_5a, sb, ccdp, altnzcv, fptoint); +constexpr auto armv8_5a_crypto = armv8_5a | get_feature_masks(aes, sha2); constexpr auto armv8_6a = armv8_5a | get_feature_masks(v8_6a, i8mm, bf16); // For ARM cores, the features required can be found in the technical reference manual @@ -344,8 +345,8 @@ constexpr auto apple_a10 = armv8a_crc_crypto | get_feature_masks(rdm); constexpr auto apple_a11 = armv8_2a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a12 = armv8_3a_crypto | get_feature_masks(fullfp16); constexpr auto apple_a13 = armv8_4a_crypto | get_feature_masks(fp16fml, fullfp16, sha3); -constexpr auto apple_a14 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); -constexpr auto apple_m1 = armv8_5a | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); +constexpr auto apple_a14 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); +constexpr auto apple_m1 = armv8_5a_crypto | get_feature_masks(dotprod,fp16fml, fullfp16, sha3); // Features based on https://github.com/llvm/llvm-project/blob/82507f1798768280cf5d5aab95caaafbc7fe6f47/llvm/include/llvm/Support/AArch64TargetParser.def // and sysctl -a hw.optional constexpr auto apple_s4 = apple_a12; @@ -426,8 +427,8 @@ static constexpr CPUSpec cpus[] = { {"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11}, {"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12}, {"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13}, - {"apple-a14", CPU::apple_a14, CPU::generic, 100000, Feature::apple_a14}, - {"apple-m1", CPU::apple_m1, CPU::generic, 100000, Feature::apple_m1}, + {"apple-a14", CPU::apple_a14, CPU::apple_a13, 100000, Feature::apple_a14}, + {"apple-m1", CPU::apple_m1, CPU::apple_a14, 100000, Feature::apple_m1}, {"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4}, {"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5}, {"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000, @@ -1013,7 +1014,9 @@ static CPU get_cpu_name(CPUID cpuid) case 0x20: // Icestorm case 0x21: // Firestorm return CPU::apple_a14; - // return CPU::apple_m1; //LLVM doesn't have support for this name yet + case 0x22: // Icestorm m1 + case 0x23: // Firestorm m1 + return CPU::apple_m1; default: return CPU::generic; } case 0x68: // 'h': Huaxintong Semiconductor @@ -1193,24 +1196,41 @@ static void shrink_big_little(std::vector> &list, } } +#ifdef _CPU_AARCH64_ && _OS_DARWIN_ static NOINLINE std::pair> _get_host_cpu() { FeatureList features = {}; - // Here we assume that only the lower 32bit are used on aarch64 - // Change the cast here when that's not the case anymore (and when there's features in the - // high bits that we want to detect). -#ifdef _CPU_AARCH64_ && _OS_DARWIN_ CPUID info = { uint8_t(0x61), uint8_t(0), - uint16_t(0x21) + uint16_t(0x23) }; // Hardcoded Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html std::vector> list; auto name = (uint32_t)get_cpu_name(info); auto arch = get_elf_arch(); features = find_cpu(name)->features; list.emplace_back(name, info); + uint32_t cpu = 0; + if (list.empty()) { + cpu = (uint32_t)generic_for_arch(arch); + } + else { + // This also covers `list.size() > 1` case which means there's a unknown combination + // consists of CPU's we know. Unclear what else we could try so just randomly return + // one... + cpu = list[0].first; + } + // Ignore feature bits that we are not interested in. + mask_features(feature_masks, &features[0]); + return std::make_pair(cpu, features); +} #else +static NOINLINE std::pair> _get_host_cpu() +{ + FeatureList features = {}; + // Here we assume that only the lower 32bit are used on aarch64 + // Change the cast here when that's not the case anymore (and when there's features in the + // high bits that we want to detect). features[0] = (uint32_t)jl_getauxval(AT_HWCAP); features[1] = (uint32_t)jl_getauxval(AT_HWCAP2); #ifdef _CPU_AARCH64_ @@ -1314,7 +1334,6 @@ static NOINLINE std::pair> _get_host_cpu() CPU::arm_cortex_a17 }; shrink_big_little(list, v7order, sizeof(v7order) / sizeof(CPU)); -#endif #endif uint32_t cpu = 0; if (list.empty()) { @@ -1330,6 +1349,7 @@ static NOINLINE std::pair> _get_host_cpu() mask_features(feature_masks, &features[0]); return std::make_pair(cpu, features); } +#endif static inline const std::pair> &get_host_cpu() { From a3f5904e9d5f8c9590d2967daa9bbf242166dbb9 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sat, 4 Dec 2021 13:01:45 -0300 Subject: [PATCH 08/16] Fix LLVM versions --- src/processor_arm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 2476448c2a338..4d48455ecbefb 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -427,8 +427,8 @@ static constexpr CPUSpec cpus[] = { {"apple-a11", CPU::apple_a11, CPU::generic, 100000, Feature::apple_a11}, {"apple-a12", CPU::apple_a12, CPU::generic, 100000, Feature::apple_a12}, {"apple-a13", CPU::apple_a13, CPU::generic, 100000, Feature::apple_a13}, - {"apple-a14", CPU::apple_a14, CPU::apple_a13, 100000, Feature::apple_a14}, - {"apple-m1", CPU::apple_m1, CPU::apple_a14, 100000, Feature::apple_m1}, + {"apple-a14", CPU::apple_a14, CPU::apple_a13, 120000, Feature::apple_a14}, + {"apple-m1", CPU::apple_m1, CPU::apple_a14, 130000, Feature::apple_m1}, {"apple-s4", CPU::apple_s4, CPU::generic, 100000, Feature::apple_s4}, {"apple-s5", CPU::apple_s5, CPU::generic, 100000, Feature::apple_s5}, {"thunderx3t110", CPU::marvell_thunderx3t110, CPU::cavium_thunderx2t99, 110000, From 4ecb98d528ed9dbb2e7467da1db1179e20a54c59 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sat, 4 Dec 2021 13:03:11 -0300 Subject: [PATCH 09/16] Whistespacing fun --- src/processor_arm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 4d48455ecbefb..1c2d5ee024284 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1016,7 +1016,7 @@ static CPU get_cpu_name(CPUID cpuid) return CPU::apple_a14; case 0x22: // Icestorm m1 case 0x23: // Firestorm m1 - return CPU::apple_m1; + return CPU::apple_m1; default: return CPU::generic; } case 0x68: // 'h': Huaxintong Semiconductor From 498bac74f967aa06ccd550d8c206896fcd831e6e Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sat, 4 Dec 2021 13:14:22 -0300 Subject: [PATCH 10/16] Fix preprocessor warning --- src/processor_arm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 1c2d5ee024284..5270b771a65d8 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -1196,7 +1196,7 @@ static void shrink_big_little(std::vector> &list, } } -#ifdef _CPU_AARCH64_ && _OS_DARWIN_ +#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ static NOINLINE std::pair> _get_host_cpu() { FeatureList features = {}; From b44c8e2aa1ca3208c0fa340f4b770c456ec856f4 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sun, 5 Dec 2021 11:21:38 -0300 Subject: [PATCH 11/16] Specific logic for apple arm --- src/processor_arm.cpp | 89 ++++++++++++++++++++----------------------- 1 file changed, 42 insertions(+), 47 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 5270b771a65d8..cfdd1ad1f0af5 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -20,7 +20,10 @@ # include # endif #endif - +#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ +#include +#include +#endif namespace ARM { enum class CPU : uint32_t { generic = 0, @@ -671,8 +674,7 @@ static constexpr CPUSpec cpus[] = { {"exynos-m2", CPU::samsung_exynos_m2, CPU::generic, UINT32_MAX, Feature::samsung_exynos_m2}, {"exynos-m3", CPU::samsung_exynos_m3, CPU::generic, 0, Feature::samsung_exynos_m3}, {"exynos-m4", CPU::samsung_exynos_m4, CPU::generic, 0, Feature::samsung_exynos_m4}, - {"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, - Feature::samsung_exynos_m5}, + {"exynos-m5", CPU::samsung_exynos_m5, CPU::samsung_exynos_m4, 110000, Feature::samsung_exynos_m5}, {"apple-a7", CPU::apple_a7, CPU::generic, 0, Feature::apple_a7}, }; #endif @@ -1033,6 +1035,43 @@ static CPU get_cpu_name(CPUID cpuid) return CPU::generic; } } +static inline const CPUSpec *find_cpu(uint32_t cpu) +{ + return ::find_cpu(cpu, cpus, ncpu_names); +} + +static inline const CPUSpec *find_cpu(llvm::StringRef name) +{ + return ::find_cpu(name, cpus, ncpu_names); +} + +static inline const char *find_cpu_name(uint32_t cpu) +{ + return ::find_cpu_name(cpu, cpus, ncpu_names); +} + +#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ +static CPUID get_apple_cpu() +{ + char buffer[128]; + size_t bufferlen = 128; + sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); + if(strcmp(buffer,"Apple M1") == 0) + return CPUID{0x61, 0,0x23}; + else + return CPUID{0, 0, 0}; // Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html +} + +static NOINLINE std::pair> _get_host_cpu() +{ + FeatureList features = {}; + CPUID info = get_apple_cpu(); + auto name = (uint32_t)get_cpu_name(info); + features = find_cpu(name)->features; + return std::make_pair(name, features); +} +#else + namespace { @@ -1077,21 +1116,6 @@ static arm_arch get_elf_arch(void) #endif } -static inline const CPUSpec *find_cpu(uint32_t cpu) -{ - return ::find_cpu(cpu, cpus, ncpu_names); -} - -static inline const CPUSpec *find_cpu(llvm::StringRef name) -{ - return ::find_cpu(name, cpus, ncpu_names); -} - -static inline const char *find_cpu_name(uint32_t cpu) -{ - return ::find_cpu_name(cpu, cpus, ncpu_names); -} - static arm_arch feature_arch_version(const FeatureList &feature) { #ifdef _CPU_AARCH64_ @@ -1196,35 +1220,6 @@ static void shrink_big_little(std::vector> &list, } } -#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ -static NOINLINE std::pair> _get_host_cpu() -{ - FeatureList features = {}; - CPUID info = { - uint8_t(0x61), - uint8_t(0), - uint16_t(0x23) - }; // Hardcoded Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html - std::vector> list; - auto name = (uint32_t)get_cpu_name(info); - auto arch = get_elf_arch(); - features = find_cpu(name)->features; - list.emplace_back(name, info); - uint32_t cpu = 0; - if (list.empty()) { - cpu = (uint32_t)generic_for_arch(arch); - } - else { - // This also covers `list.size() > 1` case which means there's a unknown combination - // consists of CPU's we know. Unclear what else we could try so just randomly return - // one... - cpu = list[0].first; - } - // Ignore feature bits that we are not interested in. - mask_features(feature_masks, &features[0]); - return std::make_pair(cpu, features); -} -#else static NOINLINE std::pair> _get_host_cpu() { FeatureList features = {}; From 6271618f2ce8f11bf1d92cc45d94bcb122755d6c Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sun, 5 Dec 2021 11:40:46 -0300 Subject: [PATCH 12/16] Move some ifdefs around --- src/processor_arm.cpp | 70 ++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index cfdd1ad1f0af5..2b9310d37d94c 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -680,6 +680,42 @@ static constexpr CPUSpec cpus[] = { #endif static constexpr size_t ncpu_names = sizeof(cpus) / sizeof(cpus[0]); +static inline const CPUSpec *find_cpu(uint32_t cpu) +{ + return ::find_cpu(cpu, cpus, ncpu_names); +} + +static inline const CPUSpec *find_cpu(llvm::StringRef name) +{ + return ::find_cpu(name, cpus, ncpu_names); +} + +static inline const char *find_cpu_name(uint32_t cpu) +{ + return ::find_cpu_name(cpu, cpus, ncpu_names); +} + +#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ +static CPU get_cpu_name() +{ + char buffer[128]; + size_t bufferlen = 128; + sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); + if(strcmp(buffer,"Apple M1") == 0) + return CPU::apple_m1; + else + return CPU::generic;// Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html +} +static NOINLINE std::pair> _get_host_cpu() +{ + FeatureList features = {}; + + auto name = (uint32_t)get_cpu_name(); + features = find_cpu(name)->features; + return std::make_pair(name, features); +} +#else + // auxval reader #ifndef AT_HWCAP @@ -1035,42 +1071,8 @@ static CPU get_cpu_name(CPUID cpuid) return CPU::generic; } } -static inline const CPUSpec *find_cpu(uint32_t cpu) -{ - return ::find_cpu(cpu, cpus, ncpu_names); -} - -static inline const CPUSpec *find_cpu(llvm::StringRef name) -{ - return ::find_cpu(name, cpus, ncpu_names); -} - -static inline const char *find_cpu_name(uint32_t cpu) -{ - return ::find_cpu_name(cpu, cpus, ncpu_names); -} -#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ -static CPUID get_apple_cpu() -{ - char buffer[128]; - size_t bufferlen = 128; - sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); - if(strcmp(buffer,"Apple M1") == 0) - return CPUID{0x61, 0,0x23}; - else - return CPUID{0, 0, 0}; // Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html -} -static NOINLINE std::pair> _get_host_cpu() -{ - FeatureList features = {}; - CPUID info = get_apple_cpu(); - auto name = (uint32_t)get_cpu_name(info); - features = find_cpu(name)->features; - return std::make_pair(name, features); -} -#else namespace { From aec01a6c44d2809ad6b35c62ad5d9339de7da2ad Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sun, 5 Dec 2021 11:45:03 -0300 Subject: [PATCH 13/16] whitespace --- src/processor_arm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index 2b9310d37d94c..a29b5a3f145ba 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -702,8 +702,8 @@ static CPU get_cpu_name() size_t bufferlen = 128; sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); if(strcmp(buffer,"Apple M1") == 0) - return CPU::apple_m1; - else + return CPU::apple_m1; + else return CPU::generic;// Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html } static NOINLINE std::pair> _get_host_cpu() From 98c9fa6aa4f54a7ab2e31f658b16178381be210e Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sun, 5 Dec 2021 17:50:51 -0300 Subject: [PATCH 14/16] simplifying the code --- src/processor_arm.cpp | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index a29b5a3f145ba..f9135e28fc136 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -19,11 +19,11 @@ # undef USE_DYN_GETAUXVAL # include # endif -#endif -#if defined _CPU_AARCH64_ && defined _OS_DARWIN_ +#elif defined _CPU_AARCH64_ && defined _OS_DARWIN_ #include #include #endif + namespace ARM { enum class CPU : uint32_t { generic = 0, @@ -696,24 +696,19 @@ static inline const char *find_cpu_name(uint32_t cpu) } #if defined _CPU_AARCH64_ && defined _OS_DARWIN_ -static CPU get_cpu_name() + +static NOINLINE std::pair> _get_host_cpu() { char buffer[128]; size_t bufferlen = 128; sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); - if(strcmp(buffer,"Apple M1") == 0) - return CPU::apple_m1; + + if(strcmp(buffer,"Apple M1") == 0)// Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html + return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); else - return CPU::generic;// Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html + return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); } -static NOINLINE std::pair> _get_host_cpu() -{ - FeatureList features = {}; - auto name = (uint32_t)get_cpu_name(); - features = find_cpu(name)->features; - return std::make_pair(name, features); -} #else // auxval reader From b41771d202e2f8c7e22cc55f485041c8ce0db7f2 Mon Sep 17 00:00:00 2001 From: Gabriel Baraldi Date: Sun, 5 Dec 2021 22:26:26 -0300 Subject: [PATCH 15/16] Fix URL --- src/processor_arm.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index f9135e28fc136..cbed6fdb35a91 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -703,7 +703,7 @@ static NOINLINE std::pair> _get_host_cpu() size_t bufferlen = 128; sysctlbyname("machdep.cpu.brand_string",&buffer,&bufferlen,NULL,0); - if(strcmp(buffer,"Apple M1") == 0)// Firestorm core data based on https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html + if(strcmp(buffer,"Apple M1") == 0) return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); else return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); @@ -1016,7 +1016,7 @@ static CPU get_cpu_name(CPUID cpuid) default: return CPU::generic; } case 0x61: // 'a': Apple - // https://opensource.apple.com/source/xnu/xnu-6153.81.5/osfmk/arm/cpuid.h.auto.html + // https://opensource.apple.com/source/xnu/xnu-7195.141.2/osfmk/arm/cpuid.h.auto.html switch (cpuid.part) { case 0x0: // Swift return CPU::apple_swift; From 0053128c5e73b7f8e193d9e508cd16994fcaa252 Mon Sep 17 00:00:00 2001 From: gbaraldi Date: Wed, 19 Jan 2022 23:09:00 -0300 Subject: [PATCH 16/16] Add M1 Pro and Max --- src/processor_arm.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/processor_arm.cpp b/src/processor_arm.cpp index cbed6fdb35a91..f5cc2a42a4870 100644 --- a/src/processor_arm.cpp +++ b/src/processor_arm.cpp @@ -705,6 +705,10 @@ static NOINLINE std::pair> _get_host_cpu() if(strcmp(buffer,"Apple M1") == 0) return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); + else if(strcmp(buffer,"Apple M1 Max") == 0) + return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); + else if(strcmp(buffer,"Apple M1 Pro") == 0) + return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); else return std::make_pair((uint32_t)CPU::apple_m1, Feature::apple_m1); }