From c24a3cf3b6cbf39650f018b1834226c48747fdad Mon Sep 17 00:00:00 2001
From: inspiremenow
Date: Sun, 8 Sep 2024 20:21:49 +0800
Subject: [PATCH] feat: add support for NCNN_ISA environment variable detection

Changes:
- Implemented `get_isa_env` function to retrieve and parse ISA flags from the environment variable.

Signed-off-by: Kaiyao Duan
---
Reviewer notes (ignored by git am):
- NOTE(review): the header names in the first hunk were missing from the copy
  under review; <stdio.h>, <string.h> and the added <stdlib.h> are
  reconstructed from the functions this patch calls. Confirm them, and the
  context-line indentation, against src/cpu.cpp before applying.
- get_isa_env() was rewritten to scan NCNN_ISA in place. It keeps the same
  number of added lines, so hunk offsets and the diffstat are unchanged; the
  index line below predates the rewrite.

 src/cpu.cpp | 104 +++++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 91 insertions(+), 13 deletions(-)

diff --git a/src/cpu.cpp b/src/cpu.cpp
index b1afbba3f651..542bf47c0108 100644
--- a/src/cpu.cpp
+++ b/src/cpu.cpp
@@ -23,6 +23,7 @@
 #endif // __wasi__
 #include <stdio.h>
 #include <string.h>
+#include <stdlib.h>
 
 #ifdef _OPENMP
 #if NCNN_SIMPLEOMP
@@ -1859,6 +1860,55 @@ static int detect_cpu_is_arm_a53_a55()
 #endif // __aarch64__
 #endif // defined __ANDROID__ || defined __linux__
 
+// Return 1 when the NCNN_ISA environment variable lists isa_flags (a "-name"
+// entry such as "-avx2") among its space/comma separated tokens, 0 otherwise.
+// A hit also prints a warning to stderr. Flags that do not begin with '-'
+// never disable anything. The environment string is scanned in place: no
+// copy is allocated, so there is nothing to leak on early return, and the
+// non-reentrant strtok is not needed.
+static int get_isa_env(const char* isa_flags)
+{
+    const char* isa = getenv("NCNN_ISA");
+
+    if (!isa || !isa_flags || isa_flags[0] != '-')
+        return 0;
+
+    const size_t flag_len = strlen(isa_flags);
+
+    for (const char* p = isa; *p != '\0';)
+    {
+        p += strspn(p, " ,"); // skip separators before the next token
+        const size_t token_len = strcspn(p, " ,");
+
+        // whole-token match only, so "-sve" does not match "-sve2"
+        if (token_len == flag_len && strncmp(p, isa_flags, flag_len) == 0)
+        {
+            fprintf(stderr, "warning: %s disabled via environment variable!\n", isa_flags + 1);
+            return 1;
+        }
+        p += token_len;
+    }
+
+    return 0;
+}
+
+#if (__aarch64__ || __arm__)
+static int is_cpu_arm_cpuid_disabled = get_isa_env("-cpuid");
+static int is_cpu_arm_asimdhp_disabled = get_isa_env("-asimdhp");
+static int is_cpu_arm_asimddp_disabled = get_isa_env("-asimddp");
+static int is_cpu_arm_asimdfhm_disabled = get_isa_env("-asimdfhm");
+static int is_cpu_arm_bf16_disabled = get_isa_env("-bf16");
+static int is_cpu_arm_i8mm_disabled = get_isa_env("-i8mm");
+static int is_cpu_arm_sve_disabled = get_isa_env("-sve");
+static int is_cpu_arm_sve2_disabled = get_isa_env("-sve2");
+static int is_cpu_arm_svebf16_disabled = get_isa_env("-svebf16");
+static int is_cpu_arm_svei8mm_disabled = get_isa_env("-svei8mm");
+static int is_cpu_arm_svef32mm_disabled = get_isa_env("-svef32mm");
+static int is_cpu_arm_edsp_disabled = get_isa_env("-edsp");
+static int is_cpu_arm_vfpv4_disabled = get_isa_env("-vfpv4");
+static int is_cpu_arm_neon_disabled = get_isa_env("-neon");
+#endif
+
 // the initialization
 static void initialize_global_cpu_info()
 {
@@ -1909,16 +1959,16 @@ static void initialize_global_cpu_info()
 #endif
 
 #if defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
-    g_cpu_support_x86_avx = get_cpu_support_x86_avx();
-    g_cpu_support_x86_fma = get_cpu_support_x86_fma();
-    g_cpu_support_x86_xop = get_cpu_support_x86_xop();
-    g_cpu_support_x86_f16c = get_cpu_support_x86_f16c();
-    g_cpu_support_x86_avx2 = get_cpu_support_x86_avx2();
-    g_cpu_support_x86_avx_vnni = get_cpu_support_x86_avx_vnni();
-    g_cpu_support_x86_avx512 = get_cpu_support_x86_avx512();
-    g_cpu_support_x86_avx512_vnni = get_cpu_support_x86_avx512_vnni();
-    g_cpu_support_x86_avx512_bf16 = get_cpu_support_x86_avx512_bf16();
-    g_cpu_support_x86_avx512_fp16 = get_cpu_support_x86_avx512_fp16();
+    g_cpu_support_x86_avx = get_cpu_support_x86_avx() && !(get_isa_env("-avx"));
+    g_cpu_support_x86_fma = get_cpu_support_x86_fma() && !(get_isa_env("-fma"));
+    g_cpu_support_x86_xop = get_cpu_support_x86_xop() && !(get_isa_env("-xop"));
+    g_cpu_support_x86_f16c = get_cpu_support_x86_f16c() && !(get_isa_env("-f16c"));
+    g_cpu_support_x86_avx2 = get_cpu_support_x86_avx2() && !(get_isa_env("-avx2"));
+    g_cpu_support_x86_avx_vnni = get_cpu_support_x86_avx_vnni() && !(get_isa_env("-avx_vnni"));
+    g_cpu_support_x86_avx512 = get_cpu_support_x86_avx512() && !(get_isa_env("-avx512"));
+    g_cpu_support_x86_avx512_vnni = get_cpu_support_x86_avx512_vnni() && !(get_isa_env("-avx512_vnni"));
+    g_cpu_support_x86_avx512_bf16 = get_cpu_support_x86_avx512_bf16() && !(get_isa_env("-bf16"));
+    g_cpu_support_x86_avx512_fp16 = get_cpu_support_x86_avx512_fp16() && !(get_isa_env("-fp16"));
 #endif // defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)
 
     g_cpu_level2_cachesize = get_cpu_level2_cachesize();
@@ -2087,6 +2137,8 @@ int cpu_support_arm_edsp()
 {
     try_initialize_global_cpu_info();
 #if __arm__ && !__aarch64__
+    if (is_cpu_arm_edsp_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_edsp;
 #elif defined __ANDROID__ || defined __linux__
@@ -2107,6 +2159,8 @@ int cpu_support_arm_neon()
 #if __aarch64__
     return 1;
 #elif __arm__
+    if (is_cpu_arm_neon_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_neon;
 #elif defined __ANDROID__ || defined __linux__
@@ -2127,6 +2181,8 @@ int cpu_support_arm_vfpv4()
 #if __aarch64__
     return 1;
 #elif __arm__
+    if (is_cpu_arm_vfpv4_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_vfpv4;
 #elif defined __ANDROID__ || defined __linux__
@@ -2145,6 +2201,8 @@ int cpu_support_arm_asimdhp()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_asimdhp_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_asimdhp;
 #elif defined __ANDROID__ || defined __linux__
@@ -2173,6 +2231,8 @@ int cpu_support_arm_cpuid()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_cpuid_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_cpuid;
 #elif defined __ANDROID__ || defined __linux__
@@ -2191,6 +2251,8 @@ int cpu_support_arm_asimddp()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_asimddp_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_asimddp;
 #elif defined __ANDROID__ || defined __linux__
@@ -2217,6 +2279,8 @@ int cpu_support_arm_asimdfhm()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_asimdfhm_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_asimdfhm;
 #elif defined __ANDROID__ || defined __linux__
@@ -2243,6 +2307,8 @@ int cpu_support_arm_bf16()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_bf16_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_bf16;
 #elif defined __ANDROID__ || defined __linux__
@@ -2267,6 +2333,8 @@ int cpu_support_arm_i8mm()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_i8mm_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_i8mm;
 #elif defined __ANDROID__ || defined __linux__
@@ -2291,6 +2359,8 @@ int cpu_support_arm_sve()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_sve_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_sve;
 #elif defined __ANDROID__ || defined __linux__
@@ -2309,6 +2379,8 @@ int cpu_support_arm_sve2()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_sve2_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_sve2;
 #elif defined __ANDROID__ || defined __linux__
@@ -2327,6 +2399,8 @@ int cpu_support_arm_svebf16()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_svebf16_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_svebf16;
 #elif defined __ANDROID__ || defined __linux__
@@ -2345,6 +2419,8 @@ int cpu_support_arm_svei8mm()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_svei8mm_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_svei8mm;
 #elif defined __ANDROID__ || defined __linux__
@@ -2363,6 +2439,8 @@ int cpu_support_arm_svef32mm()
 {
     try_initialize_global_cpu_info();
 #if __aarch64__
+    if (is_cpu_arm_svef32mm_disabled)
+        return 0;
 #if defined _WIN32
     return g_cpu_support_arm_svef32mm;
 #elif defined __ANDROID__ || defined __linux__
@@ -2482,7 +2560,7 @@ int cpu_support_mips_msa()
     try_initialize_global_cpu_info();
 #if defined __ANDROID__ || defined __linux__
 #if __mips__
-    return g_hwcaps & HWCAP_MIPS_MSA;
+    return (g_hwcaps & HWCAP_MIPS_MSA) && !get_isa_env(("-msa"));
 #else
     return 0;
 #endif
@@ -2538,7 +2616,7 @@ int cpu_support_riscv_v()
     try_initialize_global_cpu_info();
 #if defined __ANDROID__ || defined __linux__
 #if __riscv
-    return g_hwcaps & COMPAT_HWCAP_ISA_V;
+    return (g_hwcaps & COMPAT_HWCAP_ISA_V) && !get_isa_env(("-rvv"));
 #else
     return 0;
 #endif
@@ -2554,7 +2632,7 @@ int cpu_support_riscv_zfh()
 #if __riscv
     // v + f does not imply zfh, but how to discover zfh properly ?
     // upstream issue https://github.com/riscv/riscv-isa-manual/issues/414
-    return g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F;
+    return (g_hwcaps & COMPAT_HWCAP_ISA_V && g_hwcaps & COMPAT_HWCAP_ISA_F) && !get_isa_env(("-rvzfh"));
 #else
     return 0;
 #endif