From 972e73490640298e8cdcb5060ef25bc203a93737 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 22 Oct 2024 22:43:32 -0700 Subject: [PATCH] lib/x86: Specify evex512 and no-evex512 when appropriate libdeflate failed to build with -mno-evex512 with clang 18+ or gcc 14+ because that flag requires the evex512 feature (which is new in those compiler versions) to be explicitly added to functions that use AVX512 with 512-bit vectors. Add this feature where appropriate. For the same compiler versions, also add no-evex512 to the functions that use AVX512 + 256-bit vectors so that they are ready for AVX10/256. Resolves https://github.com/ebiggers/libdeflate/issues/394 --- lib/x86/adler32_impl.h | 4 ++-- lib/x86/cpu_features.h | 8 ++++++++ lib/x86/crc32_impl.h | 4 ++-- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/lib/x86/adler32_impl.h b/lib/x86/adler32_impl.h index 458a9e09..100a2301 100644 --- a/lib/x86/adler32_impl.h +++ b/lib/x86/adler32_impl.h @@ -82,7 +82,7 @@ */ # define adler32_x86_avx512_vl256_vnni adler32_x86_avx512_vl256_vnni # define SUFFIX _avx512_vl256_vnni -# define ATTRIBUTES _target_attribute("avx512bw,avx512vl,avx512vnni") +# define ATTRIBUTES _target_attribute("avx512bw,avx512vl,avx512vnni" NO_EVEX512) # define VL 32 # define USE_VNNI 1 # define USE_AVX512 1 @@ -95,7 +95,7 @@ */ # define adler32_x86_avx512_vl512_vnni adler32_x86_avx512_vl512_vnni # define SUFFIX _avx512_vl512_vnni -# define ATTRIBUTES _target_attribute("avx512bw,avx512vnni") +# define ATTRIBUTES _target_attribute("avx512bw,avx512vnni" EVEX512) # define VL 64 # define USE_VNNI 1 # define USE_AVX512 1 diff --git a/lib/x86/cpu_features.h b/lib/x86/cpu_features.h index 8dda21fd..e70bc80f 100644 --- a/lib/x86/cpu_features.h +++ b/lib/x86/cpu_features.h @@ -164,6 +164,14 @@ static inline u32 get_x86_cpu_features(void) { return 0; } # define HAVE_AVXVNNI(features) ((features) & X86_CPU_FEATURE_AVXVNNI) #endif +#if GCC_PREREQ(14, 0) || CLANG_PREREQ(18, 0, 18000000) +#
define EVEX512 ",evex512" /* needed to override potential -mno-evex512 */ +# define NO_EVEX512 ",no-evex512" /* needed for AVX10/256 compatibility */ +#else +# define EVEX512 "" +# define NO_EVEX512 "" +#endif + #endif /* ARCH_X86_32 || ARCH_X86_64 */ #endif /* LIB_X86_CPU_FEATURES_H */ diff --git a/lib/x86/crc32_impl.h b/lib/x86/crc32_impl.h index 54996d9c..1747aa85 100644 --- a/lib/x86/crc32_impl.h +++ b/lib/x86/crc32_impl.h @@ -106,7 +106,7 @@ static const u8 MAYBE_UNUSED shift_tab[48] = { */ # define crc32_x86_vpclmulqdq_avx512_vl256 crc32_x86_vpclmulqdq_avx512_vl256 # define SUFFIX _vpclmulqdq_avx512_vl256 -# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl") +# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl" NO_EVEX512) # define VL 32 # define USE_SSE4_1 1 # define USE_AVX512 1 @@ -119,7 +119,7 @@ static const u8 MAYBE_UNUSED shift_tab[48] = { */ # define crc32_x86_vpclmulqdq_avx512_vl512 crc32_x86_vpclmulqdq_avx512_vl512 # define SUFFIX _vpclmulqdq_avx512_vl512 -# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl") +# define ATTRIBUTES _target_attribute("vpclmulqdq,pclmul,avx512bw,avx512vl" EVEX512) # define VL 64 # define USE_SSE4_1 1 # define USE_AVX512 1