From 43c5e5c63d6d92ba8ae3bc1b3c9dc1716a4a505e Mon Sep 17 00:00:00 2001
From: Kim Walisch
Date: Sat, 22 Jun 2024 17:42:53 +0200
Subject: [PATCH] Fix std::countr_zero() usage

---
 include/primesieve/ctz.hpp | 53 ++++++++++++++++++++------------------
 1 file changed, 28 insertions(+), 25 deletions(-)

diff --git a/include/primesieve/ctz.hpp b/include/primesieve/ctz.hpp
index 06bf5eee..c45eedca 100644
--- a/include/primesieve/ctz.hpp
+++ b/include/primesieve/ctz.hpp
@@ -28,31 +28,9 @@
 #define HAS_TZCNT
 #endif
 
-// In 2022 std::countr_zero(x) generates good assembly for
-// most compilers & CPU architectures, except for:
-// 1) GCC & Clang on x64 without __BMI__.
-// 2) MSVC on x64 without __AVX2__.
-// Hence on x64 CPUs we only use std::countr_zero(x) if
-// the compiler generates the TZCNT instruction.
-#if defined(HAS_CPP20_BIT_HEADER) && \
-    (defined(HAS_TZCNT) || !defined(IS_X64))
-
-#define HAS_CTZ64
-#define CTZ64_SUPPORTS_ZERO
-
-namespace {
-
-inline int ctz64(uint64_t x)
-{
-  // No undefined behavior, std::countr_zero(0) = 64
-  return std::countr_zero(x);
-}
-
-} // namespace
-
-#elif (defined(__GNUC__) || \
-       defined(__clang__)) && \
-       defined(__x86_64__)
+#if (defined(__GNUC__) || \
+     defined(__clang__)) && \
+     defined(__x86_64__)
 
 #define HAS_CTZ64
 #define CTZ64_SUPPORTS_ZERO
@@ -125,6 +103,31 @@ inline uint64_t ctz64(uint64_t x)
 // No undefined behavior, _tzcnt_u64(0) = 64.
 #define ctz64(x) _tzcnt_u64(x)
 
+#elif __cplusplus >= 202002L && \
+      __has_include(<bit>) && \
+      (!defined(IS_X64) || defined(HAS_TZCNT))
+
+#include <bit>
+
+// No undefined behavior, std::countr_zero(0) = 64
+#define CTZ64_SUPPORTS_ZERO
+#define HAS_CTZ64
+
+namespace {
+
+inline int ctz64(uint64_t x)
+{
+  // In 2022 std::countr_zero(x) generates good assembly for
+  // most compilers & CPU architectures, except for:
+  // 1) GCC & Clang on x64 without __BMI__.
+  // 2) MSVC on x64 without __AVX2__.
+  // Hence on x64 CPUs we only use std::countr_zero(x) if
+  // the compiler generates the TZCNT instruction.
+  return std::countr_zero(x);
+}
+
+} // namespace
+
 #elif defined(__GNUC__) || \
       __has_builtin(__builtin_ctzl)