From c168cef4c249c88fff6ed1cb0961aca667272131 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Sat, 20 May 2023 17:43:00 +0800 Subject: [PATCH 1/9] Concentrated header for internal bit utilities Also reduces inclusion dependency on `<limits>`. --- stl/CMakeLists.txt | 1 + stl/inc/__msvc_bit_utils.hpp | 455 ++++++++++++++++++ stl/inc/__msvc_format_ucd_tables.hpp | 3 +- stl/inc/__msvc_int128.hpp | 40 +- stl/inc/bit | 131 +---- stl/inc/bitset | 2 +- stl/inc/complex | 5 +- stl/inc/header-units.json | 1 + stl/inc/limits | 291 ----------- stl/inc/numeric | 28 +- stl/inc/random | 2 +- .../std/tests/GH_001411_core_headers/test.cpp | 13 + 12 files changed, 518 insertions(+), 454 deletions(-) create mode 100644 stl/inc/__msvc_bit_utils.hpp diff --git a/stl/CMakeLists.txt b/stl/CMakeLists.txt index 6c2e456eed..073d718828 100644 --- a/stl/CMakeLists.txt +++ b/stl/CMakeLists.txt @@ -7,6 +7,7 @@ set(HEADERS ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_all_public_headers.hpp + ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_bit_utils.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_chrono.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_cxx_stdatomic.hpp ${CMAKE_CURRENT_LIST_DIR}/inc/__msvc_filebuf.hpp diff --git a/stl/inc/__msvc_bit_utils.hpp b/stl/inc/__msvc_bit_utils.hpp new file mode 100644 index 0000000000..bc23847c08 --- /dev/null +++ b/stl/inc/__msvc_bit_utils.hpp @@ -0,0 +1,455 @@ +// __msvc_bit_utils.hpp internal header (core) + +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#pragma once +#ifndef __MSVC_BIT_UTILS_HPP +#define __MSVC_BIT_UTILS_HPP +#include +#if _STL_COMPILER_PREPROCESSOR + +#include +#include + +#include _STL_INTRIN_HEADER + +// TRANSITION, GH-2129, move down to _Arm64_popcount +#if (defined(_M_ARM64) || defined(_M_ARM64EC)) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \ + && !defined(__INTEL_COMPILER) && !defined(__clang__) // TRANSITION, LLVM-51488 +#define _HAS_NEON_INTRINSICS 1 +#else // ^^^ intrinsics available / intrinsics unavailable vvv +#define _HAS_NEON_INTRINSICS 0 +#endif // ^^^ intrinsics unavailable ^^^ + +#if _HAS_NEON_INTRINSICS +#include // TRANSITION, GH-2129 +#endif // _HAS_NEON_INTRINSICS + +#pragma pack(push, _CRT_PACKING) +#pragma warning(push, _STL_WARNING_LEVEL) +#pragma warning(disable : _STL_DISABLED_WARNINGS) +_STL_DISABLE_CLANG_WARNINGS +#pragma push_macro("new") +#undef new + +_STD_BEGIN +extern "C" { +extern int __isa_available; +} + +_INLINE_VAR constexpr int _Stl_isa_available_sse42 = 2; // equal to __ISA_AVAILABLE_SSE42 +_INLINE_VAR constexpr int _Stl_isa_available_avx2 = 5; // equal to __ISA_AVAILABLE_AVX2 + +template +_INLINE_VAR constexpr int _Unsigned_integer_digits = sizeof(_UInt) * CHAR_BIT; + +// Implementation of countl_zero without using specialized CPU instructions. +// Used at compile time and when said instructions are not supported. 
+// see "Hacker's Delight" section 5-3 +template +_NODISCARD constexpr int _Countl_zero_fallback(_Ty _Val) noexcept { + _Ty _Yy = 0; + + unsigned int _Nn = _Unsigned_integer_digits<_Ty>; + unsigned int _Cc = _Unsigned_integer_digits<_Ty> / 2; + do { + _Yy = static_cast<_Ty>(_Val >> _Cc); + if (_Yy != 0) { + _Nn -= _Cc; + _Val = _Yy; + } + _Cc >>= 1; + } while (_Cc != 0); + return static_cast(_Nn) - static_cast(_Val); +} + +#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) +template +_NODISCARD int _Countl_zero_lzcnt(const _Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; + + if constexpr (_Digits <= 16) { + return static_cast(__lzcnt16(_Val) - (16 - _Digits)); + } else if constexpr (_Digits == 32) { + return static_cast(__lzcnt(_Val)); + } else { +#ifdef _M_IX86 + const unsigned int _High = _Val >> 32; + const auto _Low = static_cast(_Val); + if (_High == 0) { + return 32 + _Countl_zero_lzcnt(_Low); + } else { + return _Countl_zero_lzcnt(_High); + } +#else // ^^^ _M_IX86 / !_M_IX86 vvv + return static_cast(__lzcnt64(_Val)); +#endif // _M_IX86 + } +} + +template +_NODISCARD int _Countl_zero_bsr(const _Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; + + unsigned long _Result; + if constexpr (_Digits <= 32) { + if (!_BitScanReverse(&_Result, _Val)) { + return _Digits; + } + } else { +#ifdef _M_IX86 + const unsigned int _High = _Val >> 32; + if (_BitScanReverse(&_Result, _High)) { + return static_cast(31 - _Result); + } + + const auto _Low = static_cast(_Val); + if (!_BitScanReverse(&_Result, _Low)) { + return _Digits; + } +#else // ^^^ _M_IX86 / !_M_IX86 vvv + if (!_BitScanReverse64(&_Result, _Val)) { + return _Digits; + } +#endif // _M_IX86 + } + return static_cast(_Digits - 1 - _Result); +} + +template +_NODISCARD int _Checked_x86_x64_countl_zero(const _Ty _Val) noexcept { +#ifdef __AVX2__ + return _Countl_zero_lzcnt(_Val); +#else // __AVX2__ + const bool _Definitely_have_lzcnt = 
__isa_available >= _Stl_isa_available_avx2; + if (_Definitely_have_lzcnt) { + return _Countl_zero_lzcnt(_Val); + } else { + return _Countl_zero_bsr(_Val); + } +#endif // __AVX2__ +} +#endif // defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) + +#if defined(_M_ARM) || defined(_M_ARM64) +#ifdef __clang__ // TRANSITION, GH-1586 +_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned short _Val) { + return __builtin_clzs(_Val); +} + +_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned int _Val) { + return __builtin_clz(_Val); +} + +_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long _Val) { + return __builtin_clzl(_Val); +} + +_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long long _Val) { + return __builtin_clzll(_Val); +} +#endif // TRANSITION, GH-1586 + +template +_NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; + if (_Val == 0) { + return _Digits; + } + +#ifdef __clang__ // TRANSITION, GH-1586 + if constexpr (is_same_v, unsigned char>) { + return _Clang_arm_arm64_countl_zero(static_cast(_Val)) + - (_Unsigned_integer_digits - _Digits); + } else { + return _Clang_arm_arm64_countl_zero(_Val); + } +#else // ^^^ workaround / no workaround vvv + if constexpr (_Digits <= 32) { + return static_cast(_CountLeadingZeros(_Val)) - (_Unsigned_integer_digits - _Digits); + } else { + return static_cast(_CountLeadingZeros64(_Val)); + } +#endif // TRANSITION, GH-1586 +} +#endif // defined(_M_ARM) || defined(_M_ARM64) + +template +_NODISCARD constexpr int _Countl_zero_internal(const _Ty _Val) noexcept { +#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) + if (!_Is_constant_evaluated()) { + return _Checked_x86_x64_countl_zero(_Val); + } +#elif defined(_M_ARM) || defined(_M_ARM64) + if (!_Is_constant_evaluated()) { + return _Checked_arm_arm64_countl_zero(_Val); + } +#endif // defined(_M_ARM) || 
defined(_M_ARM64) + + return _Countl_zero_fallback(_Val); +} + +// Implementation of countr_zero without using specialized CPU instructions. +// Used at compile time and when said instructions are not supported. +// see "Hacker's Delight" section 5-4 +template +_NODISCARD constexpr int _Countr_zero_fallback(const _Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; + return _Digits - _Countl_zero_fallback(static_cast<_Ty>(static_cast<_Ty>(~_Val) & static_cast<_Ty>(_Val - 1))); +} + +// Implementation of popcount without using specialized CPU instructions. +// Used at compile time and when said instructions are not supported. +template +_NODISCARD constexpr int _Popcount_fallback(_Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; +#if defined(_M_IX86) || defined(_M_ARM) + if constexpr (_Digits == 64) { + // 64-bit bit operations on architectures without 64-bit registers are less efficient, + // hence we split the value so that it fits in 32-bit registers + return _Popcount_fallback(static_cast(_Val)) + + _Popcount_fallback(static_cast(_Val >> 32)); + } +#endif // defined(_M_IX86) || defined(_M_ARM) + // we static_cast these bit patterns in order to truncate them to the correct size + _Val = static_cast<_Ty>(_Val - ((_Val >> 1) & static_cast<_Ty>(0x5555'5555'5555'5555ull))); + _Val = static_cast<_Ty>((_Val & static_cast<_Ty>(0x3333'3333'3333'3333ull)) + + ((_Val >> 2) & static_cast<_Ty>(0x3333'3333'3333'3333ull))); + _Val = static_cast<_Ty>((_Val + (_Val >> 4)) & static_cast<_Ty>(0x0F0F'0F0F'0F0F'0F0Full)); + // Multiply by one in each byte, so that it will have the sum of all source bytes in the highest byte + _Val = static_cast<_Ty>(_Val * static_cast<_Ty>(0x0101'0101'0101'0101ull)); + // Extract highest byte + return static_cast(_Val >> (_Digits - 8)); +} + +#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \ + && !defined(__INTEL_COMPILER) 
+#define _HAS_TZCNT_BSF_INTRINSICS 1 +#else // ^^^ intrinsics available / intrinsics unavailable vvv +#define _HAS_TZCNT_BSF_INTRINSICS 0 +#endif // ^^^ intrinsics unavailable ^^^ + +#if _HAS_TZCNT_BSF_INTRINSICS +#ifdef __clang__ +#define _TZCNT_U32 __builtin_ia32_tzcnt_u32 +#define _TZCNT_U64 __builtin_ia32_tzcnt_u64 +#else // ^^^ __clang__ / !__clang__ vvv +#define _TZCNT_U32 _tzcnt_u32 +#define _TZCNT_U64 _tzcnt_u64 +#endif // __clang__ + +template +_NODISCARD int _Countr_zero_tzcnt(const _Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; + constexpr _Ty _Max = static_cast<_Ty>(-1); // equal to (numeric_limits<_Ty>::max)() + + if constexpr (_Digits <= 32) { + // Intended widening to int. This operation means that a narrow 0 will widen + // to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros + // of the wider type. + return static_cast(_TZCNT_U32(static_cast(~_Max | _Val))); + } else { +#ifdef _M_IX86 + const auto _Low = static_cast(_Val); + if (_Low == 0) { + const unsigned int _High = _Val >> 32; + return static_cast(32 + _TZCNT_U32(_High)); + } else { + return static_cast(_TZCNT_U32(_Low)); + } +#else // ^^^ _M_IX86 / !_M_IX86 vvv + return static_cast(_TZCNT_U64(_Val)); +#endif // _M_IX86 + } +} + +#undef _TZCNT_U32 +#undef _TZCNT_U64 + +template +_NODISCARD int _Countr_zero_bsf(const _Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; + constexpr _Ty _Max = static_cast<_Ty>(-1); // equal to (numeric_limits<_Ty>::max)() + + unsigned long _Result; + if constexpr (_Digits <= 32) { + // Intended widening to int. This operation means that a narrow 0 will widen + // to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros + // of the wider type. 
+ if (!_BitScanForward(&_Result, static_cast(~_Max | _Val))) { + return _Digits; + } + } else { +#ifdef _M_IX86 + const auto _Low = static_cast(_Val); + if (_BitScanForward(&_Result, _Low)) { + return static_cast(_Result); + } + + const unsigned int _High = _Val >> 32; + if (!_BitScanForward(&_Result, _High)) { + return _Digits; + } else { + return static_cast(_Result + 32); + } +#else // ^^^ _M_IX86 / !_M_IX86 vvv + if (!_BitScanForward64(&_Result, _Val)) { + return _Digits; + } +#endif // _M_IX86 + } + return static_cast(_Result); +} + +template +_NODISCARD int _Checked_x86_x64_countr_zero(const _Ty _Val) noexcept { +#ifdef __AVX2__ + return _Countr_zero_tzcnt(_Val); +#else // __AVX2__ + const bool _Definitely_have_tzcnt = __isa_available >= _Stl_isa_available_avx2; + if (_Definitely_have_tzcnt) { + return _Countr_zero_tzcnt(_Val); + } else { + return _Countr_zero_bsf(_Val); + } +#endif // __AVX2__ +} + +#endif // _HAS_TZCNT_BSF_INTRINSICS + +#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \ + && !defined(__INTEL_COMPILER) +#define _HAS_POPCNT_INTRINSICS 1 +#else // ^^^ intrinsics available / intrinsics unavailable vvv +#define _HAS_POPCNT_INTRINSICS 0 +#endif // ^^^ intrinsics unavailable ^^^ + +#if _HAS_POPCNT_INTRINSICS +template +_NODISCARD int _Unchecked_x86_x64_popcount(const _Ty _Val) noexcept { + constexpr int _Digits = _Unsigned_integer_digits<_Ty>; + if constexpr (_Digits <= 16) { + return static_cast(__popcnt16(_Val)); + } else if constexpr (_Digits == 32) { + return static_cast(__popcnt(_Val)); + } else { +#ifdef _M_IX86 + return static_cast(__popcnt(_Val >> 32) + __popcnt(static_cast(_Val))); +#else // ^^^ _M_IX86 / !_M_IX86 vvv + return static_cast(__popcnt64(_Val)); +#endif // _M_IX86 + } +} + +template +_NODISCARD int _Checked_x86_x64_popcount(const _Ty _Val) noexcept { +#ifndef __AVX__ + const bool _Definitely_have_popcnt = __isa_available >= _Stl_isa_available_sse42; + if 
(!_Definitely_have_popcnt) { + return _Popcount_fallback(_Val); + } +#endif // !defined(__AVX__) + return _Unchecked_x86_x64_popcount(_Val); +} +#endif // _HAS_POPCNT_INTRINSICS + +#if _HAS_NEON_INTRINSICS +_NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept { + const __n64 _Temp = neon_cnt(__uint64ToN64_v(_Val)); + return neon_addv8(_Temp).n8_i8[0]; +} +#endif // _HAS_NEON_INTRINSICS + +template +constexpr bool _Is_standard_unsigned_integer = + _Is_any_of_v, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>; + +template , int> = 0> +_NODISCARD _CONSTEXPR20 int _Countr_zero(const _Ty _Val) noexcept { +#if _HAS_TZCNT_BSF_INTRINSICS +#if _HAS_CXX20 + if (!_STD is_constant_evaluated()) +#endif // _HAS_CXX20 + { + return _Checked_x86_x64_countr_zero(_Val); + } +#endif // _HAS_TZCNT_BSF_INTRINSICS + return _Countr_zero_fallback(_Val); +} + +template +constexpr decltype(auto) _Select_countr_zero_impl(_Fn _Callback) { + // TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining +#if _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 + if (!_STD is_constant_evaluated()) { +#ifdef __AVX2__ + return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); }); +#else // ^^^ AVX2 / not AVX2 vvv + const bool _Definitely_have_tzcnt = __isa_available >= _Stl_isa_available_avx2; + if (_Definitely_have_tzcnt) { + return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); }); + } else { + return _Callback([](_Ty _Val) { return _Countr_zero_bsf(_Val); }); + } +#endif // ^^^ not AVX2 ^^^ + } +#endif // ^^^ _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 ^^^ + // C++17 constexpr gcd() calls this function, so it should be constexpr unless we detect runtime evaluation. 
+ return _Callback([](_Ty _Val) { return _Countr_zero_fallback(_Val); }); +} + +template , int> = 0> +_NODISCARD _CONSTEXPR20 int _Popcount(const _Ty _Val) noexcept { +#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS +#if _HAS_CXX20 + if (!_STD is_constant_evaluated()) +#endif // _HAS_CXX20 + { +#if _HAS_POPCNT_INTRINSICS + return _Checked_x86_x64_popcount(_Val); +#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv + return _Arm64_popcount(_Val); +#endif // ^^^ ARM64 intrinsics available ^^^ + } +#endif // ^^^ any intrinsics available ^^^ + return _Popcount_fallback(_Val); +} + +template +_CONSTEXPR20 decltype(auto) _Select_popcount_impl(_Fn _Callback) { + // TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining +#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS +#if _HAS_CXX20 + if (!_STD is_constant_evaluated()) +#endif // _HAS_CXX20 + { +#if _HAS_POPCNT_INTRINSICS +#ifndef __AVX__ + const bool _Definitely_have_popcnt = __isa_available >= _Stl_isa_available_sse42; + if (!_Definitely_have_popcnt) { + return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); }); + } +#endif // !defined(__AVX__) + return _Callback([](_Ty _Val) { return _Unchecked_x86_x64_popcount(_Val); }); +#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv + return _Callback([](_Ty _Val) { return _Arm64_popcount(_Val); }); +#endif // ^^^ ARM64 intrinsics available ^^^ + } +#endif // ^^^ any intrinsics available ^^^ + return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); }); +} + +#undef _HAS_POPCNT_INTRINSICS +#undef _HAS_TZCNT_BSF_INTRINSICS + +_STD_END + +#undef _HAS_NEON_INTRINSICS + +#pragma pop_macro("new") +_STL_RESTORE_CLANG_WARNINGS +#pragma warning(pop) +#pragma pack(pop) +#endif // _STL_COMPILER_PREPROCESSOR +#endif // __MSVC_BIT_UTILS_HPP diff --git a/stl/inc/__msvc_format_ucd_tables.hpp b/stl/inc/__msvc_format_ucd_tables.hpp index d6bc251ca3..3109d150ca 100644 
--- a/stl/inc/__msvc_format_ucd_tables.hpp +++ b/stl/inc/__msvc_format_ucd_tables.hpp @@ -61,7 +61,6 @@ #if _STL_COMPILER_PREPROCESSOR #include -#include #include #pragma pack(push, _CRT_PACKING) @@ -79,7 +78,7 @@ struct _Unicode_property_data { uint16_t _Props_and_size[_NumRanges]; _NODISCARD constexpr _ValueEnum _Get_property_for_codepoint(const uint32_t _Code_point) const noexcept { ptrdiff_t _Upper_idx = _STD upper_bound(_Lower_bounds, _STD end(_Lower_bounds), _Code_point) - _Lower_bounds; - constexpr auto _No_value_constant = static_cast<_ValueEnum>((numeric_limits::max)()); + constexpr auto _No_value_constant = static_cast<_ValueEnum>(UINT8_MAX); if (_Upper_idx == 0) { return _No_value_constant; } diff --git a/stl/inc/__msvc_int128.hpp b/stl/inc/__msvc_int128.hpp index a537b05f7b..457db6257b 100644 --- a/stl/inc/__msvc_int128.hpp +++ b/stl/inc/__msvc_int128.hpp @@ -9,14 +9,13 @@ #include #if _STL_COMPILER_PREPROCESSOR +#include <__msvc_bit_utils.hpp> #include -#include #include #include _STL_INTRIN_HEADER #if _HAS_CXX20 -#include #include #define _ZERO_OR_NO_INIT #else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv @@ -143,7 +142,7 @@ struct static constexpr void _Knuth_4_3_1_M( const uint32_t (&__u)[__m], const uint32_t (&__v)[__n], uint32_t (&__w)[__n + __m]) noexcept { #ifdef _ENABLE_STL_INTERNAL_CHECK - constexpr auto _Int_max = static_cast((numeric_limits::max)()); + constexpr auto _Int_max = static_cast(INT_MAX); _STL_INTERNAL_STATIC_ASSERT(__m <= _Int_max); _STL_INTERNAL_STATIC_ASSERT(__n <= _Int_max); #endif // _ENABLE_STL_INTERNAL_CHECK @@ -192,7 +191,7 @@ struct static constexpr void _Knuth_4_3_1_D(uint32_t* const __u, const size_t __u_size, const uint32_t* const __v, const size_t __v_size, uint32_t* const __q) noexcept { // Pre: __u + [0, __u_size), __v + [0, __v_size), and __q + [0, __u_size - __v_size) are all valid ranges - // constexpr auto _Int_max = static_cast((numeric_limits::max)()); + // constexpr auto _Int_max = static_cast(INT_MAX); // 
_STL_INTERNAL_CHECK(__v_size <= _Int_max); const int __n = static_cast(__v_size); // _STL_INTERNAL_CHECK(__u_size > __v_size); @@ -252,11 +251,7 @@ struct } #endif // _STL_128_DIV_INTRINSICS -#if _HAS_CXX20 - const auto __d = _STD countl_zero(static_cast(_Div >> 32)); -#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv - const auto __d = _Countl_zero_fallback(static_cast(_Div >> 32)); -#endif // ^^^ !_HAS_CXX20 ^^^ + const auto __d = _Countl_zero_internal(static_cast(_Div >> 32)); if (__d >= 32) { // _Div < 2^32 auto _Rem = (_High << 32) | (_Low >> 32); auto _Result = _Rem / static_cast(_Div); @@ -464,11 +459,7 @@ struct // _STL_INTERNAL_CHECK(_Den._Word[1] != 0); // _STL_INTERNAL_CHECK(_Num._Word[1] > _Den._Word[1]); // Normalize by shifting both left until _Den's high bit is set (So _Den's high digit is >= b / 2) -#if _HAS_CXX20 - const auto __d = _STD countl_zero(_Den._Word[1]); -#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv - const auto __d = _Countl_zero_fallback(_Den._Word[1]); -#endif // ^^^ !_HAS_CXX20 ^^^ + const auto __d = _Countl_zero_internal(_Den._Word[1]); _Den <<= __d; auto _High_digit = __d == 0 ? 
0 : _Num._Word[1] >> (64 - __d); // This creates a third digit for _Num _Num <<= __d; @@ -513,11 +504,7 @@ struct } return __qhat; #else // ^^^ 128-bit intrinsics / no such intrinsics vvv -#if _HAS_CXX20 - auto __d = _STD countl_zero(_Den._Word[1]); -#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv - auto __d = _Countl_zero_fallback(_Den._Word[1]); -#endif // ^^^ !_HAS_CXX20 ^^^ + auto __d = _Countl_zero_internal(_Den._Word[1]); const bool _Three_word_den = __d >= 32; __d &= 31; uint32_t __u[5]{ @@ -597,11 +584,7 @@ struct // _STL_INTERNAL_CHECK(_Den._Word[1] != 0); // _STL_INTERNAL_CHECK(_Num._Word[1] > _Den._Word[1]); // Normalize by shifting both left until _Den's high bit is set (So _Den's high digit is >= b / 2) -#if _HAS_CXX20 - const auto __d = _STD countl_zero(_Den._Word[1]); -#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv - const auto __d = _Countl_zero_fallback(_Den._Word[1]); -#endif // ^^^ !_HAS_CXX20 ^^^ + const auto __d = _Countl_zero_internal(_Den._Word[1]); _Den <<= __d; auto _High_digit = __d == 0 ? 
0 : _Num._Word[1] >> (64 - __d); // This creates a third digit for _Num _Num <<= __d; @@ -648,11 +631,7 @@ struct (void) _AddCarry64(_Carry, _Num._Word[1], _Den._Word[1], _Num._Word[1]); } #else // ^^^ 128-bit intrinsics / no such intrinsics vvv -#if _HAS_CXX20 - auto __d = _STD countl_zero(_Den._Word[1]); -#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv - auto __d = _Countl_zero_fallback(_Den._Word[1]); -#endif // ^^^ !_HAS_CXX20 ^^^ + auto __d = _Countl_zero_internal(_Den._Word[1]); const bool _Three_word_den = __d >= 32; __d &= 31; uint32_t __u[5]{ @@ -986,6 +965,9 @@ struct _Unsigned128 : _Base128 { } }; +_EXPORT_STD template +class numeric_limits; + template <> class numeric_limits<_Unsigned128> : public _Num_int_base { public: diff --git a/stl/inc/bit b/stl/inc/bit index d621beee0f..12a73d8be4 100644 --- a/stl/inc/bit +++ b/stl/inc/bit @@ -12,9 +12,8 @@ _EMIT_STL_WARNING(STL4038, "The contents of are available only with C++20 or later."); #else // ^^^ !_HAS_CXX20 / _HAS_CXX20 vvv +#include <__msvc_bit_utils.hpp> #include -#include -#include #include #include _STL_INTRIN_HEADER @@ -95,7 +94,7 @@ _NODISCARD constexpr _Ty bit_ceil(const _Ty _Val) noexcept /* strengthened */ { return _Ty{1}; } - const int _Num = numeric_limits<_Ty>::digits - _STD countl_zero(static_cast<_Ty>(_Val - 1)); + const int _Num = _Unsigned_integer_digits<_Ty> - _STD countl_zero(static_cast<_Ty>(_Val - 1)); if constexpr (sizeof(_Ty) < sizeof(unsigned int)) { // for types subject to integral promotion if (_STD is_constant_evaluated()) { @@ -108,7 +107,7 @@ _NODISCARD constexpr _Ty bit_ceil(const _Ty _Val) noexcept /* strengthened */ { // "Preconditions: N is representable as a value of type T." // "Remarks: A function call expression that violates the precondition in the Preconditions: element // is not a core constant expression (7.7)." 
- if (_Num == numeric_limits<_Ty>::digits) { + if (_Num == _Unsigned_integer_digits<_Ty>) { _Precondition_violation_in_bit_ceil(); } } @@ -123,12 +122,12 @@ _NODISCARD constexpr _Ty bit_floor(const _Ty _Val) noexcept { return 0; } - return static_cast<_Ty>(_Ty{1} << (numeric_limits<_Ty>::digits - 1 - _STD countl_zero(_Val))); + return static_cast<_Ty>(_Ty{1} << (_Unsigned_integer_digits<_Ty> - 1 - _STD countl_zero(_Val))); } _EXPORT_STD template , int> = 0> _NODISCARD constexpr int bit_width(const _Ty _Val) noexcept { - return numeric_limits<_Ty>::digits - _STD countl_zero(_Val); + return _Unsigned_integer_digits<_Ty> - _STD countl_zero(_Val); } _EXPORT_STD template , int> = 0> @@ -136,7 +135,7 @@ _NODISCARD constexpr _Ty rotr(_Ty _Val, int _Rotation) noexcept; _EXPORT_STD template , int> = 0> _NODISCARD constexpr _Ty rotl(const _Ty _Val, const int _Rotation) noexcept { - constexpr auto _Digits = numeric_limits<_Ty>::digits; + constexpr auto _Digits = _Unsigned_integer_digits<_Ty>; if (!_STD is_constant_evaluated()) { if constexpr (_Digits == 64) { @@ -164,7 +163,7 @@ _NODISCARD constexpr _Ty rotl(const _Ty _Val, const int _Rotation) noexcept { _EXPORT_STD template , int> /* = 0 */> _NODISCARD constexpr _Ty rotr(const _Ty _Val, const int _Rotation) noexcept { - constexpr auto _Digits = numeric_limits<_Ty>::digits; + constexpr auto _Digits = _Unsigned_integer_digits<_Ty>; if (!_STD is_constant_evaluated()) { if constexpr (_Digits == 64) { @@ -190,122 +189,6 @@ _NODISCARD constexpr _Ty rotr(const _Ty _Val, const int _Rotation) noexcept { } } -#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) - -extern "C" { -extern int __isa_available; -} - -template -_NODISCARD int _Countl_zero_lzcnt(const _Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; - - if constexpr (_Digits <= 16) { - return static_cast(__lzcnt16(_Val) - (16 - _Digits)); - } else if constexpr (_Digits == 32) { - return static_cast(__lzcnt(_Val)); - } else { -#ifdef 
_M_IX86 - const unsigned int _High = _Val >> 32; - const auto _Low = static_cast(_Val); - if (_High == 0) { - return 32 + _Countl_zero_lzcnt(_Low); - } else { - return _Countl_zero_lzcnt(_High); - } -#else // ^^^ _M_IX86 / !_M_IX86 vvv - return static_cast(__lzcnt64(_Val)); -#endif // _M_IX86 - } -} - -template -_NODISCARD int _Countl_zero_bsr(const _Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; - - unsigned long _Result; - if constexpr (_Digits <= 32) { - if (!_BitScanReverse(&_Result, _Val)) { - return _Digits; - } - } else { -#ifdef _M_IX86 - const unsigned int _High = _Val >> 32; - if (_BitScanReverse(&_Result, _High)) { - return static_cast(31 - _Result); - } - - const auto _Low = static_cast(_Val); - if (!_BitScanReverse(&_Result, _Low)) { - return _Digits; - } -#else // ^^^ _M_IX86 / !_M_IX86 vvv - if (!_BitScanReverse64(&_Result, _Val)) { - return _Digits; - } -#endif // _M_IX86 - } - return static_cast(_Digits - 1 - _Result); -} - -template -_NODISCARD int _Checked_x86_x64_countl_zero(const _Ty _Val) noexcept { -#ifdef __AVX2__ - return _Countl_zero_lzcnt(_Val); -#else // __AVX2__ - const bool _Definitely_have_lzcnt = __isa_available >= __ISA_AVAILABLE_AVX2; - if (_Definitely_have_lzcnt) { - return _Countl_zero_lzcnt(_Val); - } else { - return _Countl_zero_bsr(_Val); - } -#endif // __AVX2__ -} -#endif // defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) - -#if defined(_M_ARM) || defined(_M_ARM64) -#ifdef __clang__ // TRANSITION, GH-1586 -_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned short _Val) { - return __builtin_clzs(_Val); -} - -_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned int _Val) { - return __builtin_clz(_Val); -} - -_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long _Val) { - return __builtin_clzl(_Val); -} - -_NODISCARD constexpr int _Clang_arm_arm64_countl_zero(const unsigned long long _Val) { - return __builtin_clzll(_Val); -} -#endif 
// TRANSITION, GH-1586 - -template -_NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; - if (_Val == 0) { - return _Digits; - } - -#ifdef __clang__ // TRANSITION, GH-1586 - if constexpr (is_same_v, unsigned char>) { - return _Clang_arm_arm64_countl_zero(static_cast(_Val)) - - (numeric_limits::digits - _Digits); - } else { - return _Clang_arm_arm64_countl_zero(_Val); - } -#else // ^^^ workaround / no workaround vvv - if constexpr (_Digits <= 32) { - return static_cast(_CountLeadingZeros(_Val)) - (numeric_limits::digits - _Digits); - } else { - return static_cast(_CountLeadingZeros64(_Val)); - } -#endif // TRANSITION, GH-1586 -} -#endif // defined(_M_ARM) || defined(_M_ARM64) - _EXPORT_STD template , int> /* = 0 */> _NODISCARD constexpr int countl_zero(const _Ty _Val) noexcept { #if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) diff --git a/stl/inc/bitset b/stl/inc/bitset index e9c5242319..2c38a5d243 100644 --- a/stl/inc/bitset +++ b/stl/inc/bitset @@ -8,8 +8,8 @@ #define _BITSET_ #include #if _STL_COMPILER_PREPROCESSOR +#include <__msvc_bit_utils.hpp> #include -#include #include #pragma pack(push, _CRT_PACKING) diff --git a/stl/inc/complex b/stl/inc/complex index d869f9d5e7..c95ed2d8b2 100644 --- a/stl/inc/complex +++ b/stl/inc/complex @@ -26,9 +26,8 @@ // TRANSITION, not using x86/x64 FMA intrinsics for Clang yet #elif defined(_M_IX86) || defined(_M_X64) #define _FMP_USING_X86_X64_INTRINSICS +#include <__msvc_bit_utils.hpp> #include -#include -extern "C" int __isa_available; extern "C" __m128d __cdecl _mm_fmsub_sd(__m128d, __m128d, __m128d); #endif // ^^^ defined(_M_IX86) || defined(_M_X64) ^^^ @@ -177,7 +176,7 @@ namespace _Float_multi_prec { #ifdef __AVX2__ return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)}; #else // ^^^ defined(__AVX2__) / !defined(__AVX2__) vvv - const bool _Definitely_have_fma = __isa_available >= __ISA_AVAILABLE_AVX2; + const bool 
_Definitely_have_fma = __isa_available >= _Stl_isa_available_avx2; if (_Definitely_have_fma) { return {_Prod0, _Sqr_error_x86_x64_fma(_Xval, _Prod0)}; } else { diff --git a/stl/inc/header-units.json b/stl/inc/header-units.json index 345af8adf5..0966ec2acc 100644 --- a/stl/inc/header-units.json +++ b/stl/inc/header-units.json @@ -5,6 +5,7 @@ "Version": "1.0", "BuildAsHeaderUnits": [ // "__msvc_all_public_headers.hpp", // for testing, not production + "__msvc_bit_utils.hpp", "__msvc_chrono.hpp", "__msvc_cxx_stdatomic.hpp", "__msvc_filebuf.hpp", diff --git a/stl/inc/limits b/stl/inc/limits index b957442428..ac59bbaf2c 100644 --- a/stl/inc/limits +++ b/stl/inc/limits @@ -11,23 +11,10 @@ #include #include #include -#include #include #include _STL_INTRIN_HEADER -// TRANSITION, GH-2129, move down to _Arm64_popcount -#if (defined(_M_ARM64) || defined(_M_ARM64EC)) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \ - && !defined(__INTEL_COMPILER) && !defined(__clang__) // TRANSITION, LLVM-51488 -#define _HAS_NEON_INTRINSICS 1 -#else // ^^^ intrinsics available / intrinsics unavailable vvv -#define _HAS_NEON_INTRINSICS 0 -#endif // ^^^ intrinsics unavailable ^^^ - -#if _HAS_NEON_INTRINSICS -#include // TRANSITION, GH-2129 -#endif - #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) #pragma warning(disable : _STL_DISABLED_WARNINGS) @@ -1006,284 +993,6 @@ public: static constexpr int min_exponent10 = LDBL_MIN_10_EXP; }; -// Implementation of countl_zero without using specialized CPU instructions. -// Used at compile time and when said instructions are not supported. 
-// see "Hacker's Delight" section 5-3 -template -_NODISCARD constexpr int _Countl_zero_fallback(_Ty _Val) noexcept { - _Ty _Yy = 0; - - unsigned int _Nn = numeric_limits<_Ty>::digits; - unsigned int _Cc = numeric_limits<_Ty>::digits / 2; - do { - _Yy = static_cast<_Ty>(_Val >> _Cc); - if (_Yy != 0) { - _Nn -= _Cc; - _Val = _Yy; - } - _Cc >>= 1; - } while (_Cc != 0); - return static_cast(_Nn) - static_cast(_Val); -} - -// Implementation of countr_zero without using specialized CPU instructions. -// Used at compile time and when said instructions are not supported. -// see "Hacker's Delight" section 5-4 -template -_NODISCARD constexpr int _Countr_zero_fallback(const _Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; - return _Digits - _Countl_zero_fallback(static_cast<_Ty>(static_cast<_Ty>(~_Val) & static_cast<_Ty>(_Val - 1))); -} - -// Implementation of popcount without using specialized CPU instructions. -// Used at compile time and when said instructions are not supported. 
-template -_NODISCARD constexpr int _Popcount_fallback(_Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; -#if defined(_M_IX86) || defined(_M_ARM) - if constexpr (_Digits == 64) { - // 64-bit bit operations on architectures without 64-bit registers are less efficient, - // hence we split the value so that it fits in 32-bit registers - return _Popcount_fallback(static_cast(_Val)) - + _Popcount_fallback(static_cast(_Val >> 32)); - } -#endif // defined(_M_IX86) || defined(_M_ARM) - // we static_cast these bit patterns in order to truncate them to the correct size - _Val = static_cast<_Ty>(_Val - ((_Val >> 1) & static_cast<_Ty>(0x5555'5555'5555'5555ull))); - _Val = static_cast<_Ty>((_Val & static_cast<_Ty>(0x3333'3333'3333'3333ull)) - + ((_Val >> 2) & static_cast<_Ty>(0x3333'3333'3333'3333ull))); - _Val = static_cast<_Ty>((_Val + (_Val >> 4)) & static_cast<_Ty>(0x0F0F'0F0F'0F0F'0F0Full)); - // Multiply by one in each byte, so that it will have the sum of all source bytes in the highest byte - _Val = static_cast<_Ty>(_Val * static_cast<_Ty>(0x0101'0101'0101'0101ull)); - // Extract highest byte - return static_cast(_Val >> (_Digits - 8)); -} - -#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \ - && !defined(__INTEL_COMPILER) -#define _HAS_TZCNT_BSF_INTRINSICS 1 -#else // ^^^ intrinsics available / intrinsics unavailable vvv -#define _HAS_TZCNT_BSF_INTRINSICS 0 -#endif // ^^^ intrinsics unavailable ^^^ - -#if _HAS_TZCNT_BSF_INTRINSICS -extern "C" { -extern int __isa_available; -} - -#ifdef __clang__ -#define _TZCNT_U32 __builtin_ia32_tzcnt_u32 -#define _TZCNT_U64 __builtin_ia32_tzcnt_u64 -#else // ^^^ __clang__ / !__clang__ vvv -#define _TZCNT_U32 _tzcnt_u32 -#define _TZCNT_U64 _tzcnt_u64 -#endif // __clang__ - -template -_NODISCARD int _Countr_zero_tzcnt(const _Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; - constexpr _Ty _Max = 
(numeric_limits<_Ty>::max)(); - - if constexpr (_Digits <= 32) { - // Intended widening to int. This operation means that a narrow 0 will widen - // to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros - // of the wider type. - return static_cast(_TZCNT_U32(static_cast(~_Max | _Val))); - } else { -#ifdef _M_IX86 - const auto _Low = static_cast(_Val); - if (_Low == 0) { - const unsigned int _High = _Val >> 32; - return static_cast(32 + _TZCNT_U32(_High)); - } else { - return static_cast(_TZCNT_U32(_Low)); - } -#else // ^^^ _M_IX86 / !_M_IX86 vvv - return static_cast(_TZCNT_U64(_Val)); -#endif // _M_IX86 - } -} - -#undef _TZCNT_U32 -#undef _TZCNT_U64 - -template -_NODISCARD int _Countr_zero_bsf(const _Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; - constexpr _Ty _Max = (numeric_limits<_Ty>::max)(); - - unsigned long _Result; - if constexpr (_Digits <= 32) { - // Intended widening to int. This operation means that a narrow 0 will widen - // to 0xFFFF....FFFF0... instead of 0. We need this to avoid counting all the zeros - // of the wider type. 
- if (!_BitScanForward(&_Result, static_cast(~_Max | _Val))) { - return _Digits; - } - } else { -#ifdef _M_IX86 - const auto _Low = static_cast(_Val); - if (_BitScanForward(&_Result, _Low)) { - return static_cast(_Result); - } - - const unsigned int _High = _Val >> 32; - if (!_BitScanForward(&_Result, _High)) { - return _Digits; - } else { - return static_cast(_Result + 32); - } -#else // ^^^ _M_IX86 / !_M_IX86 vvv - if (!_BitScanForward64(&_Result, _Val)) { - return _Digits; - } -#endif // _M_IX86 - } - return static_cast(_Result); -} - -template -_NODISCARD int _Checked_x86_x64_countr_zero(const _Ty _Val) noexcept { -#ifdef __AVX2__ - return _Countr_zero_tzcnt(_Val); -#else // __AVX2__ - const bool _Definitely_have_tzcnt = __isa_available >= __ISA_AVAILABLE_AVX2; - if (_Definitely_have_tzcnt) { - return _Countr_zero_tzcnt(_Val); - } else { - return _Countr_zero_bsf(_Val); - } -#endif // __AVX2__ -} - -#endif // _HAS_TZCNT_BSF_INTRINSICS - -#if (defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC))) && !defined(_M_CEE_PURE) && !defined(__CUDACC__) \ - && !defined(__INTEL_COMPILER) -#define _HAS_POPCNT_INTRINSICS 1 -#else // ^^^ intrinsics available / intrinsics unavailable vvv -#define _HAS_POPCNT_INTRINSICS 0 -#endif // ^^^ intrinsics unavailable ^^^ - -#if _HAS_POPCNT_INTRINSICS -template -_NODISCARD int _Unchecked_x86_x64_popcount(const _Ty _Val) noexcept { - constexpr int _Digits = numeric_limits<_Ty>::digits; - if constexpr (_Digits <= 16) { - return static_cast(__popcnt16(_Val)); - } else if constexpr (_Digits == 32) { - return static_cast(__popcnt(_Val)); - } else { -#ifdef _M_IX86 - return static_cast(__popcnt(_Val >> 32) + __popcnt(static_cast(_Val))); -#else // ^^^ _M_IX86 / !_M_IX86 vvv - return static_cast(__popcnt64(_Val)); -#endif // _M_IX86 - } -} - -template -_NODISCARD int _Checked_x86_x64_popcount(const _Ty _Val) noexcept { -#ifndef __AVX__ - const bool _Definitely_have_popcnt = __isa_available >= __ISA_AVAILABLE_SSE42; - if 
(!_Definitely_have_popcnt) { - return _Popcount_fallback(_Val); - } -#endif // !defined(__AVX__) - return _Unchecked_x86_x64_popcount(_Val); -} -#endif // _HAS_POPCNT_INTRINSICS - -#if _HAS_NEON_INTRINSICS -_NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept { - const __n64 _Temp = neon_cnt(__uint64ToN64_v(_Val)); - return neon_addv8(_Temp).n8_i8[0]; -} -#endif // _HAS_NEON_INTRINSICS - -template -constexpr bool _Is_standard_unsigned_integer = - _Is_any_of_v, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>; - -template , int> = 0> -_NODISCARD _CONSTEXPR20 int _Countr_zero(const _Ty _Val) noexcept { -#if _HAS_TZCNT_BSF_INTRINSICS -#if _HAS_CXX20 - if (!_STD is_constant_evaluated()) -#endif // _HAS_CXX20 - { - return _Checked_x86_x64_countr_zero(_Val); - } -#endif // _HAS_TZCNT_BSF_INTRINSICS - return _Countr_zero_fallback(_Val); -} - -template -constexpr decltype(auto) _Select_countr_zero_impl(_Fn _Callback) { - // TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining -#if _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 - if (!_STD is_constant_evaluated()) { -#ifdef __AVX2__ - return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); }); -#else // ^^^ AVX2 / not AVX2 vvv - const bool _Definitely_have_tzcnt = __isa_available >= __ISA_AVAILABLE_AVX2; - if (_Definitely_have_tzcnt) { - return _Callback([](_Ty _Val) { return _Countr_zero_tzcnt(_Val); }); - } else { - return _Callback([](_Ty _Val) { return _Countr_zero_bsf(_Val); }); - } -#endif // ^^^ not AVX2 ^^^ - } -#endif // ^^^ _HAS_TZCNT_BSF_INTRINSICS && _HAS_CXX20 ^^^ - // C++17 constexpr gcd() calls this function, so it should be constexpr unless we detect runtime evaluation. 
- return _Callback([](_Ty _Val) { return _Countr_zero_fallback(_Val); }); -} - -template , int> = 0> -_NODISCARD _CONSTEXPR20 int _Popcount(const _Ty _Val) noexcept { -#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS -#if _HAS_CXX20 - if (!_STD is_constant_evaluated()) -#endif // _HAS_CXX20 - { -#if _HAS_POPCNT_INTRINSICS - return _Checked_x86_x64_popcount(_Val); -#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv - return _Arm64_popcount(_Val); -#endif // ^^^ ARM64 intrinsics available ^^^ - } -#endif // ^^^ any intrinsics available ^^^ - return _Popcount_fallback(_Val); -} - -template -_CONSTEXPR20 decltype(auto) _Select_popcount_impl(_Fn _Callback) { - // TRANSITION, DevCom-1527995: Lambdas in this function ensure inlining -#if _HAS_POPCNT_INTRINSICS || _HAS_NEON_INTRINSICS -#if _HAS_CXX20 - if (!_STD is_constant_evaluated()) -#endif // _HAS_CXX20 - { -#if _HAS_POPCNT_INTRINSICS -#ifndef __AVX__ - const bool _Definitely_have_popcnt = __isa_available >= __ISA_AVAILABLE_SSE42; - if (!_Definitely_have_popcnt) { - return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); }); - } -#endif // !defined(__AVX__) - return _Callback([](_Ty _Val) { return _Unchecked_x86_x64_popcount(_Val); }); -#elif _HAS_NEON_INTRINSICS // ^^^ x86/x64 intrinsics available / ARM64 intrinsics available vvv - return _Callback([](_Ty _Val) { return _Arm64_popcount(_Val); }); -#endif // ^^^ ARM64 intrinsics available ^^^ - } -#endif // ^^^ any intrinsics available ^^^ - return _Callback([](_Ty _Val) { return _Popcount_fallback(_Val); }); -} - -#undef _HAS_POPCNT_INTRINSICS -#undef _HAS_TZCNT_BSF_INTRINSICS -#undef _HAS_NEON_INTRINSICS - _STD_END #pragma pop_macro("new") _STL_RESTORE_CLANG_WARNINGS diff --git a/stl/inc/numeric b/stl/inc/numeric index de51fb2854..b787216382 100644 --- a/stl/inc/numeric +++ b/stl/inc/numeric @@ -11,9 +11,13 @@ #include #if _HAS_CXX17 -#include +#include <__msvc_bit_utils.hpp> #endif // _HAS_CXX17 +#if 
_HAS_CXX20 +#include +#endif // _HAS_CXX20 + #pragma pack(push, _CRT_PACKING) #pragma warning(push, _STL_WARNING_LEVEL) #pragma warning(disable : _STL_DISABLED_WARNINGS) @@ -648,6 +652,24 @@ _NODISCARD constexpr common_type_t<_Mt, _Nt> lcm(const _Mt _Mx, const _Nt _Nx) n #endif // _HAS_CXX17 #if _HAS_CXX20 +template +inline constexpr _Flt _Floating_max{}; +template <> +inline constexpr float _Floating_max = FLT_MAX; +template <> +inline constexpr double _Floating_max = DBL_MAX; +template <> +inline constexpr long double _Floating_max = LDBL_MAX; + +template +inline constexpr _Flt _Floating_min{}; +template <> +inline constexpr float _Floating_min = FLT_MIN; +template <> +inline constexpr double _Floating_min = DBL_MIN; +template <> +inline constexpr long double _Floating_min = LDBL_MIN; + _EXPORT_STD template && !is_same_v, bool>, int> = 0> _NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept { if constexpr (is_floating_point_v<_Ty>) { @@ -666,7 +688,7 @@ _NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept { } } - constexpr _Ty _High_limit = (numeric_limits<_Ty>::max)() / 2; + constexpr _Ty _High_limit = _Floating_max> / 2; const auto _Val1_a = _Float_abs(_Val1); const auto _Val2_a = _Float_abs(_Val2); if (_Val1_a <= _High_limit && _Val2_a <= _High_limit) { @@ -690,7 +712,7 @@ _NODISCARD constexpr _Ty midpoint(const _Ty _Val1, const _Ty _Val2) noexcept { // In the default rounding mode this less than one ULP difference will always be rounded away, so under // /fp:fast we could avoid these tests if we had some means of detecting it in the caller. 
- constexpr _Ty _Low_limit = (numeric_limits<_Ty>::min)() * 2; + constexpr _Ty _Low_limit = _Floating_min> * 2; if (_Val1_a < _Low_limit) { return _Val1 + _Val2 / 2; } diff --git a/stl/inc/random b/stl/inc/random index cdc97d29ea..6960de75eb 100644 --- a/stl/inc/random +++ b/stl/inc/random @@ -2169,7 +2169,7 @@ _NODISCARD _Flt _Float_upper_bound(_Ty _Val) { constexpr auto _Mask = static_cast<_Ty>(-1) << (_Ty_digits - _Flt_digits); #ifdef _M_CEE_PURE constexpr auto _Ty_32or64_digits = numeric_limits<_Ty_32or64>::digits; - const auto _Log_plus1 = _Ty_32or64_digits - _Countl_zero_fallback(static_cast<_Ty_32or64>(_Val | _Ty{1})); + const auto _Log_plus1 = _Ty_32or64_digits - _Countl_zero_internal(static_cast<_Ty_32or64>(_Val | _Ty{1})); #else // _M_CEE_PURE const auto _Log_plus1 = _Bit_scan_reverse(static_cast<_Ty_32or64>(_Val | _Ty{1})); #endif // _M_CEE_PURE diff --git a/tests/std/tests/GH_001411_core_headers/test.cpp b/tests/std/tests/GH_001411_core_headers/test.cpp index ced205530c..a3cbc49e9b 100644 --- a/tests/std/tests/GH_001411_core_headers/test.cpp +++ b/tests/std/tests/GH_001411_core_headers/test.cpp @@ -19,11 +19,24 @@ #include #endif // _HAS_CXX17 +#if _HAS_CXX23 +#include <__msvc_print.hpp> +#endif // _HAS_CXX23 + +// <__msvc_bit_utils.hpp> is included by and // <__msvc_iter_core.hpp> is included by // should not be included outside of // is included by // is included by every public core header +// Also test GH-3692 "Including emits a non-reserved name" +#include + +#define STATIC_ASSERT(...) static_assert(__VA_ARGS__, #__VA_ARGS__) + +STATIC_ASSERT(std::_Stl_isa_available_sse42 == __ISA_AVAILABLE_SSE42); +STATIC_ASSERT(std::_Stl_isa_available_avx2 == __ISA_AVAILABLE_AVX2); + #ifdef _YVALS #error Core headers should not include . #endif From 42fa4a96f04c7680108c9f34a5c07c1973e89e50 Mon Sep 17 00:00:00 2001 From: "A. 
Jiang" Date: Sat, 20 May 2023 17:59:51 +0800 Subject: [PATCH 2/9] Forgotten change in `` --- stl/inc/vector | 1 + 1 file changed, 1 insertion(+) diff --git a/stl/inc/vector b/stl/inc/vector index b0942c2d2b..28a97bbc61 100644 --- a/stl/inc/vector +++ b/stl/inc/vector @@ -8,6 +8,7 @@ #define _VECTOR_ #include #if _STL_COMPILER_PREPROCESSOR +#include <__msvc_bit_utils.hpp> #include #if _HAS_CXX17 From 2ca8acf22f22363c6152c337b7e01e335a59fb6b Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Sat, 20 May 2023 18:20:52 +0800 Subject: [PATCH 3/9] Restore inclusion dependency in `<__msvc_int128.hpp>` --- stl/inc/__msvc_int128.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/stl/inc/__msvc_int128.hpp b/stl/inc/__msvc_int128.hpp index 457db6257b..f9d6bd278f 100644 --- a/stl/inc/__msvc_int128.hpp +++ b/stl/inc/__msvc_int128.hpp @@ -11,6 +11,7 @@ #if _STL_COMPILER_PREPROCESSOR #include <__msvc_bit_utils.hpp> #include +#include #include #include _STL_INTRIN_HEADER @@ -965,9 +966,6 @@ struct _Unsigned128 : _Base128 { } }; -_EXPORT_STD template -class numeric_limits; - template <> class numeric_limits<_Unsigned128> : public _Num_int_base { public: From c5589b95c4dd9aaa170402726fa36fdfaad56564 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Sat, 20 May 2023 18:24:47 +0800 Subject: [PATCH 4/9] Manually include `` in `` --- stl/inc/ranges | 1 + 1 file changed, 1 insertion(+) diff --git a/stl/inc/ranges b/stl/inc/ranges index cc5c841ba9..e039a10890 100644 --- a/stl/inc/ranges +++ b/stl/inc/ranges @@ -12,6 +12,7 @@ _EMIT_STL_WARNING(STL4038, "The contents of are available only with C++20 or later."); #else // ^^^ !defined(__cpp_lib_ranges) / defined(__cpp_lib_ranges) vvv #include <__msvc_int128.hpp> +#include #include #include #include From f298492f1f55178c22f3ce5e62f52a9fe85acdfb Mon Sep 17 00:00:00 2001 From: "A. 
Jiang" Date: Sat, 20 May 2023 18:56:52 +0800 Subject: [PATCH 5/9] Manually include `` in a test file --- .../GH_002431_byte_range_find_with_unreachable_sentinel/test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/std/tests/GH_002431_byte_range_find_with_unreachable_sentinel/test.cpp b/tests/std/tests/GH_002431_byte_range_find_with_unreachable_sentinel/test.cpp index 5650aae404..efb5a03368 100644 --- a/tests/std/tests/GH_002431_byte_range_find_with_unreachable_sentinel/test.cpp +++ b/tests/std/tests/GH_002431_byte_range_find_with_unreachable_sentinel/test.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include From 071e2ec19bbe6f63fa646772ab280900d455ca89 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Sat, 20 May 2023 22:59:18 +0800 Subject: [PATCH 6/9] Specific test for identifier `ISA_AVAILABILITY` --- .../GH_002206_unreserved_names/test.compile.pass.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp b/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp index fbd1149a46..9325dad78e 100644 --- a/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp +++ b/tests/std/tests/GH_002206_unreserved_names/test.compile.pass.cpp @@ -1,9 +1,10 @@ // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -#define nsec delete -#define sec delete -#define xtime delete -#define xtime_get delete +#define ISA_AVAILABILITY delete +#define nsec delete +#define sec delete +#define xtime delete +#define xtime_get delete #include <__msvc_all_public_headers.hpp> From 039695a47ddac1f641c9e9e5626888d0579df69e Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 24 May 2023 10:49:41 +0800 Subject: [PATCH 7/9] Move `_Countl_zero_internal` to downstream Also - add `_STL_INTERNAL_STATIC_ASSERT`; - add `_INLINE_VAR` to `_Is_standard_unsigned_integer`. 
--- stl/inc/__msvc_bit_utils.hpp | 17 +---------------- stl/inc/__msvc_int128.hpp | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/stl/inc/__msvc_bit_utils.hpp b/stl/inc/__msvc_bit_utils.hpp index bc23847c08..8914fd00b8 100644 --- a/stl/inc/__msvc_bit_utils.hpp +++ b/stl/inc/__msvc_bit_utils.hpp @@ -175,21 +175,6 @@ _NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept { } #endif // defined(_M_ARM) || defined(_M_ARM64) -template -_NODISCARD constexpr int _Countl_zero_internal(const _Ty _Val) noexcept { -#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) - if (!_Is_constant_evaluated()) { - return _Checked_x86_x64_countl_zero(_Val); - } -#elif defined(_M_ARM) || defined(_M_ARM64) - if (!_Is_constant_evaluated()) { - return _Checked_arm_arm64_countl_zero(_Val); - } -#endif // defined(_M_ARM) || defined(_M_ARM64) - - return _Countl_zero_fallback(_Val); -} - // Implementation of countr_zero without using specialized CPU instructions. // Used at compile time and when said instructions are not supported. 
// see "Hacker's Delight" section 5-4 @@ -362,7 +347,7 @@ _NODISCARD inline int _Arm64_popcount(const unsigned long long _Val) noexcept { #endif // _HAS_NEON_INTRINSICS template -constexpr bool _Is_standard_unsigned_integer = +_INLINE_VAR constexpr bool _Is_standard_unsigned_integer = _Is_any_of_v, unsigned char, unsigned short, unsigned int, unsigned long, unsigned long long>; template , int> = 0> diff --git a/stl/inc/__msvc_int128.hpp b/stl/inc/__msvc_int128.hpp index f9d6bd278f..f8ac4f3989 100644 --- a/stl/inc/__msvc_int128.hpp +++ b/stl/inc/__msvc_int128.hpp @@ -56,6 +56,22 @@ _STD_BEGIN #define _STL_128_DIV_INTRINSICS 0 #endif // ^^^ intrinsics unavailable ^^^ +template +_NODISCARD constexpr int _Countl_zero_internal(const _Ty _Val) noexcept { + _STL_INTERNAL_STATIC_ASSERT(_Is_standard_unsigned_integer<_Ty>); +#if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) + if (!_Is_constant_evaluated()) { + return _Checked_x86_x64_countl_zero(_Val); + } +#elif defined(_M_ARM) || defined(_M_ARM64) + if (!_Is_constant_evaluated()) { + return _Checked_arm_arm64_countl_zero(_Val); + } +#endif // defined(_M_ARM) || defined(_M_ARM64) + + return _Countl_zero_fallback(_Val); +} + struct #ifndef _M_ARM alignas(16) From daa6e6a31542b11329904e497c7af9de841b0de6 Mon Sep 17 00:00:00 2001 From: "A. 
Jiang" Date: Fri, 26 May 2023 13:19:30 +0800 Subject: [PATCH 8/9] Include `` in `` only `#if _HAS_CXX23` --- stl/inc/ranges | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/ranges b/stl/inc/ranges index e039a10890..cbcb041344 100644 --- a/stl/inc/ranges +++ b/stl/inc/ranges @@ -12,7 +12,6 @@ _EMIT_STL_WARNING(STL4038, "The contents of are available only with C++20 or later."); #else // ^^^ !defined(__cpp_lib_ranges) / defined(__cpp_lib_ranges) vvv #include <__msvc_int128.hpp> -#include #include #include #include @@ -22,6 +21,7 @@ _EMIT_STL_WARNING(STL4038, "The contents of are available only with C++ #if _HAS_CXX23 #include +#include #endif // _HAS_CXX23 #pragma pack(push, _CRT_PACKING) From 8216003a8fe044cd6e17112d0c241eb0148a3ff9 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 14 Jun 2023 13:25:18 -0700 Subject: [PATCH 9/9] Add `_HAS_COUNTL_ZERO_INTRINSICS` guard. --- stl/inc/__msvc_bit_utils.hpp | 8 ++++++++ stl/inc/__msvc_int128.hpp | 2 ++ stl/inc/bit | 2 ++ 3 files changed, 12 insertions(+) diff --git a/stl/inc/__msvc_bit_utils.hpp b/stl/inc/__msvc_bit_utils.hpp index 8914fd00b8..04f45b03f2 100644 --- a/stl/inc/__msvc_bit_utils.hpp +++ b/stl/inc/__msvc_bit_utils.hpp @@ -64,6 +64,13 @@ _NODISCARD constexpr int _Countl_zero_fallback(_Ty _Val) noexcept { return static_cast(_Nn) - static_cast(_Val); } +#if !defined(_M_CEE_PURE) && !defined(__CUDACC__) && !defined(__INTEL_COMPILER) +#define _HAS_COUNTL_ZERO_INTRINSICS 1 +#else // ^^^ intrinsics available / intrinsics unavailable vvv +#define _HAS_COUNTL_ZERO_INTRINSICS 0 +#endif // ^^^ intrinsics unavailable ^^^ + +#if _HAS_COUNTL_ZERO_INTRINSICS #if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) template _NODISCARD int _Countl_zero_lzcnt(const _Ty _Val) noexcept { @@ -174,6 +181,7 @@ _NODISCARD int _Checked_arm_arm64_countl_zero(const _Ty _Val) noexcept { #endif // TRANSITION, GH-1586 } #endif // defined(_M_ARM) || defined(_M_ARM64) +#endif // 
_HAS_COUNTL_ZERO_INTRINSICS // Implementation of countr_zero without using specialized CPU instructions. // Used at compile time and when said instructions are not supported. diff --git a/stl/inc/__msvc_int128.hpp b/stl/inc/__msvc_int128.hpp index f8ac4f3989..61ea8e29b1 100644 --- a/stl/inc/__msvc_int128.hpp +++ b/stl/inc/__msvc_int128.hpp @@ -59,6 +59,7 @@ _STD_BEGIN template _NODISCARD constexpr int _Countl_zero_internal(const _Ty _Val) noexcept { _STL_INTERNAL_STATIC_ASSERT(_Is_standard_unsigned_integer<_Ty>); +#if _HAS_COUNTL_ZERO_INTRINSICS #if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) if (!_Is_constant_evaluated()) { return _Checked_x86_x64_countl_zero(_Val); @@ -68,6 +69,7 @@ _NODISCARD constexpr int _Countl_zero_internal(const _Ty _Val) noexcept { return _Checked_arm_arm64_countl_zero(_Val); } #endif // defined(_M_ARM) || defined(_M_ARM64) +#endif // _HAS_COUNTL_ZERO_INTRINSICS return _Countl_zero_fallback(_Val); } diff --git a/stl/inc/bit b/stl/inc/bit index 12a73d8be4..262a59ebbd 100644 --- a/stl/inc/bit +++ b/stl/inc/bit @@ -191,6 +191,7 @@ _NODISCARD constexpr _Ty rotr(const _Ty _Val, const int _Rotation) noexcept { _EXPORT_STD template , int> /* = 0 */> _NODISCARD constexpr int countl_zero(const _Ty _Val) noexcept { +#if _HAS_COUNTL_ZERO_INTRINSICS #if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) if (!_STD is_constant_evaluated()) { return _Checked_x86_x64_countl_zero(_Val); @@ -200,6 +201,7 @@ _NODISCARD constexpr int countl_zero(const _Ty _Val) noexcept { return _Checked_arm_arm64_countl_zero(_Val); } #endif // defined(_M_ARM) || defined(_M_ARM64) +#endif // _HAS_COUNTL_ZERO_INTRINSICS return _Countl_zero_fallback(_Val); }