From cd7420ad28734715bb81ec55219668dfd3a5e0a5 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 17:07:00 +0300 Subject: [PATCH 01/37] vectorize `min/max_element` using SSE4.1/AVX for floats Resolves #2439 --- stl/inc/algorithm | 8 +- stl/inc/xutility | 22 +- stl/src/vector_algorithms.cpp | 379 ++++++++++++++++-- .../VSO_0000000_vector_algorithms/test.cpp | 17 + 4 files changed, 381 insertions(+), 45 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index c72b4042ce..43b7624511 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -54,6 +54,8 @@ _Min_max_element_t __stdcall __std_minmax_element_1(const void* _First, const vo _Min_max_element_t __stdcall __std_minmax_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; _Min_max_element_t __stdcall __std_minmax_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; _Min_max_element_t __stdcall __std_minmax_element_8(const void* _First, const void* _Last, bool _Signed) noexcept; +_Min_max_element_t __stdcall __std_minmax_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; +_Min_max_element_t __stdcall __std_minmax_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; _END_EXTERN_C template @@ -62,7 +64,11 @@ _STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept { _Min_max_element_t _Res; - if constexpr (sizeof(_Ty) == 1) { + if constexpr (_STD is_same_v<_Ty, float>) { + _Res = __std_minmax_element_f(_First, _Last, false); + } else if constexpr (_STD is_same_v<_Ty, double> || _STD is_same_v<_Ty, long double>) { + _Res = __std_minmax_element_d(_First, _Last, false); + } else if constexpr(sizeof(_Ty) == 1) { _Res = __std_minmax_element_1(_First, _Last, _Signed); } else if constexpr (sizeof(_Ty) == 2) { _Res = __std_minmax_element_2(_First, _Last, _Signed); diff --git a/stl/inc/xutility b/stl/inc/xutility index 6c588a6b58..2a0baa250d 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ 
-87,11 +87,15 @@ const void* __stdcall __std_min_element_1(const void* _First, const void* _Last, const void* __stdcall __std_min_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_min_element_8(const void* _First, const void* _Last, bool _Signed) noexcept; +const void* __stdcall __std_min_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; +const void* __stdcall __std_min_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; const void* __stdcall __std_max_element_1(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_max_element_2(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_max_element_4(const void* _First, const void* _Last, bool _Signed) noexcept; const void* __stdcall __std_max_element_8(const void* _First, const void* _Last, bool _Signed) noexcept; +const void* __stdcall __std_max_element_f(const void* _First, const void* _Last, bool _Unused) noexcept; +const void* __stdcall __std_max_element_d(const void* _First, const void* _Last, bool _Unused) noexcept; _END_EXTERN_C template @@ -157,7 +161,11 @@ template _Ty* __std_min_element(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; - if constexpr (sizeof(_Ty) == 1) { + if constexpr (_STD is_same_v<_Ty, float>) { + return const_cast<_Ty*>(static_cast(__std_min_element_f(_First, _Last, false))); + } else if constexpr (_STD is_same_v<_Ty, double> || _STD is_same_v<_Ty, long double>) { + return const_cast<_Ty*>(static_cast(__std_min_element_d(_First, _Last, false))); + }else if constexpr(sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(__std_min_element_1(_First, _Last, _Signed))); } else if constexpr (sizeof(_Ty) == 2) { return const_cast<_Ty*>(static_cast(__std_min_element_2(_First, _Last, 
_Signed))); @@ -174,7 +182,11 @@ template _Ty* __std_max_element(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; - if constexpr (sizeof(_Ty) == 1) { + if constexpr (_STD is_same_v<_Ty, float>) { + return const_cast<_Ty*>(static_cast(__std_max_element_f(_First, _Last, false))); + } else if constexpr (_STD is_same_v<_Ty, double> || _STD is_same_v<_Ty, long double>) { + return const_cast<_Ty*>(static_cast(__std_max_element_d(_First, _Last, false))); + } else if constexpr (sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(__std_max_element_1(_First, _Last, _Signed))); } else if constexpr (sizeof(_Ty) == 2) { return const_cast<_Ty*>(static_cast(__std_max_element_2(_First, _Last, _Signed))); @@ -6626,7 +6638,11 @@ template > _INLINE_VAR constexpr bool _Is_min_max_optimization_safe = // Activate the vector algorithms for min_/max_element? _Iterator_is_contiguous<_Iter> // The iterator must be contiguous so we can get raw pointers. && !_Iterator_is_volatile<_Iter> // The iterator must not be volatile. - && conjunction_v, is_pointer<_Elem>>, // Element is of integral or pointer type. + && conjunction_v, // Element is floating point or... +#endif + is_integral<_Elem>, is_pointer<_Elem>>, // ... integral or pointer type. disjunction< // And either of the following: #ifdef __cpp_lib_concepts is_same<_Pr, _RANGES less>, // predicate is ranges::less diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 7c1d0eb714..cafd58d746 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. 
+ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #ifdef _M_CEE_PURE @@ -533,6 +534,19 @@ namespace { _Mode_both = _Mode_min | _Mode_max, }; + template <_Min_max_mode _Mode, class _Ty> + auto _Minmax_tail_f( + const void* _First, const void* _Last, _Min_max_element_t& _Res, _Ty _Cur_min, _Ty _Cur_max) noexcept { + + if constexpr (_Mode == _Mode_min) { + return _Min_tail(_First, _Last, _Res._Min, static_cast<_Ty>(_Cur_min)); + } else if constexpr (_Mode == _Mode_max) { + return _Max_tail(_First, _Last, _Res._Max, static_cast<_Ty>(_Cur_max)); + } else { + return _Both_tail(_First, _Last, _Res, static_cast<_Ty>(_Cur_min), static_cast<_Ty>(_Cur_max)); + } + } + template <_Min_max_mode _Mode, class _STy, class _UTy> auto _Minmax_tail(const void* _First, const void* _Last, _Min_max_element_t& _Res, bool _Sign, _UTy _Cur_min, _UTy _Cur_max) noexcept { @@ -561,6 +575,8 @@ namespace { } struct _Minmax_traits_1 { + static constexpr bool _Is_floating = false; + using _Signed_t = int8_t; using _Unsigned_t = uint8_t; @@ -571,6 +587,10 @@ namespace { static constexpr bool _Has_portion_max = true; static constexpr size_t _Portion_max = 256; + static __m128i _Load(const void* _Src) { + return _mm_loadu_si128(reinterpret_cast(_Src)); + } + static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) noexcept { alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][16] = { {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, {}}; @@ -626,6 +646,10 @@ namespace { return _mm_cmpgt_epi8(_First, _Second); } + static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { + return _mm_cmpeq_epi8(_First, _Second); + } + static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept { return _mm_min_epi8(_First, _Second); } @@ -633,10 +657,16 @@ namespace { static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept { return _mm_max_epi8(_First, _Second); } 
+ + static __m128i _Mask_cast(__m128i _Mask) noexcept { + return _Mask; + } #endif // !_M_ARM64EC }; struct _Minmax_traits_2 { + static constexpr bool _Is_floating = false; + using _Signed_t = int16_t; using _Unsigned_t = uint16_t; @@ -647,6 +677,10 @@ namespace { static constexpr bool _Has_portion_max = true; static constexpr size_t _Portion_max = 65536; + static __m128i _Load(const void* _Src) { + return _mm_loadu_si128(reinterpret_cast(_Src)); + } + static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) noexcept { alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][8] = { 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, {}}; @@ -703,6 +737,10 @@ namespace { return _mm_cmpgt_epi16(_First, _Second); } + static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { + return _mm_cmpeq_epi16(_First, _Second); + } + static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept { return _mm_min_epi16(_First, _Second); } @@ -710,10 +748,16 @@ namespace { static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept { return _mm_max_epi16(_First, _Second); } + + static __m128i _Mask_cast(__m128i _Mask) noexcept { + return _Mask; + } #endif // !_M_ARM64EC }; struct _Minmax_traits_4 { + static constexpr bool _Is_floating = false; + using _Signed_t = int32_t; using _Unsigned_t = uint32_t; @@ -728,6 +772,10 @@ namespace { static constexpr size_t _Portion_max = 0x1'0000'0000ULL; #endif // ^^^ 64-bit ^^^ + static __m128i _Load(const void* _Src) { + return _mm_loadu_si128(reinterpret_cast(_Src)); + } + static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) noexcept { alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][4] = { 0x8000'0000UL, 0x8000'0000UL, 0x8000'0000UL, 0x8000'0000UL, {}}; @@ -780,6 +828,10 @@ namespace { return _mm_cmpgt_epi32(_First, _Second); } + static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { + 
return _mm_cmpeq_epi32(_First, _Second); + } + static __m128i _Min(const __m128i _First, const __m128i _Second, __m128i) noexcept { return _mm_min_epi32(_First, _Second); } @@ -787,10 +839,16 @@ namespace { static __m128i _Max(const __m128i _First, const __m128i _Second, __m128i) noexcept { return _mm_max_epi32(_First, _Second); } + + static __m128i _Mask_cast(__m128i _Mask) noexcept { + return _Mask; + } #endif // !_M_ARM64EC }; struct _Minmax_traits_8 { + static constexpr bool _Is_floating = false; + using _Signed_t = int64_t; using _Unsigned_t = uint64_t; @@ -800,6 +858,10 @@ namespace { #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = false; + static __m128i _Load(const void* _Src) { + return _mm_loadu_si128(reinterpret_cast(_Src)); + } + static __m128i _Sign_correction(const __m128i _Val, const bool _Sign) { alignas(16) static constexpr _Unsigned_t _Sign_corrections[2][2] = { 0x8000'0000'0000'0000ULL, 0x8000'0000'0000'0000ULL, {}}; @@ -860,6 +922,10 @@ namespace { return _mm_cmpgt_epi64(_First, _Second); } + static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { + return _mm_cmpeq_epi64(_First, _Second); + } + static __m128i _Min(const __m128i _First, const __m128i _Second, const __m128i _Mask) noexcept { return _mm_blendv_epi8(_First, _Second, _Mask); } @@ -867,6 +933,206 @@ namespace { static __m128i _Max(const __m128i _First, const __m128i _Second, const __m128i _Mask) noexcept { return _mm_blendv_epi8(_First, _Second, _Mask); } + + static __m128i _Mask_cast(__m128i _Mask) noexcept { + return _Mask; + } +#endif // !_M_ARM64EC + }; + + struct _Minmax_traits_f { + static constexpr bool _Is_floating = true; + + using _Signed_t = float; + + static constexpr _Signed_t _Init_min_val = -__builtin_huge_valf(); + static constexpr _Signed_t _Init_max_val = __builtin_huge_valf(); + +#ifndef _M_ARM64EC +#ifdef _M_IX86 + static constexpr bool _Has_portion_max = false; +#else // ^^^ 32-bit / 64-bit vvv + static constexpr bool 
_Has_portion_max = true; + static constexpr size_t _Portion_max = 0x1'0000'0000ULL; +#endif // ^^^ 64-bit ^^^ + + static __m128 _Load(const void* _Src) { + return _mm_loadu_ps(reinterpret_cast(_Src)); + } + + static __m128 _Sign_correction(const __m128 _Val, const bool) noexcept { + return _Val; + } + + static __m128i _Inc(__m128i _Idx) noexcept { + return _mm_add_epi32(_Idx, _mm_set1_epi32(1)); + } + + template + static __m128 _H_func(const __m128 _Cur, _Fn _Funct) noexcept { + __m128 _H_min_val = _Cur; + _H_min_val = _Funct(_H_min_val, _mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(1, 0, 3, 2))); + _H_min_val = _Funct(_H_min_val, _mm_shuffle_ps(_H_min_val, _H_min_val, _MM_SHUFFLE(2, 3, 0, 1))); + return _H_min_val; + } + + template + static __m128i _H_func_u(const __m128i _Cur, _Fn _Funct) noexcept { + __m128i _H_min_val = _Cur; + _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(1, 0, 3, 2))); + _H_min_val = _Funct(_H_min_val, _mm_shuffle_epi32(_H_min_val, _MM_SHUFFLE(2, 3, 0, 1))); + return _H_min_val; + } + + static __m128 _H_min(const __m128 _Cur) noexcept { + return _H_func(_Cur, [](__m128 _First, __m128 _Second) { return _mm_min_ps(_First, _Second); }); + } + + static __m128 _H_max(const __m128 _Cur) noexcept { + return _H_func(_Cur, [](__m128 _First, __m128 _Second) { return _mm_max_ps(_First, _Second); }); + } + + static __m128i _H_min_u(const __m128i _Cur) noexcept { + return _H_func_u(_Cur, [](__m128i _First, __m128i _Second) { return _mm_min_epu32(_First, _Second); }); + } + + static __m128i _H_max_u(const __m128i _Cur) noexcept { + return _H_func_u(_Cur, [](__m128i _First, __m128i _Second) { return _mm_max_epu32(_First, _Second); }); + } + + static float _Get_any(const __m128 _Cur) noexcept { + return _mm_cvtss_f32(_Cur); + } + + static uint32_t _Get_v_pos(const __m128i _Idx, const unsigned long _H_pos) noexcept { + uint32_t _Array[4]; + _mm_storeu_si128(reinterpret_cast<__m128i*>(&_Array), _Idx); + return _Array[_H_pos >> 
2]; + } + + static __m128 _Cmp_eq(const __m128 _First, const __m128 _Second) noexcept { + return _mm_cmpeq_ps(_First, _Second); + } + + static __m128 _Cmp_gt(const __m128 _First, const __m128 _Second) noexcept { + return _mm_cmpgt_ps(_First, _Second); + } + + static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { + return _mm_cmpeq_epi32(_First, _Second); + } + + static __m128 _Min(const __m128 _First, const __m128 _Second, __m128) noexcept { + return _mm_min_ps(_First, _Second); + } + + static __m128 _Max(const __m128 _First, const __m128 _Second, __m128) noexcept { + return _mm_max_ps(_First, _Second); + } + + static __m128i _Mask_cast(__m128 _Mask) noexcept { + return _mm_castps_si128(_Mask); + } +#endif // !_M_ARM64EC + }; + + struct _Minmax_traits_d { + static constexpr bool _Is_floating = true; + + using _Signed_t = double; + + static constexpr _Signed_t _Init_min_val = -__builtin_huge_val(); + static constexpr _Signed_t _Init_max_val = __builtin_huge_val(); + +#ifndef _M_ARM64EC + static constexpr bool _Has_portion_max = false; + + static __m128d _Load(const void* _Src) { + return _mm_loadu_pd(reinterpret_cast(_Src)); + } + + static __m128d _Sign_correction(const __m128d _Val, const bool) noexcept { + return _Val; + } + + static __m128i _Inc(__m128i _Idx) noexcept { + return _mm_add_epi32(_Idx, _mm_set1_epi64x(1)); + } + + template + static __m128d _H_func(const __m128d _Cur, _Fn _Funct) noexcept { + __m128d _H_min_val = _Cur; + _H_min_val = _Funct(_H_min_val, _mm_shuffle_pd(_H_min_val, _H_min_val, 1)); + return _H_min_val; + } + + template + static __m128i _H_func_u(const __m128i _Cur, _Fn _Funct) noexcept { + uint64_t _H_min_a = _Get_any_u(_Cur); + uint64_t _H_min_b = _Get_any_u(_mm_bsrli_si128(_Cur, 8)); + if (_Funct(_H_min_b, _H_min_a)) { + _H_min_a = _H_min_b; + } + return _mm_set1_epi64x(_H_min_a); + } + + static __m128d _H_min(const __m128d _Cur) noexcept { + return _H_func(_Cur, [](__m128d _First, __m128d _Second) { return 
_mm_min_pd(_First, _Second); }); + } + + static __m128d _H_max(const __m128d _Cur) noexcept { + return _H_func(_Cur, [](__m128d _First, __m128d _Second) { return _mm_max_pd(_First, _Second); }); + } + + static __m128i _H_min_u(const __m128i _Cur) noexcept { + return _H_func_u(_Cur, [](uint64_t _Lhs, uint64_t _Rhs) { return _Lhs < _Rhs; }); + } + + static __m128i _H_max_u(const __m128i _Cur) noexcept { + return _H_func_u(_Cur, [](uint64_t _Lhs, uint64_t _Rhs) { return _Lhs > _Rhs; }); + } + static double _Get_any(const __m128d _Cur) noexcept { + return _mm_cvtsd_f64(_Cur); + } + + static uint64_t _Get_any_u(const __m128i _Cur) noexcept { +#ifdef _M_IX86 + return (static_cast(static_cast(_mm_extract_epi32(_Cur, 1))) << 32) + | static_cast(static_cast(_mm_cvtsi128_si32(_Cur)))); +#else // ^^^ x86 / x64 vvv + return static_cast(_mm_cvtsi128_si64(_Cur)); +#endif // ^^^ x64 ^^^ + } + + static uint64_t _Get_v_pos(const __m128i _Idx, const unsigned long _H_pos) noexcept { + uint64_t _Array[2]; + _mm_storeu_si128(reinterpret_cast<__m128i*>(&_Array), _Idx); + return _Array[_H_pos >> 3]; + } + + static __m128d _Cmp_eq(const __m128d _First, const __m128d _Second) noexcept { + return _mm_cmpeq_pd(_First, _Second); + } + + static __m128d _Cmp_gt(const __m128d _First, const __m128d _Second) noexcept { + return _mm_cmpgt_pd(_First, _Second); + } + + static __m128i _Cmp_eq_idx(const __m128i _First, const __m128i _Second) noexcept { + return _mm_cmpeq_epi64(_First, _Second); + } + + static __m128d _Min(const __m128d _First, const __m128d _Second, __m128d) noexcept { + return _mm_min_pd(_First, _Second); + } + + static __m128d _Max(const __m128d _First, const __m128d _Second, __m128d) noexcept { + return _mm_max_pd(_First, _Second); + } + + static __m128i _Mask_cast(__m128d _Mask) noexcept { + return _mm_castpd_si128(_Mask); + } #endif // !_M_ARM64EC }; @@ -898,13 +1164,12 @@ namespace { _Advance_bytes(_Stop_at, _Portion_byte_size); // Load values and if unsigned adjust them to be 
signed (for signed vector comparisons) - __m128i _Cur_vals = - _Traits::_Sign_correction(_mm_loadu_si128(reinterpret_cast(_First)), _Sign); - __m128i _Cur_vals_min = _Cur_vals; // vector of vertical minimum values - __m128i _Cur_idx_min = _mm_setzero_si128(); // vector of vertical minimum indices - __m128i _Cur_vals_max = _Cur_vals; // vector of vertical maximum values - __m128i _Cur_idx_max = _mm_setzero_si128(); // vector of vertical maximum indices - __m128i _Cur_idx = _mm_setzero_si128(); // current vector of indices + auto _Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign); + auto _Cur_vals_min = _Cur_vals; // vector of vertical minimum values + auto _Cur_idx_min = _mm_setzero_si128(); // vector of vertical minimum indices + auto _Cur_vals_max = _Cur_vals; // vector of vertical maximum values + auto _Cur_idx_max = _mm_setzero_si128(); // vector of vertical maximum indices + auto _Cur_idx = _mm_setzero_si128(); // current vector of indices for (;;) { _Advance_bytes(_First, 16); @@ -917,21 +1182,21 @@ namespace { // Compute horizontal min and/or max. Determine horizontal and vertical position of it. 
if constexpr ((_Mode & _Mode_min) != 0) { - const __m128i _H_min = - _Traits::_H_min(_Cur_vals_min); // Vector populated by the smallest element + const auto _H_min = _Traits::_H_min(_Cur_vals_min); // Vector populated by the smallest element const auto _H_min_val = _Traits::_Get_any(_H_min); // Get any element of it if (_H_min_val < _Cur_min_val) { // Current horizontal min is less than the old _Cur_min_val = _H_min_val; // update min - const __m128i _Eq_mask = + const auto _Eq_mask = _Traits::_Cmp_eq(_H_min, _Cur_vals_min); // Mask of all elems eq to min - int _Mask = _mm_movemask_epi8(_Eq_mask); + int _Mask = _mm_movemask_epi8(_Traits::_Mask_cast(_Eq_mask)); // Indices of minimum elements or the greatest index if none - const __m128i _All_max = _mm_set1_epi8(static_cast(0xFF)); - const __m128i _Idx_min_val = _mm_blendv_epi8(_All_max, _Cur_idx_min, _Eq_mask); - __m128i _Idx_min = _Traits::_H_min_u(_Idx_min_val); // The smallest indices + const auto _All_max = _mm_set1_epi8(static_cast(0xFF)); + const auto _Idx_min_val = + _mm_blendv_epi8(_All_max, _Cur_idx_min, _Traits::_Mask_cast(_Eq_mask)); + auto _Idx_min = _Traits::_H_min_u(_Idx_min_val); // The smallest indices // Select the smallest vertical indices from the smallest element mask - _Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq(_Idx_min, _Idx_min_val)); + _Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq_idx(_Idx_min, _Idx_min_val)); unsigned long _H_pos; // Find the smallest horizontal index @@ -944,8 +1209,7 @@ namespace { } if constexpr ((_Mode & _Mode_max) != 0) { - const __m128i _H_max = - _Traits::_H_max(_Cur_vals_max); // Vector populated by the largest element + const auto _H_max = _Traits::_H_max(_Cur_vals_max); // Vector populated by the largest element const auto _H_max_val = _Traits::_Get_any(_H_max); // Get any element of it if (_Mode == _Mode_both && _Cur_max_val <= _H_max_val @@ -953,19 +1217,19 @@ namespace { // max_element: current horizontal max is greater than the old, update max // 
minmax_element: current horizontal max is not less than the old, update max _Cur_max_val = _H_max_val; - const __m128i _Eq_mask = + const auto _Eq_mask = _Traits::_Cmp_eq(_H_max, _Cur_vals_max); // Mask of all elems eq to max - int _Mask = _mm_movemask_epi8(_Eq_mask); + int _Mask = _mm_movemask_epi8(_Traits::_Mask_cast(_Eq_mask)); unsigned long _H_pos; if constexpr (_Mode == _Mode_both) { // Looking for the last occurrence of maximum // Indices of maximum elements or zero if none - const __m128i _Idx_max_val = - _mm_blendv_epi8(_mm_setzero_si128(), _Cur_idx_max, _Eq_mask); - const __m128i _Idx_max = _Traits::_H_max_u(_Idx_max_val); // The greatest indices + const auto _Idx_max_val = + _mm_blendv_epi8(_mm_setzero_si128(), _Cur_idx_max, _Traits::_Mask_cast(_Eq_mask)); + const auto _Idx_max = _Traits::_H_max_u(_Idx_max_val); // The greatest indices // Select the greatest vertical indices from the largest element mask - _Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq(_Idx_max, _Idx_max_val)); + _Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq_idx(_Idx_max, _Idx_max_val)); // Find the largest horizontal index _BitScanReverse(&_H_pos, _Mask); // lgtm [cpp/conditionallyuninitializedvariable] @@ -974,11 +1238,12 @@ namespace { } else { // Looking for the first occurrence of maximum // Indices of maximum elements or the greatest index if none - const __m128i _All_max = _mm_set1_epi8(static_cast(0xFF)); - const __m128i _Idx_max_val = _mm_blendv_epi8(_All_max, _Cur_idx_max, _Eq_mask); - const __m128i _Idx_max = _Traits::_H_min_u(_Idx_max_val); // The smallest indices + const auto _All_max = _mm_set1_epi8(static_cast(0xFF)); + const auto _Idx_max_val = + _mm_blendv_epi8(_All_max, _Cur_idx_max, _Traits::_Mask_cast(_Eq_mask)); + const auto _Idx_max = _Traits::_H_min_u(_Idx_max_val); // The smallest indices // Select the smallest vertical indices from the largest element mask - _Mask &= _mm_movemask_epi8(_Traits::_Cmp_eq(_Idx_max, _Idx_max_val)); + _Mask &= 
_mm_movemask_epi8(_Traits::_Cmp_eq_idx(_Idx_max, _Idx_max_val)); // Find the smallest horizontal index _BitScanForward(&_H_pos, _Mask); // lgtm [cpp/conditionallyuninitializedvariable] @@ -1007,8 +1272,7 @@ namespace { // Indices will be relative to the new base _Base = static_cast(_First); // Load values and if unsigned adjust them to be signed (for signed vector comparisons) - _Cur_vals = - _Traits::_Sign_correction(_mm_loadu_si128(reinterpret_cast(_First)), _Sign); + _Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign); if constexpr ((_Mode & _Mode_min) != 0) { _Cur_vals_min = _Cur_vals; @@ -1028,34 +1292,38 @@ namespace { // This is the main part, finding vertical minimum/maximum // Load values and if unsigned adjust them to be signed (for signed vector comparisons) - _Cur_vals = _Traits::_Sign_correction(_mm_loadu_si128(reinterpret_cast(_First)), _Sign); + _Cur_vals = _Traits::_Sign_correction(_Traits::_Load(_First), _Sign); if constexpr ((_Mode & _Mode_min) != 0) { // Looking for the first occurrence of minimum, don't overwrite with newly found occurrences - const __m128i _Is_less = _Traits::_Cmp_gt(_Cur_vals_min, _Cur_vals); // _Cur_vals < _Cur_vals_min - _Cur_idx_min = _mm_blendv_epi8(_Cur_idx_min, _Cur_idx, _Is_less); // Remember their vertical indices + const auto _Is_less = _Traits::_Cmp_gt(_Cur_vals_min, _Cur_vals); // _Cur_vals < _Cur_vals_min + _Cur_idx_min = _mm_blendv_epi8( + _Cur_idx_min, _Cur_idx, _Traits::_Mask_cast(_Is_less)); // Remember their vertical indices _Cur_vals_min = _Traits::_Min(_Cur_vals_min, _Cur_vals, _Is_less); // Update the current minimum } if constexpr (_Mode == _Mode_max) { // Looking for the first occurrence of maximum, don't overwrite with newly found occurrences - const __m128i _Is_greater = _Traits::_Cmp_gt(_Cur_vals, _Cur_vals_max); // _Cur_vals > _Cur_vals_max - _Cur_idx_max = - _mm_blendv_epi8(_Cur_idx_max, _Cur_idx, _Is_greater); // Remember their vertical indices + const auto _Is_greater = 
_Traits::_Cmp_gt(_Cur_vals, _Cur_vals_max); // _Cur_vals > _Cur_vals_max + _Cur_idx_max = _mm_blendv_epi8( + _Cur_idx_max, _Cur_idx, _Traits::_Mask_cast(_Is_greater)); // Remember their vertical indices _Cur_vals_max = _Traits::_Max(_Cur_vals_max, _Cur_vals, _Is_greater); // Update the current maximum } else if constexpr (_Mode == _Mode_both) { // Looking for the last occurrence of maximum, do overwrite with newly found occurrences - const __m128i _Is_less = - _Traits::_Cmp_gt(_Cur_vals_max, _Cur_vals); // !(_Cur_vals >= _Cur_vals_max) - _Cur_idx_max = _mm_blendv_epi8(_Cur_idx, _Cur_idx_max, _Is_less); // Remember their vertical indices + const auto _Is_less = _Traits::_Cmp_gt(_Cur_vals_max, _Cur_vals); // !(_Cur_vals >= _Cur_vals_max) + _Cur_idx_max = _mm_blendv_epi8(_Cur_idx, _Cur_idx_max, + _Traits::_Mask_cast(_Is_less)); // Remember their vertical indices _Cur_vals_max = _Traits::_Max(_Cur_vals, _Cur_vals_max, _Is_less); // Update the current maximum } } } #endif // !_M_ARM64EC - - return _Minmax_tail<_Mode, typename _Traits::_Signed_t, typename _Traits::_Unsigned_t>( - _First, _Last, _Res, _Sign, _Cur_min_val, _Cur_max_val); + if constexpr (_Traits::_Is_floating) { + return _Minmax_tail_f<_Mode, typename _Traits::_Signed_t>(_First, _Last, _Res, _Cur_min_val, _Cur_max_val); + } else { + return _Minmax_tail<_Mode, typename _Traits::_Signed_t, typename _Traits::_Unsigned_t>( + _First, _Last, _Res, _Sign, _Cur_min_val, _Cur_max_val); + } } } // unnamed namespace @@ -1082,6 +1350,16 @@ const void* __stdcall __std_min_element_8( return _Minmax_element<_Mode_min, _Minmax_traits_8>(_First, _Last, _Signed); } +const void* __stdcall __std_min_element_f( + const void* const _First, const void* const _Last, const bool _Unused) noexcept { + return _Minmax_element<_Mode_min, _Minmax_traits_f>(_First, _Last, _Unused); +} + +const void* __stdcall __std_min_element_d( + const void* const _First, const void* const _Last, const bool _Unused) noexcept { + return 
_Minmax_element<_Mode_min, _Minmax_traits_d>(_First, _Last, _Unused); +} + const void* __stdcall __std_max_element_1( const void* const _First, const void* const _Last, const bool _Signed) noexcept { return _Minmax_element<_Mode_max, _Minmax_traits_1>(_First, _Last, _Signed); @@ -1102,6 +1380,16 @@ const void* __stdcall __std_max_element_8( return _Minmax_element<_Mode_max, _Minmax_traits_8>(_First, _Last, _Signed); } +const void* __stdcall __std_max_element_f( + const void* const _First, const void* const _Last, const bool _Unused) noexcept { + return _Minmax_element<_Mode_max, _Minmax_traits_f>(_First, _Last, _Unused); +} + +const void* __stdcall __std_max_element_d( + const void* const _First, const void* const _Last, const bool _Unused) noexcept { + return _Minmax_element<_Mode_max, _Minmax_traits_d>(_First, _Last, _Unused); +} + _Min_max_element_t __stdcall __std_minmax_element_1( const void* const _First, const void* const _Last, const bool _Signed) noexcept { return _Minmax_element<_Mode_both, _Minmax_traits_1>(_First, _Last, _Signed); @@ -1122,6 +1410,15 @@ _Min_max_element_t __stdcall __std_minmax_element_8( return _Minmax_element<_Mode_both, _Minmax_traits_8>(_First, _Last, _Signed); } +_Min_max_element_t __stdcall __std_minmax_element_f( + const void* const _First, const void* const _Last, const bool _Unused) noexcept { + return _Minmax_element<_Mode_both, _Minmax_traits_f>(_First, _Last, _Unused); +} + +_Min_max_element_t __stdcall __std_minmax_element_d( + const void* const _First, const void* const _Last, const bool _Unused) noexcept { + return _Minmax_element<_Mode_both, _Minmax_traits_d>(_First, _Last, _Unused); +} } // extern "C" namespace { diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 22fa2b285e..357bbf0e06 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -138,6 +138,19 @@ void 
test_min_max_element(mt19937_64& gen) { } } +template +void test_min_max_element_f(mt19937_64& gen) { + normal_distribution dis(100.0, 1.0); + + vector input; + input.reserve(dataCount); + test_case_min_max_element(input); + for (size_t attempts = 0; attempts < dataCount; ++attempts) { + input.push_back(static_cast(dis(gen))); + test_case_min_max_element(input); + } +} + void test_min_max_element_pointers(mt19937_64& gen) { const short arr[20]{}; @@ -315,6 +328,10 @@ void test_vector_algorithms(mt19937_64& gen) { test_min_max_element(gen); test_min_max_element(gen); + test_min_max_element_f(gen); + test_min_max_element_f(gen); + test_min_max_element_f(gen); + test_min_max_element_pointers(gen); test_min_max_element_special_cases(); // SSE2 vectors From 80b561c45db4731983cf670c0e888b6e51fd5850 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 17:36:12 +0300 Subject: [PATCH 02/37] `const` --- stl/inc/algorithm | 7 ++++--- stl/inc/xutility | 10 ++++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 43b7624511..f7abcdb5ca 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -64,11 +64,12 @@ _STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept { _Min_max_element_t _Res; - if constexpr (_STD is_same_v<_Ty, float>) { + if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { _Res = __std_minmax_element_f(_First, _Last, false); - } else if constexpr (_STD is_same_v<_Ty, double> || _STD is_same_v<_Ty, long double>) { + } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> + || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { _Res = __std_minmax_element_d(_First, _Last, false); - } else if constexpr(sizeof(_Ty) == 1) { + } else if constexpr (sizeof(_Ty) == 1) { _Res = __std_minmax_element_1(_First, _Last, _Signed); } else if constexpr (sizeof(_Ty) == 2) { _Res = __std_minmax_element_2(_First, _Last, _Signed); diff --git 
a/stl/inc/xutility b/stl/inc/xutility index 2a0baa250d..484d6412c4 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -161,9 +161,10 @@ template _Ty* __std_min_element(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; - if constexpr (_STD is_same_v<_Ty, float>) { + if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { return const_cast<_Ty*>(static_cast(__std_min_element_f(_First, _Last, false))); - } else if constexpr (_STD is_same_v<_Ty, double> || _STD is_same_v<_Ty, long double>) { + } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> + || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { return const_cast<_Ty*>(static_cast(__std_min_element_d(_First, _Last, false))); }else if constexpr(sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(__std_min_element_1(_First, _Last, _Signed))); @@ -182,9 +183,10 @@ template _Ty* __std_max_element(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; - if constexpr (_STD is_same_v<_Ty, float>) { + if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { return const_cast<_Ty*>(static_cast(__std_max_element_f(_First, _Last, false))); - } else if constexpr (_STD is_same_v<_Ty, double> || _STD is_same_v<_Ty, long double>) { + } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> + || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { return const_cast<_Ty*>(static_cast(__std_max_element_d(_First, _Last, false))); } else if constexpr (sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(__std_max_element_1(_First, _Last, _Signed))); From 86b3100b8a74845ff3ec09457edcb6d5c01d70f9 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 18:04:38 +0300 Subject: [PATCH 03/37] reverse init --- stl/src/vector_algorithms.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 
cafd58d746..04b377b3c3 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -945,8 +945,8 @@ namespace { using _Signed_t = float; - static constexpr _Signed_t _Init_min_val = -__builtin_huge_valf(); - static constexpr _Signed_t _Init_max_val = __builtin_huge_valf(); + static constexpr _Signed_t _Init_min_val = __builtin_huge_valf(); + static constexpr _Signed_t _Init_max_val = -__builtin_huge_valf(); #ifndef _M_ARM64EC #ifdef _M_IX86 @@ -1041,8 +1041,8 @@ namespace { using _Signed_t = double; - static constexpr _Signed_t _Init_min_val = -__builtin_huge_val(); - static constexpr _Signed_t _Init_max_val = __builtin_huge_val(); + static constexpr _Signed_t _Init_min_val = __builtin_huge_val(); + static constexpr _Signed_t _Init_max_val = -__builtin_huge_val(); #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = false; From 55c5add3db273fb51b223ea5477bd10971f6c5f5 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 18:05:04 +0300 Subject: [PATCH 04/37] format --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 484d6412c4..c2aaf9c668 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -166,7 +166,7 @@ _Ty* __std_min_element(_Ty* _First, _Ty* _Last) noexcept { } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { return const_cast<_Ty*>(static_cast(__std_min_element_d(_First, _Last, false))); - }else if constexpr(sizeof(_Ty) == 1) { + } else if constexpr (sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(__std_min_element_1(_First, _Last, _Signed))); } else if constexpr (sizeof(_Ty) == 2) { return const_cast<_Ty*>(static_cast(__std_min_element_2(_First, _Last, _Signed))); From c88fe9aac4bc50ddf754ef823982b2304f66df1e Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 18:17:33 +0300 Subject: [PATCH 05/37] more interesting values --- 
tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 357bbf0e06..c0db2459a6 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -140,7 +140,7 @@ void test_min_max_element(mt19937_64& gen) { template void test_min_max_element_f(mt19937_64& gen) { - normal_distribution dis(100.0, 1.0); + normal_distribution dis(0.0, 100000.0); vector input; input.reserve(dataCount); From 8f70406eafd88c6aa38b208774c40eb4ce2543a0 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 18:41:12 +0300 Subject: [PATCH 06/37] fix x68 build --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 04b377b3c3..a3c01dec38 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1098,7 +1098,7 @@ namespace { static uint64_t _Get_any_u(const __m128i _Cur) noexcept { #ifdef _M_IX86 return (static_cast(static_cast(_mm_extract_epi32(_Cur, 1))) << 32) - | static_cast(static_cast(_mm_cvtsi128_si32(_Cur)))); + | static_cast(static_cast(_mm_cvtsi128_si32(_Cur))); #else // ^^^ x86 / x64 vvv return static_cast(_mm_cvtsi128_si64(_Cur)); #endif // ^^^ x64 ^^^ From 52c81914920b03a5bb26e5a35280a9e6c4e9c08b Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 18:51:19 +0300 Subject: [PATCH 07/37] format --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index a3c01dec38..2f2cc75eb9 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1098,7 +1098,7 @@ namespace { static uint64_t _Get_any_u(const __m128i _Cur) noexcept { #ifdef _M_IX86 return 
(static_cast(static_cast(_mm_extract_epi32(_Cur, 1))) << 32) - | static_cast(static_cast(_mm_cvtsi128_si32(_Cur))); + | static_cast(static_cast(_mm_cvtsi128_si32(_Cur))); #else // ^^^ x86 / x64 vvv return static_cast(_mm_cvtsi128_si64(_Cur)); #endif // ^^^ x64 ^^^ From af9c9b5c4a92d00db1e2374adae00f188cc5c7ae Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 19:43:31 +0300 Subject: [PATCH 08/37] coverage --- stl/inc/xutility | 16 ++++++++++++++-- .../tests/VSO_0000000_vector_algorithms/env.lst | 1 + 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index c2aaf9c668..8e9f725446 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -48,6 +48,18 @@ _STL_DISABLE_CLANG_WARNINGS #endif // _USE_STD_VECTOR_ALGORITHMS #endif // ^^^ no support for vector algorithms ^^^ +#ifndef _USE_STD_VECTOR_FLOATING_ALGORITHMS +#if _USE_STD_VECTOR_ALGORITHMS && defined(_M_FP_FAST) +#define _USE_STD_VECTOR_FLOATING_ALGORITHMS 1 +#else // ^^^ use vector algorithms & fast math / not use vector algorithms or not use fast math vvv +#define _USE_STD_VECTOR_FLOATING_ALGORITHMS 0 +#endif // ^^^ not use vector algorithms or not use fast math ^^^ +#else // ^^^ !defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) / defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) vvv +#if _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS +#error If _USE_STD_VECTOR_ALGORITHMS is set, _USE_STD_VECTOR_FLOATING_ALGORITHMS should also be set +#endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS +#endif // ^^^ defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) ^^^ + #if _USE_STD_VECTOR_ALGORITHMS _EXTERN_C // The "noalias" attribute tells the compiler optimizer that pointers going into these hand-vectorized algorithms @@ -6641,9 +6653,9 @@ _INLINE_VAR constexpr bool _Is_min_max_optimization_safe = // Activate the vecto _Iterator_is_contiguous<_Iter> // The iterator must be contiguous so we can get raw pointers. 
&& !_Iterator_is_volatile<_Iter> // The iterator must not be volatile. && conjunction_v, // Element is floating point or... -#endif +#endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS is_integral<_Elem>, is_pointer<_Elem>>, // ... integral or pointer type. disjunction< // And either of the following: #ifdef __cpp_lib_concepts diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/env.lst b/tests/std/tests/VSO_0000000_vector_algorithms/env.lst index 2e61ef96b9..f72f926a11 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/env.lst +++ b/tests/std/tests/VSO_0000000_vector_algorithms/env.lst @@ -5,3 +5,4 @@ RUNALL_INCLUDE ..\usual_matrix.lst RUNALL_CROSSLIST PM_CL="" # Test default setting PM_CL="/D_USE_STD_VECTOR_ALGORITHMS=0" # Test escape hatch, see GH-1751 +PM_CL="/D_USE_STD_VECTOR_FLOATING_ALGORITHMS=1" # Test floating point vectorization, which is normally only enabled for /fp:fast From ff97c5b64b59dc9edbabb60d1418121528dbd938 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 19:46:04 +0300 Subject: [PATCH 09/37] ouch --- stl/src/vector_algorithms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 2f2cc75eb9..baf2cf8b13 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1,5 +1,4 @@ // Copyright (c) Microsoft Corporation. 
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #ifdef _M_CEE_PURE From 90b799915274a9cead6c659126c0f9cffa558a58 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 19:56:47 +0300 Subject: [PATCH 10/37] format --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 8e9f725446..ecdfcb8f03 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -57,7 +57,7 @@ _STL_DISABLE_CLANG_WARNINGS #else // ^^^ !defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) / defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) vvv #if _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS #error If _USE_STD_VECTOR_ALGORITHMS is set, _USE_STD_VECTOR_FLOATING_ALGORITHMS should also be set -#endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS +#endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS #endif // ^^^ defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) ^^^ #if _USE_STD_VECTOR_ALGORITHMS From 568a7936e9c5c60311f64d54e8778f7f94e0e465 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 21:57:56 +0300 Subject: [PATCH 11/37] include `/fp:strict` / `/fp:precise` and remove extra coverage --- stl/inc/xutility | 2 +- tests/std/tests/VSO_0000000_vector_algorithms/env.lst | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index ecdfcb8f03..6c2027d12e 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -49,7 +49,7 @@ _STL_DISABLE_CLANG_WARNINGS #endif // ^^^ no support for vector algorithms ^^^ #ifndef _USE_STD_VECTOR_FLOATING_ALGORITHMS -#if _USE_STD_VECTOR_ALGORITHMS && defined(_M_FP_FAST) +#if _USE_STD_VECTOR_ALGORITHMS && !defined(_M_FP_EXCEPT) #define _USE_STD_VECTOR_FLOATING_ALGORITHMS 1 #else // ^^^ use vector algorithms & fast math / not use vector algorithms or not use fast math vvv #define _USE_STD_VECTOR_FLOATING_ALGORITHMS 0 diff --git 
a/tests/std/tests/VSO_0000000_vector_algorithms/env.lst b/tests/std/tests/VSO_0000000_vector_algorithms/env.lst index f72f926a11..2e61ef96b9 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/env.lst +++ b/tests/std/tests/VSO_0000000_vector_algorithms/env.lst @@ -5,4 +5,3 @@ RUNALL_INCLUDE ..\usual_matrix.lst RUNALL_CROSSLIST PM_CL="" # Test default setting PM_CL="/D_USE_STD_VECTOR_ALGORITHMS=0" # Test escape hatch, see GH-1751 -PM_CL="/D_USE_STD_VECTOR_FLOATING_ALGORITHMS=1" # Test floating point vectorization, which is normally only enabled for /fp:fast From 7f2a6358701069b93320aa9204e1eae781d2c9f7 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 23:38:42 +0300 Subject: [PATCH 12/37] -extra casts --- stl/src/vector_algorithms.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index baf2cf8b13..737f5255a7 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1,4 +1,5 @@ // Copyright (c) Microsoft Corporation. +// Copyright (c) Microsoft Corporation. 
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #ifdef _M_CEE_PURE @@ -538,11 +539,11 @@ namespace { const void* _First, const void* _Last, _Min_max_element_t& _Res, _Ty _Cur_min, _Ty _Cur_max) noexcept { if constexpr (_Mode == _Mode_min) { - return _Min_tail(_First, _Last, _Res._Min, static_cast<_Ty>(_Cur_min)); + return _Min_tail(_First, _Last, _Res._Min, _Cur_min); } else if constexpr (_Mode == _Mode_max) { - return _Max_tail(_First, _Last, _Res._Max, static_cast<_Ty>(_Cur_max)); + return _Max_tail(_First, _Last, _Res._Max, _Cur_max); } else { - return _Both_tail(_First, _Last, _Res, static_cast<_Ty>(_Cur_min), static_cast<_Ty>(_Cur_max)); + return _Both_tail(_First, _Last, _Res, _Cur_min, _Cur_max); } } From f6b24a95653af90b8059096cb66c62c729f555e0 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 6 Aug 2023 23:39:04 +0300 Subject: [PATCH 13/37] copypaste error --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 737f5255a7..6f8d46abc5 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1056,7 +1056,7 @@ namespace { } static __m128i _Inc(__m128i _Idx) noexcept { - return _mm_add_epi32(_Idx, _mm_set1_epi64x(1)); + return _mm_add_epi64(_Idx, _mm_set1_epi64x(1)); } template From 91ed8a3d0731c0e97da2dc5f537e9da309d3655c Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 7 Aug 2023 08:28:38 +0300 Subject: [PATCH 14/37] more interesting input --- .../tests/VSO_0000000_vector_algorithms/test.cpp | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index c0db2459a6..e297ed03e7 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -142,11 +142,23 @@ template void 
test_min_max_element_f(mt19937_64& gen) { normal_distribution dis(0.0, 100000.0); + constexpr auto input_of_input_size = dataCount / 2; + vector input_of_input(input_of_input_size); + input_of_input[0] = -numeric_limits::infinity(); + input_of_input[1] = +numeric_limits::infinity(); + input_of_input[2] = -0.0; + input_of_input[3] = +0.0; + for (size_t i = 4; i < input_of_input_size; ++i) { + input_of_input[i] = dis(gen); + } + + uniform_int_distribution idx_dis(0, input_of_input_size - 1); + vector input; input.reserve(dataCount); test_case_min_max_element(input); for (size_t attempts = 0; attempts < dataCount; ++attempts) { - input.push_back(static_cast(dis(gen))); + input.push_back(static_cast(input_of_input[idx_dis(gen)])); test_case_min_max_element(input); } } From 46baf227c419f85e67dc1d34e3ffdad3dad55df2 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 7 Aug 2023 12:46:55 +0300 Subject: [PATCH 15/37] Unsupport 80-bit long double See #3931 --- stl/inc/xutility | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stl/inc/xutility b/stl/inc/xutility index 6c2027d12e..28048e8e4e 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -6654,7 +6654,11 @@ _INLINE_VAR constexpr bool _Is_min_max_optimization_safe = // Activate the vecto && !_Iterator_is_volatile<_Iter> // The iterator must not be volatile. && conjunction_v, is_same<_Elem, double>, +#else // ^^^ 80-bit long double (not supported by MSVC in general, see GH-1316) / 64-bit long double vvv is_floating_point<_Elem>, // Element is floating point or... +#endif // ^^^ 64-bit long double ^^ #endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS is_integral<_Elem>, is_pointer<_Elem>>, // ... integral or pointer type. 
disjunction< // And either of the following: From 83d208fbff99a6b03285afeff00ade8159fe01be Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 7 Aug 2023 16:28:36 +0300 Subject: [PATCH 16/37] +benchmark --- benchmarks/CMakeLists.txt | 1 + benchmarks/src/minmax_element.cpp | 99 +++++++++++++++++++++++++++++++ 2 files changed, 100 insertions(+) create mode 100644 benchmarks/src/minmax_element.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 11df4398d0..3b4608531c 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -107,6 +107,7 @@ endfunction() add_benchmark(bitset_to_string src/bitset_to_string.cpp) add_benchmark(locale_classic src/locale_classic.cpp) +add_benchmark(minmax_element src/minmax_element.cpp) add_benchmark(path_lexically_normal src/path_lexically_normal.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) add_benchmark(std_copy src/std_copy.cpp) diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp new file mode 100644 index 0000000000..489b3a64f7 --- /dev/null +++ b/benchmarks/src/minmax_element.cpp @@ -0,0 +1,99 @@ +// Copyright (c) Microsoft Corporation. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include + +#include +#include + +enum class Op { + Min, + Max, + Both, +}; + +using namespace std; + +template +void bm(benchmark::State& state) { + mt19937 gen(84710); + uniform_int_distribution> dis(1, 20); + + T a[Size]; + ranges::generate(a, [&] { return dis(gen); }); + + for (auto _ : state) { + if constexpr (Operation == Op::Min) { + benchmark::DoNotOptimize(ranges::min_element(a)); + } else if constexpr (Operation == Op::Max) { + benchmark::DoNotOptimize(ranges::max_element(a)); + } else if constexpr (Operation == Op::Both) { + benchmark::DoNotOptimize(ranges::minmax_element(a)); + } + } +} + +template +void bmf(benchmark::State& state) { + mt19937 gen(84710); + normal_distribution dis(0, 10000.0); + + T a[Size]; + generate(begin(a), end(a), [&] { return dis(gen); }); + + for (auto _ : state) { + if constexpr (Operation == Op::Min) { + benchmark::DoNotOptimize(ranges::min_element(a)); + } else if constexpr (Operation == Op::Max) { + benchmark::DoNotOptimize(ranges::max_element(a)); + } else if constexpr (Operation == Op::Both) { + benchmark::DoNotOptimize(ranges::minmax_element(a)); + } + } +} + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); + +BENCHMARK(bmf); +BENCHMARK(bmf); +BENCHMARK(bmf); + +BENCHMARK(bmf); +BENCHMARK(bmf); +BENCHMARK(bmf); + + +BENCHMARK_MAIN(); From 1be219faa32096c3e5ee5e1b133a61acd8501358 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 7 Aug 2023 16:35:45 +0300 Subject: [PATCH 17/37] include order --- benchmarks/src/minmax_element.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) 
diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index 489b3a64f7..c258619ef4 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -1,12 +1,11 @@ // Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +#include #include #include #include #include - -#include #include enum class Op { From 99b174675e00f8f0bee0caf5bef9a17ff938f668 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 7 Aug 2023 16:55:02 +0300 Subject: [PATCH 18/37] -copy --- stl/src/vector_algorithms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 6f8d46abc5..06326572f9 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1,5 +1,4 @@ // Copyright (c) Microsoft Corporation. -// Copyright (c) Microsoft Corporation. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception #ifdef _M_CEE_PURE From 58fc6b91faa1eb4eff80b6b954bbf537ac26342b Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 7 Aug 2023 17:03:32 +0300 Subject: [PATCH 19/37] simplify benchmark --- benchmarks/src/minmax_element.cpp | 42 ++++++++++++------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index c258619ef4..d769652458 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -7,6 +7,7 @@ #include #include #include +#include enum class Op { Min, @@ -17,31 +18,20 @@ enum class Op { using namespace std; template -void bm(benchmark::State& state) { - mt19937 gen(84710); - uniform_int_distribution> dis(1, 20); +template +void bm(benchmark::State& state) { T a[Size]; - ranges::generate(a, [&] { return dis(gen); }); - - for (auto _ : state) { - if constexpr (Operation == Op::Min) { - benchmark::DoNotOptimize(ranges::min_element(a)); - } else if constexpr (Operation == Op::Max) { - 
benchmark::DoNotOptimize(ranges::max_element(a)); - } else if constexpr (Operation == Op::Both) { - benchmark::DoNotOptimize(ranges::minmax_element(a)); - } - } -} -template -void bmf(benchmark::State& state) { mt19937 gen(84710); - normal_distribution dis(0, 10000.0); - T a[Size]; - generate(begin(a), end(a), [&] { return dis(gen); }); + if constexpr (is_floating_point_v) { + normal_distribution dis(0, 10000.0); + ranges::generate(a, [&] { return dis(gen); }); + } else { + uniform_int_distribution> dis(1, 20); + ranges::generate(a, [&] { return dis(gen); }); + } for (auto _ : state) { if constexpr (Operation == Op::Min) { @@ -86,13 +76,13 @@ BENCHMARK(bm); BENCHMARK(bm); BENCHMARK(bm); -BENCHMARK(bmf); -BENCHMARK(bmf); -BENCHMARK(bmf); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); -BENCHMARK(bmf); -BENCHMARK(bmf); -BENCHMARK(bmf); +BENCHMARK(bm); +BENCHMARK(bm); +BENCHMARK(bm); BENCHMARK_MAIN(); From eb388ad7b8eaaeb756f6e08609853646a4a27c25 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 7 Aug 2023 17:28:02 +0300 Subject: [PATCH 20/37] fix build --- benchmarks/src/minmax_element.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index d769652458..baba47865c 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -17,8 +17,6 @@ enum class Op { using namespace std; -template - template void bm(benchmark::State& state) { T a[Size]; From 8249aa59b0bd5490c58df43fdcf8e850bf1d8b05 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 26 Oct 2023 08:50:00 +0300 Subject: [PATCH 21/37] load noexcept --- stl/src/vector_algorithms.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 1091945893..43d15d8b7a 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -556,7 +556,7 @@ namespace { static constexpr bool _Has_portion_max = true; 
static constexpr size_t _Portion_max = 256; - static __m128i _Load(const void* _Src) { + static __m128i _Load(const void* _Src) noexcept { return _mm_loadu_si128(reinterpret_cast(_Src)); } @@ -646,7 +646,7 @@ namespace { static constexpr bool _Has_portion_max = true; static constexpr size_t _Portion_max = 65536; - static __m128i _Load(const void* _Src) { + static __m128i _Load(const void* _Src) noexcept { return _mm_loadu_si128(reinterpret_cast(_Src)); } @@ -741,7 +741,7 @@ namespace { static constexpr size_t _Portion_max = 0x1'0000'0000ULL; #endif // ^^^ 64-bit ^^^ - static __m128i _Load(const void* _Src) { + static __m128i _Load(const void* _Src) noexcept { return _mm_loadu_si128(reinterpret_cast(_Src)); } @@ -827,7 +827,7 @@ namespace { #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = false; - static __m128i _Load(const void* _Src) { + static __m128i _Load(const void* _Src) noexcept { return _mm_loadu_si128(reinterpret_cast(_Src)); } @@ -925,7 +925,7 @@ namespace { static constexpr size_t _Portion_max = 0x1'0000'0000ULL; #endif // ^^^ 64-bit ^^^ - static __m128 _Load(const void* _Src) { + static __m128 _Load(const void* _Src) noexcept { return _mm_loadu_ps(reinterpret_cast(_Src)); } @@ -1016,7 +1016,7 @@ namespace { #ifndef _M_ARM64EC static constexpr bool _Has_portion_max = false; - static __m128d _Load(const void* _Src) { + static __m128d _Load(const void* _Src) noexcept { return _mm_loadu_pd(reinterpret_cast(_Src)); } From 74049706bda41db6144ec746ecc7b5d7f9cc00fa Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 26 Oct 2023 08:51:48 +0300 Subject: [PATCH 22/37] fix copypasta during merge --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 43d15d8b7a..5e414516c3 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1291,9 +1291,9 @@ namespace { if constexpr (_Mode == _Mode_min) { return 
_Min_tail(_First, _Last, _Res._Min, _Cur_min_val); } else if constexpr (_Mode == _Mode_max) { - return _Max_tail(_First, _Last, _Res._Max, _Cur_min_val); + return _Max_tail(_First, _Last, _Res._Max, _Cur_max_val); } else { - return _Both_tail(_First, _Last, _Res, _Cur_min_val, _Cur_min_val); + return _Both_tail(_First, _Last, _Res, _Cur_min_val, _Cur_max_val); } } else { using _STy = _Traits::_Signed_t; From b0867f80b8a8a32deee278b1841e112a6034f3c7 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 25 Dec 2023 17:06:50 +0200 Subject: [PATCH 23/37] ADL-wary --- stl/inc/xutility | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index e4a9eb5747..b83c65edab 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -197,10 +197,10 @@ _Ty* __std_max_element(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { - return const_cast<_Ty*>(static_cast(__std_max_element_f(_First, _Last, false))); + return const_cast<_Ty*>(static_cast(::__std_max_element_f(_First, _Last, false))); } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { - return const_cast<_Ty*>(static_cast(__std_max_element_d(_First, _Last, false))); + return const_cast<_Ty*>(static_cast(::__std_max_element_d(_First, _Last, false))); } else if constexpr (sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(::__std_max_element_1(_First, _Last, _Signed))); } else if constexpr (sizeof(_Ty) == 2) { From ce7cdc1325b783dcf46ba605741051dc6021c656 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 25 Dec 2023 17:08:43 +0200 Subject: [PATCH 24/37] ADL-wary --- stl/inc/xutility | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index b83c65edab..70aa449f74 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -175,10 +175,10 @@ 
_Ty* __std_min_element(_Ty* _First, _Ty* _Last) noexcept { constexpr bool _Signed = _STD is_signed_v<_Ty>; if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { - return const_cast<_Ty*>(static_cast(__std_min_element_f(_First, _Last, false))); + return const_cast<_Ty*>(static_cast(::__std_min_element_f(_First, _Last, false))); } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { - return const_cast<_Ty*>(static_cast(__std_min_element_d(_First, _Last, false))); + return const_cast<_Ty*>(static_cast(::__std_min_element_d(_First, _Last, false))); } else if constexpr (sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(::__std_min_element_1(_First, _Last, _Signed))); } else if constexpr (sizeof(_Ty) == 2) { From 9e87e915917c917f98d0937dc73a2a77a956bcdf Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Mon, 25 Dec 2023 17:10:33 +0200 Subject: [PATCH 25/37] ADL-wary --- stl/inc/algorithm | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index e9ded94e6f..9b3fbf101d 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -71,10 +71,10 @@ _STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept { _Min_max_element_t _Res; if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { - _Res = __std_minmax_element_f(_First, _Last, false); + _Res = ::__std_minmax_element_f(_First, _Last, false); } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { - _Res = __std_minmax_element_d(_First, _Last, false); + _Res = ::__std_minmax_element_d(_First, _Last, false); } else if constexpr (sizeof(_Ty) == 1) { _Res = ::__std_minmax_element_1(_First, _Last, _Signed); } else if constexpr (sizeof(_Ty) == 2) { From 43f7d92e7917b788e39db91df73dba7fbbe467f2 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Tue, 30 Jan 2024 22:42:35 -0800 Subject: [PATCH 26/37] Use `_Is_any_of_v`. --- stl/inc/algorithm | 3 +-- stl/inc/xutility | 6 ++---- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 476c51fcbc..32c9e17a46 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -72,8 +72,7 @@ _STD pair<_Ty*, _Ty*> __std_minmax_element(_Ty* _First, _Ty* _Last) noexcept { if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { _Res = ::__std_minmax_element_f(_First, _Last, false); - } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> - || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { + } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { _Res = ::__std_minmax_element_d(_First, _Last, false); } else if constexpr (sizeof(_Ty) == 1) { _Res = ::__std_minmax_element_1(_First, _Last, _Signed); diff --git a/stl/inc/xutility b/stl/inc/xutility index 6507e09c90..5dd72d4831 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -176,8 +176,7 @@ _Ty* __std_min_element(_Ty* _First, _Ty* _Last) noexcept { if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { return const_cast<_Ty*>(static_cast(::__std_min_element_f(_First, _Last, false))); - } else if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, double> - || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { + } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { return const_cast<_Ty*>(static_cast(::__std_min_element_d(_First, _Last, false))); } else if constexpr (sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(::__std_min_element_1(_First, _Last, _Signed))); @@ -198,8 +197,7 @@ _Ty* __std_max_element(_Ty* _First, _Ty* _Last) noexcept { if constexpr (_STD is_same_v<_STD remove_const_t<_Ty>, float>) { return const_cast<_Ty*>(static_cast(::__std_max_element_f(_First, _Last, false))); - } else if constexpr (_STD is_same_v<_STD 
remove_const_t<_Ty>, double> - || _STD is_same_v<_STD remove_const_t<_Ty>, long double>) { + } else if constexpr (_STD _Is_any_of_v<_STD remove_const_t<_Ty>, double, long double>) { return const_cast<_Ty*>(static_cast(::__std_max_element_d(_First, _Last, false))); } else if constexpr (sizeof(_Ty) == 1) { return const_cast<_Ty*>(static_cast(::__std_max_element_1(_First, _Last, _Signed))); From 83035c0723a04d05c785d83facafad51a784b7e8 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 30 Jan 2024 22:50:23 -0800 Subject: [PATCH 27/37] Comment nitpicks. --- stl/inc/xutility | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 5dd72d4831..a80be0b401 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -51,7 +51,7 @@ _STL_DISABLE_CLANG_WARNINGS #ifndef _USE_STD_VECTOR_FLOATING_ALGORITHMS #if _USE_STD_VECTOR_ALGORITHMS && !defined(_M_FP_EXCEPT) #define _USE_STD_VECTOR_FLOATING_ALGORITHMS 1 -#else // ^^^ use vector algorithms & fast math / not use vector algorithms or not use fast math vvv +#else // ^^^ use vector algorithms and fast math / not use vector algorithms or not use fast math vvv #define _USE_STD_VECTOR_FLOATING_ALGORITHMS 0 #endif // ^^^ not use vector algorithms or not use fast math ^^^ #else // ^^^ !defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) / defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) vvv @@ -6616,7 +6616,7 @@ _INLINE_VAR constexpr bool _Is_min_max_optimization_safe = // Activate the vecto is_same<_Elem, float>, is_same<_Elem, double>, #else // ^^^ 80-bit long double (not supported by MSVC in general, see GH-1316) / 64-bit long double vvv is_floating_point<_Elem>, // Element is floating point or... -#endif // ^^^ 64-bit long double ^^ +#endif // ^^^ 64-bit long double ^^^ #endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS is_integral<_Elem>, is_pointer<_Elem>>, // ... integral or pointer type. 
disjunction< // And either of the following: From 30b8748ab6be20fd0fb65b9b641788bb473209b5 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 30 Jan 2024 22:59:42 -0800 Subject: [PATCH 28/37] Fix `#error` message, use "must imply" phrasing. --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index a80be0b401..7803ea3d8c 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -56,7 +56,7 @@ _STL_DISABLE_CLANG_WARNINGS #endif // ^^^ not use vector algorithms or not use fast math ^^^ #else // ^^^ !defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) / defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) vvv #if _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS -#error If _USE_STD_VECTOR_ALGORITHMS is set, _USE_STD_VECTOR_FLOATING_ALGORITHMS should also be set +#error _USE_STD_VECTOR_FLOATING_ALGORITHMS must imply _USE_STD_VECTOR_ALGORITHMS. #endif // _USE_STD_VECTOR_FLOATING_ALGORITHMS && !_USE_STD_VECTOR_ALGORITHMS #endif // ^^^ defined(_USE_STD_VECTOR_FLOATING_ALGORITHMS) ^^^ From d2b332051f387adef844e56940fb5bf6d1f29194 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Tue, 30 Jan 2024 23:42:51 -0800 Subject: [PATCH 29/37] Style: Unnamed `const bool` => `bool` --- stl/src/vector_algorithms.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 34db5f7d29..11ebe9a738 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -930,7 +930,7 @@ namespace { return _mm_loadu_ps(reinterpret_cast(_Src)); } - static __m128 _Sign_correction(const __m128 _Val, const bool) noexcept { + static __m128 _Sign_correction(const __m128 _Val, bool) noexcept { return _Val; } @@ -1021,7 +1021,7 @@ namespace { return _mm_loadu_pd(reinterpret_cast(_Src)); } - static __m128d _Sign_correction(const __m128d _Val, const bool) noexcept { + static __m128d _Sign_correction(const __m128d _Val, bool) noexcept { return _Val; } From eef60cef7a07fdd9f0190f13fc3b4738365f6b9d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Tue, 30 Jan 2024 23:57:09 -0800 Subject: [PATCH 30/37] Style: Add newline. --- stl/src/vector_algorithms.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 11ebe9a738..66f8895758 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -1288,6 +1288,7 @@ namespace { } } #endif // !_M_ARM64EC + if constexpr (_Traits::_Is_floating) { if constexpr (_Mode == _Mode_min) { return _Min_tail(_First, _Last, _Res._Min, _Cur_min_val); From 00ba9737716c423b6f7fe4f735e032f93b1ab7f5 Mon Sep 17 00:00:00 2001 From: "Stephan T. 
Lavavej" Date: Wed, 31 Jan 2024 00:07:18 -0800 Subject: [PATCH 31/37] `test_min_max_element_f` => `test_min_max_element_floating` --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index a351916788..1581de2b61 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -179,7 +179,7 @@ void test_min_max_element(mt19937_64& gen) { } template -void test_min_max_element_f(mt19937_64& gen) { +void test_min_max_element_floating(mt19937_64& gen) { normal_distribution dis(0.0, 100000.0); constexpr auto input_of_input_size = dataCount / 2; @@ -392,9 +392,9 @@ void test_vector_algorithms(mt19937_64& gen) { test_min_max_element(gen); test_min_max_element(gen); - test_min_max_element_f(gen); - test_min_max_element_f(gen); - test_min_max_element_f(gen); + test_min_max_element_floating(gen); + test_min_max_element_floating(gen); + test_min_max_element_floating(gen); test_min_max_element_pointers(gen); From e9a76e44b7288fb07465f809cc3c1e3f94a7f0da Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 31 Jan 2024 00:12:22 -0800 Subject: [PATCH 32/37] Test ordinary negative values too. 
--- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 1581de2b61..fb3a0c1a24 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -180,7 +180,7 @@ void test_min_max_element(mt19937_64& gen) { template void test_min_max_element_floating(mt19937_64& gen) { - normal_distribution dis(0.0, 100000.0); + normal_distribution dis(-100000.0, 100000.0); constexpr auto input_of_input_size = dataCount / 2; vector input_of_input(input_of_input_size); From 3e17d0572ae875677b6a163e2590f2662de39850 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 31 Jan 2024 00:17:49 -0800 Subject: [PATCH 33/37] Drop `static_cast` as `input_of_input` is `vector`. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index fb3a0c1a24..77b54e2353 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -198,7 +198,7 @@ void test_min_max_element_floating(mt19937_64& gen) { input.reserve(dataCount); test_case_min_max_element(input); for (size_t attempts = 0; attempts < dataCount; ++attempts) { - input.push_back(static_cast(input_of_input[idx_dis(gen)])); + input.push_back(input_of_input[idx_dis(gen)]); test_case_min_max_element(input); } } From 40ca00b2fdc88d8acd474da8044632579bd7ae21 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 31 Jan 2024 01:01:56 -0800 Subject: [PATCH 34/37] Enable warnings when building the benchmarks. 
* Start with the usual: `/diagnostics:caret /W4 /WX` * I'm enabling the same off-by-default warnings as the STL's build (C4265 (non-virtual dtor), C5038 (data member init order)) and adding C5262 (implicit fallthrough). * Finally, let's add `/utf-8` in case we ever benchmark `<format>`. --- benchmarks/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 3f3b919fee..1a5071ea58 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -61,6 +61,9 @@ endif() set(CMAKE_BUILD_TYPE RelWithDebInfo) +# /utf-8 affects . +add_compile_options("$<$:/diagnostics:caret;/W4;/WX;/w14265;/w15038;/w15262;/utf-8>") + if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/google-benchmark/.git") message(FATAL_ERROR "google-benchmark is not checked out; make sure to run\n git submodule update --init benchmarks/google-benchmark") endif() From af4df71e8c32da03e69e72b8061ba7bd1e6790da Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 31 Jan 2024 01:24:42 -0800 Subject: [PATCH 35/37] Fix truncation warnings in benchmarks. 
--- benchmarks/inc/utility.hpp | 5 +++++ benchmarks/src/minmax_element.cpp | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/benchmarks/inc/utility.hpp b/benchmarks/inc/utility.hpp index e407e2654c..c0a86f892b 100644 --- a/benchmarks/inc/utility.hpp +++ b/benchmarks/inc/utility.hpp @@ -18,6 +18,11 @@ std::vector random_vector(size_t n) { xoshiro256ss prng{id64(rd), id64(rd), id64(rd), id64(rd)}; std::vector res(n); + +#pragma warning(push) +#pragma warning(disable : 4244) // conversion from 'uint64_t' to 'Contained', possible loss of data std::generate(res.begin(), res.end(), [&prng] { return static_cast(prng.next()); }); +#pragma warning(pop) + return res; } diff --git a/benchmarks/src/minmax_element.cpp b/benchmarks/src/minmax_element.cpp index baba47865c..da6cf9aff4 100644 --- a/benchmarks/src/minmax_element.cpp +++ b/benchmarks/src/minmax_element.cpp @@ -28,7 +28,7 @@ void bm(benchmark::State& state) { ranges::generate(a, [&] { return dis(gen); }); } else { uniform_int_distribution> dis(1, 20); - ranges::generate(a, [&] { return dis(gen); }); + ranges::generate(a, [&] { return static_cast(dis(gen)); }); } for (auto _ : state) { From bf797871a12f90704f48b07e737fc15901f9c7fa Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 31 Jan 2024 02:16:07 -0800 Subject: [PATCH 36/37] Fix x86 size_t truncation warnings in the vector.bool benchmarks. 
--- .../std/containers/sequences/vector.bool/copy/test.cpp | 9 +++++---- .../std/containers/sequences/vector.bool/copy_n/test.cpp | 9 +++++---- .../std/containers/sequences/vector.bool/move/test.cpp | 9 +++++---- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp b/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp index c5c0777526..18e1824451 100644 --- a/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp +++ b/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp @@ -4,6 +4,7 @@ #include // #include +#include #include #include @@ -17,7 +18,7 @@ static vector createRandomVector(const size_t size) { } static void copy_block_aligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -27,7 +28,7 @@ static void copy_block_aligned(benchmark::State& state) { } static void copy_source_misaligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -37,7 +38,7 @@ static void copy_source_misaligned(benchmark::State& state) { } static void copy_dest_misaligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -48,7 +49,7 @@ static void copy_dest_misaligned(benchmark::State& state) { // Special benchmark for matching char alignment static void copy_matching_alignment(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); diff --git a/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp 
b/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp index 5a4babf5c5..fe52c7036c 100644 --- a/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp +++ b/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp @@ -4,6 +4,7 @@ #include // #include +#include #include #include @@ -17,7 +18,7 @@ static vector createRandomVector(const size_t size) { } static void copy_n_block_aligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -27,7 +28,7 @@ static void copy_n_block_aligned(benchmark::State& state) { } static void copy_n_source_misaligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -37,7 +38,7 @@ static void copy_n_source_misaligned(benchmark::State& state) { } static void copy_n_dest_misaligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -48,7 +49,7 @@ static void copy_n_dest_misaligned(benchmark::State& state) { // Special benchmark for matching char alignment static void copy_n_matching_alignment(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); diff --git a/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp b/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp index db79791788..7eefc268ec 100644 --- a/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp +++ b/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp @@ -4,6 +4,7 @@ #include // #include +#include #include #include @@ -17,7 +18,7 @@ static vector 
createRandomVector(const size_t size) { } static void move_block_aligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -27,7 +28,7 @@ static void move_block_aligned(benchmark::State& state) { } static void move_source_misaligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -37,7 +38,7 @@ static void move_source_misaligned(benchmark::State& state) { } static void move_dest_misaligned(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); @@ -48,7 +49,7 @@ static void move_dest_misaligned(benchmark::State& state) { // Special benchmark for matching char alignment static void move_matching_alignment(benchmark::State& state) { - const auto size = state.range(0); + const auto size = static_cast(state.range(0)); const vector source = createRandomVector(size); vector dest(size, false); From 358dd222f86147a865311c04d01f5d975d8b91e8 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 31 Jan 2024 02:46:13 -0800 Subject: [PATCH 37/37] Revert enabling warnings for benchmarks. Keep the fix within minmax_element.cpp, though. 
--- benchmarks/CMakeLists.txt | 3 --- benchmarks/inc/utility.hpp | 5 ----- .../std/containers/sequences/vector.bool/copy/test.cpp | 9 ++++----- .../std/containers/sequences/vector.bool/copy_n/test.cpp | 9 ++++----- .../std/containers/sequences/vector.bool/move/test.cpp | 9 ++++----- 5 files changed, 12 insertions(+), 23 deletions(-) diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 1a5071ea58..3f3b919fee 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -61,9 +61,6 @@ endif() set(CMAKE_BUILD_TYPE RelWithDebInfo) -# /utf-8 affects . -add_compile_options("$<$:/diagnostics:caret;/W4;/WX;/w14265;/w15038;/w15262;/utf-8>") - if(NOT EXISTS "${CMAKE_CURRENT_LIST_DIR}/google-benchmark/.git") message(FATAL_ERROR "google-benchmark is not checked out; make sure to run\n git submodule update --init benchmarks/google-benchmark") endif() diff --git a/benchmarks/inc/utility.hpp b/benchmarks/inc/utility.hpp index c0a86f892b..e407e2654c 100644 --- a/benchmarks/inc/utility.hpp +++ b/benchmarks/inc/utility.hpp @@ -18,11 +18,6 @@ std::vector random_vector(size_t n) { xoshiro256ss prng{id64(rd), id64(rd), id64(rd), id64(rd)}; std::vector res(n); - -#pragma warning(push) -#pragma warning(disable : 4244) // conversion from 'uint64_t' to 'Contained', possible loss of data std::generate(res.begin(), res.end(), [&prng] { return static_cast(prng.next()); }); -#pragma warning(pop) - return res; } diff --git a/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp b/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp index 18e1824451..c5c0777526 100644 --- a/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp +++ b/benchmarks/src/std/containers/sequences/vector.bool/copy/test.cpp @@ -4,7 +4,6 @@ #include // #include -#include #include #include @@ -18,7 +17,7 @@ static vector createRandomVector(const size_t size) { } static void copy_block_aligned(benchmark::State& state) { - const auto size = 
static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -28,7 +27,7 @@ static void copy_block_aligned(benchmark::State& state) { } static void copy_source_misaligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -38,7 +37,7 @@ static void copy_source_misaligned(benchmark::State& state) { } static void copy_dest_misaligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -49,7 +48,7 @@ static void copy_dest_misaligned(benchmark::State& state) { // Special benchmark for matching char alignment static void copy_matching_alignment(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); diff --git a/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp b/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp index fe52c7036c..5a4babf5c5 100644 --- a/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp +++ b/benchmarks/src/std/containers/sequences/vector.bool/copy_n/test.cpp @@ -4,7 +4,6 @@ #include // #include -#include #include #include @@ -18,7 +17,7 @@ static vector createRandomVector(const size_t size) { } static void copy_n_block_aligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -28,7 +27,7 @@ static void copy_n_block_aligned(benchmark::State& state) { } static void copy_n_source_misaligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = 
state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -38,7 +37,7 @@ static void copy_n_source_misaligned(benchmark::State& state) { } static void copy_n_dest_misaligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -49,7 +48,7 @@ static void copy_n_dest_misaligned(benchmark::State& state) { // Special benchmark for matching char alignment static void copy_n_matching_alignment(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); diff --git a/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp b/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp index 7eefc268ec..db79791788 100644 --- a/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp +++ b/benchmarks/src/std/containers/sequences/vector.bool/move/test.cpp @@ -4,7 +4,6 @@ #include // #include -#include #include #include @@ -18,7 +17,7 @@ static vector createRandomVector(const size_t size) { } static void move_block_aligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -28,7 +27,7 @@ static void move_block_aligned(benchmark::State& state) { } static void move_source_misaligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false); @@ -38,7 +37,7 @@ static void move_source_misaligned(benchmark::State& state) { } static void move_dest_misaligned(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = 
createRandomVector(size); vector dest(size, false); @@ -49,7 +48,7 @@ static void move_dest_misaligned(benchmark::State& state) { // Special benchmark for matching char alignment static void move_matching_alignment(benchmark::State& state) { - const auto size = static_cast(state.range(0)); + const auto size = state.range(0); const vector source = createRandomVector(size); vector dest(size, false);