From 9c4e4c24ee99d1e8a53fb4241f550b16073a6c72 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Wed, 20 Mar 2024 07:01:50 +0200 Subject: [PATCH 01/36] mismatch vectorization --- benchmarks/CMakeLists.txt | 1 + benchmarks/src/mismatch.cpp | 36 +++++++ stl/inc/algorithm | 27 +++++ stl/inc/xutility | 22 ++++ stl/src/vector_algorithms.cpp | 76 +++++++++++++ .../VSO_0000000_vector_algorithms/test.cpp | 100 ++++++++++++++++++ 6 files changed, 262 insertions(+) create mode 100644 benchmarks/src/mismatch.cpp diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 2903ee1324..b6542d1fba 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -112,6 +112,7 @@ add_benchmark(bitset_to_string src/bitset_to_string.cpp) add_benchmark(find_and_count src/find_and_count.cpp) add_benchmark(locale_classic src/locale_classic.cpp) add_benchmark(minmax_element src/minmax_element.cpp) +add_benchmark(mismatch src/mismatch.cpp) add_benchmark(path_lexically_normal src/path_lexically_normal.cpp) add_benchmark(priority_queue_push_range src/priority_queue_push_range.cpp) add_benchmark(random_integer_generation src/random_integer_generation.cpp) diff --git a/benchmarks/src/mismatch.cpp b/benchmarks/src/mismatch.cpp new file mode 100644 index 0000000000..b6b293d155 --- /dev/null +++ b/benchmarks/src/mismatch.cpp @@ -0,0 +1,36 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include + +using namespace std; + +constexpr int64_t no_pos = -1; + +template +void bm(benchmark::State& state) { + vector a(static_cast(state.range(0)), T{'.'}); + vector b(static_cast(state.range(0)), T{'.'}); + + if (state.range(1) != no_pos) { + b.at(static_cast(state.range(1))) = 'x'; + } + + for (auto _ : state) { + benchmark::DoNotOptimize(ranges::mismatch(a, b)); + } +} + +#define COMMON_ARGS Args({8, 3})->Args({24, 22})->Args({105, -1})->Args({4021, 3056}) + +BENCHMARK(bm)->COMMON_ARGS; +BENCHMARK(bm)->COMMON_ARGS; +BENCHMARK(bm)->COMMON_ARGS; +BENCHMARK(bm)->COMMON_ARGS; + +BENCHMARK_MAIN(); diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 06152ed95d..d973833d26 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -641,6 +641,19 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI auto _UFirst1 = _STD _Get_unwrapped(_First1); const auto _ULast1 = _STD _Get_unwrapped(_Last1); auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1)); +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe) { + using _Ty = _Iter_value_t<_InIt1>; + const auto _First1_ptr = _STD _To_address(_UFirst1); + const auto _First2_ptr = _STD _To_address(_UFirst2); + const auto _Count_el = static_cast(_ULast1 - _UFirst1); + + const auto _Skip = __std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + + _UFirst1 += _Skip; + _UFirst2 += _Skip; + } +#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) { ++_UFirst1; ++_UFirst2; @@ -688,6 +701,20 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( const _CT _Count2 = _ULast2 - _UFirst2; const auto _Count = static_cast<_Iter_diff_t<_InIt1>>((_STD min)(_Count1, _Count2)); _ULast1 = _UFirst1 + _Count; +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe) { + using _Ty = _Iter_value_t<_InIt1>; + const auto _First1_ptr = _STD _To_address(_UFirst1); + const auto _First2_ptr = _STD _To_address(_UFirst2); + const auto _Count_el = static_cast(_Count); + + const auto _Skip = + __std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + + _UFirst1 += _Skip; + _UFirst2 += _Skip; + } +#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) { ++_UFirst1; ++_UFirst2; diff --git a/stl/inc/xutility b/stl/inc/xutility index 260c44f211..52835a2cd3 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -124,6 +124,13 @@ __declspec(noalias) int64_t __stdcall __std_max_8i(const void* _First, const voi __declspec(noalias) uint64_t __stdcall __std_max_8u(const void* _First, const void* _Last) noexcept; __declspec(noalias) float __stdcall __std_max_f(const void* _First, const void* _Last) noexcept; __declspec(noalias) double __stdcall __std_max_d(const void* _First, const void* _Last) noexcept; + +// Returns the position to which 'mismatch' can fast forward. +// This position can be the first mismatched byte, or an earlier position. +// The purpose is to handle only the portion that can be vectorized in a more efficient way, +// than element wise comparison, without element size knowledge. +__declspec(noalias) size_t + __stdcall __std_mismatch_byte_helper(const void* _First1, const void* _First2, size_t _Count_bytes); } // extern "C" _STD_BEGIN @@ -5477,6 +5484,21 @@ namespace ranges { _NODISCARD constexpr mismatch_result<_It1, _It2> _Mismatch_n( _It1 _First1, _It2 _First2, iter_difference_t<_It1> _Count, _Pr _Pred, _Pj1 _Proj1, _Pj2 _Proj2) { _STL_INTERNAL_CHECK(_Count >= 0); +#if _USE_STD_VECTOR_ALGORITHMS + if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity> + && is_same_v<_Pj2, identity>) { + using _Ty = iter_value_t<_It1>; + const auto _First1_ptr = _STD _To_address(_First1); + const auto _First2_ptr = _STD _To_address(_First2); + const auto _Count_el = static_cast(_Count); + const auto _Skip = + __std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + + _First1 += _Skip; + _First2 += _Skip; + _Count -= _Skip; + } +#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ for (; _Count != 0; ++_First1, (void) ++_First2, --_Count) { if (!_STD invoke(_Pred, _STD invoke(_Proj1, *_First1), _STD invoke(_Proj2, *_First2))) { break; diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index d1e2b654e4..7dd6e222b5 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -88,6 +88,24 @@ namespace { void _Advance_bytes(const void*& _Target, _Integral _Offset) noexcept { _Target = static_cast(_Target) + _Offset; } + + __m256i _Avx2_tail_mask_32(const size_t _Count_in_dwords) { + constexpr unsigned int _Dx = 0; // All zeros 32 bit mask + constexpr unsigned int _Ex = ~_Dx; // All ones 32 bit mask + // clang-format off + static alignas(32) constexpr unsigned int _Tail_masks[8][8] = { + {_Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, + {_Ex, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, + {_Ex, _Ex, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, + {_Ex, _Ex, _Ex, _Dx, _Dx, _Dx, _Dx, _Dx}, + {_Ex, _Ex, _Ex, _Ex, _Dx, _Dx, _Dx, _Dx}, + {_Ex, _Ex, _Ex, _Ex, _Ex, _Dx, _Dx, _Dx}, + {_Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Dx, _Dx}, + {_Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Dx}, + }; + // clang-format on + return _mm256_load_si256(reinterpret_cast(_Tail_masks[_Count_in_dwords])); + } } // unnamed namespace extern "C" { @@ -2299,5 +2317,63 @@ __declspec(noalias) void __stdcall __std_bitset_to_string_2( } } +__declspec(noalias) size_t __stdcall __std_mismatch_byte_helper( + const void* const _First1, const void* const _First2, const size_t _Count_bytes) { +#ifndef _M_ARM64EC + const auto _First1_ch = static_cast(_First1); + const auto _First2_ch = static_cast(_First2); + + if (_Use_avx2()) { + const size_t _Count_bytes_avx_full = _Count_bytes & ~size_t{0x1F}; + + size_t _Result = 0; + for (; _Result != _Count_bytes_avx_full; _Result += 0x20) { + const __m256i _Elem1 = _mm256_loadu_si256(reinterpret_cast(_First1_ch + _Result)); + const __m256i _Elem2 = _mm256_loadu_si256(reinterpret_cast(_First2_ch + _Result)); + const auto _Bingo = ~static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8(_Elem1, _Elem2))); + if (_Bingo != 0) { + return _Result + _tzcnt_u32(_Bingo); + } + } + + size_t _Count_tail = _Count_bytes & size_t{0x1C}; + + if (_Count_tail == 0) { + return _Result; + } + + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Count_tail >> 2); + const __m256i _Elem1 = _mm256_maskload_epi32(reinterpret_cast(_First1_ch + _Result), _Tail_mask); + const __m256i _Elem2 = _mm256_maskload_epi32(reinterpret_cast(_First2_ch + _Result), _Tail_mask); + + const auto _Bingo = ~static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8(_Elem1, _Elem2))); + if (_Bingo != 0) { + return _Result + _tzcnt_u32(_Bingo); + } + + return _Result + _Count_tail; + } else if (_Use_sse2()) { + const size_t _Count_bytes_sse = _Count_bytes & ~size_t{0xF}; + + size_t _Result = 0; + for (; _Result != _Count_bytes_sse; _Result += 0xF) { + const __m128i _Elem1 = _mm_loadu_si128(reinterpret_cast(_First1_ch + _Result)); + const __m128i _Elem2 = _mm_loadu_si128(reinterpret_cast(_First2_ch + _Result)); + const auto _Bingo = ~static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8(_Elem1, _Elem2))); + if (_Bingo != 0) { + unsigned long _Offset; + _BitScanForward(&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable] + return _Result + _Offset; + } + } + + return _Result; + } else +#endif // !defined(_M_ARM64EC) + { + return 0; + } +} + } // extern "C" #endif // defined(_M_IX86) || defined(_M_X64) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 510c7b9935..4193e239a2 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -313,6 +313,88 @@ void test_min_max_element_special_cases() { == v.begin() + 2 * block_size_in_elements + last_vector_first_elem + 9); } +template +auto last_known_good_mismatch(FwdIt first1, FwdIt last1, FwdIt first2, FwdIt last2) { + for (; first1 != last1 && first2 != last2; ++first1, ++first2) { + if (*first1 != *first2) { + break; + } + } + + return std::make_pair(first1, first2); +} + +template +void test_case_mismatch(const vector& a, const vector& b) { + auto expected = last_known_good_mismatch(a.begin(), a.end(), b.begin(), b.end()); + auto actual = mismatch(a.begin(), a.end(), b.begin(), b.end()); + assert(expected == actual); +#if _HAS_CXX20 + auto ranges_actual = ranges::mismatch(a, b); + assert(get<0>(expected) == ranges_actual.in1); + assert(get<1>(expected) == ranges_actual.in2); +#endif // _HAS_CXX20 +} + +template +void test_mismatch(mt19937_64& gen) { + constexpr size_t shrinkCount = 4; + constexpr size_t mismatchCount = 30; + using TD = conditional_t; + uniform_int_distribution dis('a', 'z'); + vector input_a; + vector input_b; + input_a.reserve(dataCount); + input_b.reserve(dataCount); + + for (;;) { + // equal + test_case_mismatch(input_a, input_b); + + // different sizes + for (size_t i = 0; i != shrinkCount && !input_b.empty(); ++i) { + test_case_mismatch(input_a, input_b); + test_case_mismatch(input_b, input_a); + } + + // actual mismatch (or maybe not, depending on random) + if (!input_b.empty()) { + uniform_int_distribution mismatch_dis(0, input_a.size() - 1); + + for (size_t attempts = 0; attempts < mismatchCount; ++attempts) { + size_t possible_mismatch_pos = mismatch_dis(gen); + input_a[possible_mismatch_pos] = static_cast(dis(gen)); + test_case_mismatch(input_a, input_b); + test_case_mismatch(input_b, input_a); + } + } + + if (input_a.size() == dataCount) { + break; + } + + input_a.push_back(static_cast(dis(gen))); + input_b = input_a; + } +} + +template +void test_mismatch_containers() { + C1 a{'m', 'e', 'o', 'w', ' ', 'C', 'A', 'T', 'S'}; + C2 b{'m', 'e', 'o', 'w', ' ', 'K', 'I', 'T', 'T', 'E', 'N', 'S'}; + const auto result_4 = mismatch(a.begin(), a.end(), b.begin(), b.end()); + const auto result_3 = mismatch(a.begin(), a.end(), b.begin()); + assert(get<0>(result_4) == a.begin() + 5); + assert(get<1>(result_4) == b.begin() + 5); + assert(get<0>(result_3) == a.begin() + 5); + assert(get<1>(result_3) == b.begin() + 5); +#if _HAS_CXX20 + const auto result_r = ranges::mismatch(a, b); + assert(result_r.in1 == a.begin() + 5); + assert(result_r.in2 == b.begin() + 5); +#endif // _HAS_CXX_20 +} + template void last_known_good_reverse(BidIt first, BidIt last) { for (; first != last && first != --last; ++first) { @@ -464,6 +546,24 @@ void test_vector_algorithms(mt19937_64& gen) { test_case_min_max_element( vector{-6604286336755016904, -4365366089374418225, 6104371530830675888, -8582621853879131834}); + test_mismatch(gen); + test_mismatch(gen); + test_mismatch(gen); + test_mismatch(gen); + test_mismatch(gen); + test_mismatch(gen); + test_mismatch(gen); + test_mismatch(gen); + test_mismatch(gen); + + test_mismatch_containers, vector>(); + test_mismatch_containers, vector>(); + test_mismatch_containers, vector>(); + test_mismatch_containers, const vector>(); + test_mismatch_containers, const vector>(); + test_mismatch_containers, vector>(); + test_mismatch_containers, vector>(); + test_reverse(gen); test_reverse(gen); test_reverse(gen); From 4c008fd713ab47a1f8eb7e6379df425b96d9d77f Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 11:17:34 +0200 Subject: [PATCH 02/36] format --- benchmarks/src/mismatch.cpp | 4 ++-- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/benchmarks/src/mismatch.cpp b/benchmarks/src/mismatch.cpp index b6b293d155..f6a3069ac2 100644 --- a/benchmarks/src/mismatch.cpp +++ b/benchmarks/src/mismatch.cpp @@ -12,8 +12,8 @@ using namespace std; constexpr int64_t no_pos = -1; -template -void bm(benchmark::State& state) { +template +void bm(benchmark::State& state) { vector a(static_cast(state.range(0)), T{'.'}); vector b(static_cast(state.range(0)), T{'.'}); diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 4193e239a2..98c6c15c40 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -327,7 +327,7 @@ auto last_known_good_mismatch(FwdIt first1, FwdIt last1, FwdIt first2, FwdIt las template void test_case_mismatch(const vector& a, const vector& b) { auto expected = last_known_good_mismatch(a.begin(), a.end(), b.begin(), b.end()); - auto actual = mismatch(a.begin(), a.end(), b.begin(), b.end()); + auto actual = mismatch(a.begin(), a.end(), b.begin(), b.end()); assert(expected == actual); #if _HAS_CXX20 auto ranges_actual = ranges::mismatch(a, b); From b69c04c89ce4b4bc63859a71ff14fb0c8e03e874 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 11:26:09 +0200 Subject: [PATCH 03/36] more format --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 7dd6e222b5..aea02dfc47 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -88,7 +88,7 @@ namespace { void _Advance_bytes(const void*& _Target, _Integral _Offset) noexcept { _Target = static_cast(_Target) + _Offset; } - + __m256i _Avx2_tail_mask_32(const size_t _Count_in_dwords) { constexpr unsigned int _Dx = 0; // All zeros 32 bit mask constexpr unsigned int _Ex = ~_Dx; // All ones 32 bit mask From 38633a0fcde5b28b08d5c9dedbe7a218883bc37d Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 11:32:45 +0200 Subject: [PATCH 04/36] wrong step --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index aea02dfc47..9ff2b900a9 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2356,7 +2356,7 @@ __declspec(noalias) size_t __stdcall __std_mismatch_byte_helper( const size_t _Count_bytes_sse = _Count_bytes & ~size_t{0xF}; size_t _Result = 0; - for (; _Result != _Count_bytes_sse; _Result += 0xF) { + for (; _Result != _Count_bytes_sse; _Result += 0x10) { const __m128i _Elem1 = _mm_loadu_si128(reinterpret_cast(_First1_ch + _Result)); const __m128i _Elem2 = _mm_loadu_si128(reinterpret_cast(_First2_ch + _Result)); const auto _Bingo = ~static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8(_Elem1, _Elem2))); From ac2786df0a2e9070dbbad6f0594c26fbaaee266c Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 12:09:02 +0200 Subject: [PATCH 05/36] ADL --- stl/inc/algorithm | 5 +++-- stl/inc/xutility | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index d973833d26..cda0760d66 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -648,7 +648,8 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI const auto _First2_ptr = _STD _To_address(_UFirst2); const auto _Count_el = static_cast(_ULast1 - _UFirst1); - const auto _Skip = __std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + const auto _Skip = + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); _UFirst1 += _Skip; _UFirst2 += _Skip; @@ -709,7 +710,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( const auto _Count_el = static_cast(_Count); const auto _Skip = - __std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); _UFirst1 += _Skip; _UFirst2 += _Skip; diff --git a/stl/inc/xutility b/stl/inc/xutility index 52835a2cd3..a99e296bc7 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5492,7 +5492,7 @@ namespace ranges { const auto _First2_ptr = _STD _To_address(_First2); const auto _Count_el = static_cast(_Count); const auto _Skip = - __std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); _First1 += _Skip; _First2 += _Skip; From 2d0cff95b95a5f9e650706e9e33dcab02d61ee3e Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 12:46:58 +0200 Subject: [PATCH 06/36] types --- stl/inc/algorithm | 8 ++++---- stl/inc/xutility | 5 +++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index cda0760d66..499fb1e1d7 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -648,8 +648,8 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI const auto _First2_ptr = _STD _To_address(_UFirst2); const auto _Count_el = static_cast(_ULast1 - _UFirst1); - const auto _Skip = - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + const auto _Skip = static_cast( + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); _UFirst1 += _Skip; _UFirst2 += _Skip; @@ -709,8 +709,8 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( const auto _First2_ptr = _STD _To_address(_UFirst2); const auto _Count_el = static_cast(_Count); - const auto _Skip = - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + const auto _Skip = static_cast( + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); _UFirst1 += _Skip; _UFirst2 += _Skip; diff --git a/stl/inc/xutility b/stl/inc/xutility index a99e296bc7..0f65d2dee2 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5491,8 +5491,9 @@ namespace ranges { const auto _First1_ptr = _STD _To_address(_First1); const auto _First2_ptr = _STD _To_address(_First2); const auto _Count_el = static_cast(_Count); - const auto _Skip = - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty); + + const auto _Skip = static_cast( + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); _First1 += _Skip; _First2 += _Skip; From f45dbbdf71a4f24ccf34a403358c161a2053bb98 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 12:54:29 +0200 Subject: [PATCH 07/36] constant --- stl/inc/algorithm | 36 ++++++++++++++++++++---------------- stl/inc/xutility | 20 +++++++++++--------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 499fb1e1d7..34a4195532 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -643,16 +643,18 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1)); #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe) { - using _Ty = _Iter_value_t<_InIt1>; - const auto _First1_ptr = _STD _To_address(_UFirst1); - const auto _First2_ptr = _STD _To_address(_UFirst2); - const auto _Count_el = static_cast(_ULast1 - _UFirst1); + if (!_STD _Is_constant_evaluated()) { + using _Ty = _Iter_value_t<_InIt1>; + const auto _First1_ptr = _STD _To_address(_UFirst1); + const auto _First2_ptr = _STD _To_address(_UFirst2); + const auto _Count_el = static_cast(_ULast1 - _UFirst1); - const auto _Skip = static_cast( - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); + const auto _Skip = static_cast( + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); - _UFirst1 += _Skip; - _UFirst2 += _Skip; + _UFirst1 += _Skip; + _UFirst2 += _Skip; + } } #endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) { @@ -704,16 +706,18 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( _ULast1 = _UFirst1 + _Count; #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe) { - using _Ty = _Iter_value_t<_InIt1>; - const auto _First1_ptr = _STD _To_address(_UFirst1); - const auto _First2_ptr = _STD _To_address(_UFirst2); - const auto _Count_el = static_cast(_Count); + if (!_STD _Is_constant_evaluated()) { + using _Ty = _Iter_value_t<_InIt1>; + const auto _First1_ptr = _STD _To_address(_UFirst1); + const auto _First2_ptr = _STD _To_address(_UFirst2); + const auto _Count_el = static_cast(_Count); - const auto _Skip = static_cast( - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); + const auto _Skip = static_cast( + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); - _UFirst1 += _Skip; - _UFirst2 += _Skip; + _UFirst1 += _Skip; + _UFirst2 += _Skip; + } } #endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) { diff --git a/stl/inc/xutility b/stl/inc/xutility index 0f65d2dee2..6ffd25b12d 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5487,17 +5487,19 @@ namespace ranges { #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>) { - using _Ty = iter_value_t<_It1>; - const auto _First1_ptr = _STD _To_address(_First1); - const auto _First2_ptr = _STD _To_address(_First2); - const auto _Count_el = static_cast(_Count); + if (!_STD is_constant_evaluated()) { + using _Ty = iter_value_t<_It1>; + const auto _First1_ptr = _STD _To_address(_First1); + const auto _First2_ptr = _STD _To_address(_First2); + const auto _Count_el = static_cast(_Count); - const auto _Skip = static_cast( - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); + const auto _Skip = static_cast( + ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); - _First1 += _Skip; - _First2 += _Skip; - _Count -= _Skip; + _First1 += _Skip; + _First2 += _Skip; + _Count -= _Skip; + } } #endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ for (; _Count != 0; ++_First1, (void) ++_First2, --_Count) { From b0d6eced9377397a33b069336771b4cea11ff9ff Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 14:23:40 +0200 Subject: [PATCH 08/36] let's have sized functions as usual --- stl/inc/algorithm | 34 ++++---- stl/inc/xutility | 38 +++++---- stl/src/vector_algorithms.cpp | 150 +++++++++++++++++++++------------- 3 files changed, 133 insertions(+), 89 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 34a4195532..1d3c034a3b 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -644,16 +644,17 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe) { if (!_STD _Is_constant_evaluated()) { - using _Ty = _Iter_value_t<_InIt1>; - const auto _First1_ptr = _STD _To_address(_UFirst1); - const auto _First2_ptr = _STD _To_address(_UFirst2); - const auto _Count_el = static_cast(_ULast1 - _UFirst1); + constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>); - const auto _Skip = static_cast( - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); + const auto _Pos = static_cast(_STD __std_mismatch<_Elem_size>( + _STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast(_ULast1 - _UFirst1))); - _UFirst1 += _Skip; - _UFirst2 += _Skip; + _UFirst1 += _Pos; + _UFirst2 += _Pos; + + _STD _Seek_wrapped(_First2, _UFirst2); + _STD _Seek_wrapped(_First1, _UFirst1); + return {_First1, _First2}; } } #endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ @@ -707,16 +708,17 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( #if _USE_STD_VECTOR_ALGORITHMS if constexpr (_Equal_memcmp_is_safe) { if (!_STD _Is_constant_evaluated()) { - using _Ty = _Iter_value_t<_InIt1>; - const auto _First1_ptr = _STD _To_address(_UFirst1); - const auto _First2_ptr = _STD _To_address(_UFirst2); - const auto _Count_el = static_cast(_Count); + constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>); + + const auto _Pos = static_cast(_STD __std_mismatch<_Elem_size>( + _STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast(_Count))); - const auto _Skip = static_cast( - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); + _UFirst1 += _Pos; + _UFirst2 += _Pos; - _UFirst1 += _Skip; - _UFirst2 += _Skip; + _STD _Seek_wrapped(_First2, _UFirst2); + _STD _Seek_wrapped(_First1, _UFirst1); + return {_First1, _First2}; } } #endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ diff --git a/stl/inc/xutility b/stl/inc/xutility index 6ffd25b12d..2ea3ce1344 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -125,12 +125,10 @@ __declspec(noalias) uint64_t __stdcall __std_max_8u(const void* _First, const vo __declspec(noalias) float __stdcall __std_max_f(const void* _First, const void* _Last) noexcept; __declspec(noalias) double __stdcall __std_max_d(const void* _First, const void* _Last) noexcept; -// Returns the position to which 'mismatch' can fast forward. -// This position can be the first mismatched byte, or an earlier position. -// The purpose is to handle only the portion that can be vectorized in a more efficient way, -// than element wise comparison, without element size knowledge. -__declspec(noalias) size_t - __stdcall __std_mismatch_byte_helper(const void* _First1, const void* _First2, size_t _Count_bytes); +__declspec(noalias) size_t __stdcall __std_mismatch_1(const void* _First1, const void* _First2, size_t _Count) noexcept; +__declspec(noalias) size_t __stdcall __std_mismatch_2(const void* _First1, const void* _First2, size_t _Count) noexcept; +__declspec(noalias) size_t __stdcall __std_mismatch_4(const void* _First1, const void* _First2, size_t _Count) noexcept; +__declspec(noalias) size_t __stdcall __std_mismatch_8(const void* _First1, const void* _First2, size_t _Count) noexcept; } // extern "C" _STD_BEGIN @@ -299,6 +297,21 @@ auto __std_max(_Ty* const _First, _Ty* const _Last) noexcept { static_assert(_Always_false<_Ty>, "Unexpected size"); } } + +template +size_t __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + if constexpr (_Element_size == 1) { + return __std_mismatch_1(_First1, _First2, _Count); + } else if constexpr (_Element_size == 2) { + return __std_mismatch_2(_First1, _First2, _Count); + } else if constexpr (_Element_size == 4) { + return __std_mismatch_4(_First1, _First2, _Count); + } else if constexpr (_Element_size == 8) { + return __std_mismatch_8(_First1, _First2, _Count); + } else { + static_assert(_Always_false>, "Unexpected size"); + } +} _STD_END #endif // _USE_STD_VECTOR_ALGORITHMS @@ -5488,17 +5501,12 @@ namespace ranges { if constexpr (_Equal_memcmp_is_safe<_It1, _It2, _Pr> && is_same_v<_Pj1, identity> && is_same_v<_Pj2, identity>) { if (!_STD is_constant_evaluated()) { - using _Ty = iter_value_t<_It1>; - const auto _First1_ptr = _STD _To_address(_First1); - const auto _First2_ptr = _STD _To_address(_First2); - const auto _Count_el = static_cast(_Count); + constexpr size_t _Elem_size = sizeof(iter_value_t<_It1>); - const auto _Skip = static_cast( - ::__std_mismatch_byte_helper(_First1_ptr, _First2_ptr, _Count_el * sizeof(_Ty)) / sizeof(_Ty)); + const auto _Pos = static_cast(_STD __std_mismatch<_Elem_size>( + _STD _To_address(_First1), _STD _To_address(_First2), static_cast(_Count))); - _First1 += _Skip; - _First2 += _Skip; - _Count -= _Skip; + return {_STD move(_First1 + _Pos), _STD move(_First2 + _Pos)}; } } #endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 9ff2b900a9..fc7425f3da 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2027,6 +2027,78 @@ namespace { } return _Result; } + + template + __declspec(noalias) size_t + __stdcall _Mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + size_t _Result = 0; +#ifndef _M_ARM64EC + const auto _First1_ch = static_cast(_First1); + const auto _First2_ch = static_cast(_First2); + + if (_Use_avx2()) { + const size_t _Count_bytes = _Count * sizeof(_Ty); + const size_t _Count_bytes_avx_full = _Count_bytes & ~size_t{0x1F}; + + for (; _Result != _Count_bytes_avx_full; _Result += 0x20) { + const __m256i _Elem1 = _mm256_loadu_si256(reinterpret_cast(_First1_ch + _Result)); + const __m256i _Elem2 = _mm256_loadu_si256(reinterpret_cast(_First2_ch + _Result)); + const auto _Bingo = ~static_cast(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Elem1, _Elem2))); + if (_Bingo != 0) { + return (_Result + _tzcnt_u32(_Bingo)) / sizeof(_Ty); + } + } + + size_t _Count_tail = _Count_bytes & size_t{0x1C}; + + if (_Count_tail != 0) { + const __m256i _Tail_mask = _Avx2_tail_mask_32(_Count_tail >> 2); + const __m256i _Elem1 = + _mm256_maskload_epi32(reinterpret_cast(_First1_ch + _Result), _Tail_mask); + const __m256i _Elem2 = + _mm256_maskload_epi32(reinterpret_cast(_First2_ch + _Result), _Tail_mask); + + const auto _Bingo = ~static_cast(_mm256_movemask_epi8(_Traits::_Cmp_avx(_Elem1, _Elem2))); + if (_Bingo != 0) { + return (_Result + _tzcnt_u32(_Bingo)) / sizeof(_Ty); + } + + _Result += _Count_tail; + } + + _Result /= sizeof(_Ty); + + if constexpr (sizeof(_Ty) >= 4) { + return _Result; + } + } else if (_Use_sse2()) { + const size_t _Count_bytes_sse = (_Count * sizeof(_Ty)) & ~size_t{0xF}; + + for (; _Result != _Count_bytes_sse; _Result += 0x10) { + const __m128i _Elem1 = _mm_loadu_si128(reinterpret_cast(_First1_ch + _Result)); + const __m128i _Elem2 = _mm_loadu_si128(reinterpret_cast(_First2_ch + _Result)); + const auto _Bingo = static_cast(_mm_movemask_epi8(_Traits::_Cmp_sse(_Elem1, _Elem2))) ^ 0xFFFF; + if (_Bingo != 0) { + unsigned long _Offset; + _BitScanForward(&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable] + return (_Result + _Offset) / sizeof(_Ty); + } + } + + _Result /= sizeof(_Ty); + } +#endif // !defined(_M_ARM64EC) + const auto _First1_el = static_cast(_First1); + const auto _First2_el = static_cast(_First2); + + for (; _Result != _Count; ++_Result) { + if (_First1_el[_Result] != _First2_el[_Result]) { + break; + } + } + + return _Result; + } } // unnamed namespace extern "C" { @@ -2112,6 +2184,26 @@ __declspec(noalias) size_t return __std_count_trivial_impl<_Find_traits_8>(_First, _Last, _Val); } +__declspec(noalias) size_t + __stdcall __std_mismatch_1(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + return _Mismatch<_Find_traits_1, uint8_t>(_First1, _First2, _Count); +} + +__declspec(noalias) size_t + __stdcall __std_mismatch_2(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + return _Mismatch<_Find_traits_2, uint16_t>(_First1, _First2, _Count); +} + +__declspec(noalias) size_t + __stdcall __std_mismatch_4(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + return _Mismatch<_Find_traits_4, uint32_t>(_First1, _First2, _Count); +} + +__declspec(noalias) size_t + __stdcall __std_mismatch_8(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + return _Mismatch<_Find_traits_8, uint64_t>(_First1, _First2, _Count); +} + } // extern "C" #ifndef _M_ARM64EC @@ -2317,63 +2409,5 @@ __declspec(noalias) void __stdcall __std_bitset_to_string_2( } } -__declspec(noalias) size_t __stdcall __std_mismatch_byte_helper( - const void* const _First1, const void* const _First2, const size_t _Count_bytes) { -#ifndef _M_ARM64EC - const auto _First1_ch = static_cast(_First1); - const auto _First2_ch = static_cast(_First2); - - if (_Use_avx2()) { - const size_t _Count_bytes_avx_full = _Count_bytes & ~size_t{0x1F}; - - size_t _Result = 0; - for (; _Result != _Count_bytes_avx_full; _Result += 0x20) { - const __m256i _Elem1 = _mm256_loadu_si256(reinterpret_cast(_First1_ch + _Result)); - const __m256i _Elem2 = _mm256_loadu_si256(reinterpret_cast(_First2_ch + _Result)); - const auto _Bingo = ~static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8(_Elem1, _Elem2))); - if (_Bingo != 0) { - return _Result + _tzcnt_u32(_Bingo); - } - } - - size_t _Count_tail = _Count_bytes & size_t{0x1C}; - - if (_Count_tail == 0) { - return _Result; - } - - const __m256i _Tail_mask = _Avx2_tail_mask_32(_Count_tail >> 2); - const __m256i _Elem1 = _mm256_maskload_epi32(reinterpret_cast(_First1_ch + _Result), _Tail_mask); - const __m256i _Elem2 = _mm256_maskload_epi32(reinterpret_cast(_First2_ch + _Result), _Tail_mask); - - const auto _Bingo = ~static_cast(_mm256_movemask_epi8(_mm256_cmpeq_epi8(_Elem1, _Elem2))); - if (_Bingo != 0) { - return _Result + _tzcnt_u32(_Bingo); - } - - return _Result + _Count_tail; - } else if (_Use_sse2()) { - const size_t _Count_bytes_sse = _Count_bytes & ~size_t{0xF}; - - size_t _Result = 0; - for (; _Result != _Count_bytes_sse; _Result += 0x10) { - const __m128i _Elem1 = _mm_loadu_si128(reinterpret_cast(_First1_ch + _Result)); - const __m128i _Elem2 = _mm_loadu_si128(reinterpret_cast(_First2_ch + _Result)); - const auto _Bingo = ~static_cast(_mm_movemask_epi8(_mm_cmpeq_epi8(_Elem1, _Elem2))); - if (_Bingo != 0) { - unsigned long _Offset; - _BitScanForward(&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable] - return _Result + _Offset; - } - } - - return _Result; - } else -#endif // !defined(_M_ARM64EC) - { - return 0; - } -} - } // extern "C" #endif // defined(_M_IX86) || defined(_M_X64) From 4fd7b432b34f3674165b5f0836fcd40cc823835a Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 14:45:27 +0200 Subject: [PATCH 09/36] format --- stl/src/vector_algorithms.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index fc7425f3da..ef35a3a412 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2077,7 +2077,8 @@ namespace { for (; _Result != _Count_bytes_sse; _Result += 0x10) { const __m128i _Elem1 = _mm_loadu_si128(reinterpret_cast(_First1_ch + _Result)); const __m128i _Elem2 = _mm_loadu_si128(reinterpret_cast(_First2_ch + _Result)); - const auto _Bingo = static_cast(_mm_movemask_epi8(_Traits::_Cmp_sse(_Elem1, _Elem2))) ^ 0xFFFF; + const auto _Bingo = + static_cast(_mm_movemask_epi8(_Traits::_Cmp_sse(_Elem1, _Elem2))) ^ 0xFFFF; if (_Bingo != 0) { unsigned long _Offset; _BitScanForward(&_Offset, _Bingo); // lgtm [cpp/conditionallyuninitializedvariable] From fef5885ac4b8ba77d2da560206ffca0219d97cde Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 16:17:55 +0200 Subject: [PATCH 10/36] inline, see #4496 --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 2ea3ce1344..d39315e0ea 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -299,7 +299,7 @@ auto __std_max(_Ty* const _First, _Ty* const _Last) noexcept { } template -size_t __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { +size_t inline __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { if constexpr (_Element_size == 1) { return __std_mismatch_1(_First1, _First2, _Count); } else if constexpr (_Element_size == 2) { From 2bd29b0dbaea018caff81ecb2d39910ad88dab70 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 16:24:11 +0200 Subject: [PATCH 11/36] transition --- stl/inc/xutility | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index d39315e0ea..0dfe7858ff 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -299,7 +299,9 @@ auto __std_max(_Ty* const _First, _Ty* const _Last) noexcept { } template -size_t inline __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + inline // TRANSITION, GH-4496 + size_t + __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { if constexpr (_Element_size == 1) { return __std_mismatch_1(_First1, _First2, _Count); } else if constexpr (_Element_size == 2) { From 5b1dc1b7b0df38d86b8bccc79ac5a7f37619c28e Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 16:27:22 +0200 Subject: [PATCH 12/36] format --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 0dfe7858ff..54f07a4917 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -299,7 +299,7 @@ auto __std_max(_Ty* const _First, _Ty* const _Last) noexcept { } template - inline // TRANSITION, GH-4496 +inline // TRANSITION, GH-4496 size_t __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { if constexpr (_Element_size == 1) { From abc7e96e9de29d0c4779419a124390073d5f8a87 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Thu, 21 Mar 2024 16:29:02 +0200 Subject: [PATCH 13/36] better format --- stl/inc/xutility | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 54f07a4917..996247cc94 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -299,8 +299,7 @@ auto __std_max(_Ty* const _First, _Ty* const _Last) noexcept { } template -inline // TRANSITION, GH-4496 - size_t +size_t inline // TRANSITION, GH-4496 __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { if constexpr (_Element_size == 1) { return __std_mismatch_1(_First1, _First2, _Count); From 9c92384a41b89307cc797e990a7bd739027c4f12 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 21 Mar 2024 16:09:42 -0700 Subject: [PATCH 14/36] `_Mismatch` => `__std_mismatch_impl` for consistency. GH 4146 originally started this convention. --- stl/src/vector_algorithms.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 7a72b3659a..5a21e389d7 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2097,8 +2097,8 @@ namespace { template - __declspec(noalias) size_t - __stdcall _Mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { + __declspec(noalias) size_t __stdcall __std_mismatch_impl( + const void* const _First1, const void* const _First2, const size_t _Count) noexcept { size_t _Result = 0; #ifndef _M_ARM64EC const auto _First1_ch = static_cast(_First1); @@ -2265,22 +2265,22 @@ const void* __stdcall __std_find_first_of_trivial_2( __declspec(noalias) size_t __stdcall __std_mismatch_1(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { - return _Mismatch<_Find_traits_1, uint8_t>(_First1, _First2, _Count); + return __std_mismatch_impl<_Find_traits_1, uint8_t>(_First1, _First2, _Count); } __declspec(noalias) size_t __stdcall __std_mismatch_2(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { - return _Mismatch<_Find_traits_2, uint16_t>(_First1, _First2, _Count); + return __std_mismatch_impl<_Find_traits_2, uint16_t>(_First1, _First2, _Count); } __declspec(noalias) size_t __stdcall __std_mismatch_4(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { - return _Mismatch<_Find_traits_4, uint32_t>(_First1, _First2, _Count); + return __std_mismatch_impl<_Find_traits_4, uint32_t>(_First1, _First2, _Count); } __declspec(noalias) size_t __stdcall __std_mismatch_8(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { - return _Mismatch<_Find_traits_8, uint64_t>(_First1, _First2, _Count); + return __std_mismatch_impl<_Find_traits_8, uint64_t>(_First1, _First2, _Count); } } // extern "C" From 57d8aee200ac010e9386d48bf79f2ebde8ff3836 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sat, 23 Mar 2024 23:51:40 +0200 Subject: [PATCH 15/36] ASan provocation! --- .../VSO_0000000_vector_algorithms/test.cpp | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index a52a8911f7..3b9d6c212f 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -441,6 +441,79 @@ void test_mismatch(mt19937_64& gen) { } } +namespace test_mismatch_sizes_and_alignments { + constexpr size_t range = 33; + constexpr size_t alignment = 32; + +#pragma pack(push, 1) + template + struct with_pad { + char p[PadSize]; + T v[Size]; + }; +#pragma pack(pop) + + template + char stack_array_various_alignments_impl() { + with_pad a = {}; + with_pad b = {}; + assert(mismatch(begin(a.v), end(a.v), begin(b.v), end(b.v)) == make_pair(end(a.v), end(b.v))); + return 0; + } + + template + void stack_array_various_alignments(index_sequence) { + char ignored[] = {stack_array_various_alignments_impl()...}; + (void) ignored; + } + + template + char stack_array_impl() { + T a[Size + 1] = {}; + T b[Size + 1] = {}; + assert(mismatch(begin(a), end(a), begin(b), end(b)) == make_pair(end(a), end(b))); + stack_array_various_alignments(make_index_sequence{}); + return 0; + } + + template + void stack_array(index_sequence) { + char ignored[] = {stack_array_impl()...}; + (void) ignored; + } + + template + void test() { + // stack with different sizes and alignments. ASan would catch out-of-range reads + stack_array(make_index_sequence{}); + + // vector with different sizes. ASan vector annotations would catch out-of-range reads + for (size_t i = 0; i != range; ++i) { + std::vector a(i, 0); + std::vector b(i, 0); + assert(mismatch(begin(a), end(a), begin(b), end(b)) == make_pair(end(a), end(b))); + } + + // heap with different sizes. ASan would catch out-of-range reads + for (size_t i = 0; i != range; ++i) { + T* a = static_cast(calloc(i, sizeof(T))); + T* b = static_cast(calloc(i, sizeof(T))); + assert(mismatch(a, a+i, b, b+i) == make_pair(a+i, b+i)); + free(a); + free(b); + } + + // subarray from stack array. We would have wrong result if run out of the range (whole range plus one) + T a[range + 1] = {}; + T b[range + 1] = {}; + for (size_t i = 0; i != range; ++i) { + a[i + 1] = 1; + assert(mismatch(a, a + i, b, b + i) == make_pair(a + i, b + i)); + a[i + 1] = 0; + } + } +} // namespace test_mismatchsizes_and_alignmnets + template void test_mismatch_containers() { C1 a{'m', 'e', 'o', 'w', ' ', 'C', 'A', 'T', 'S'}; @@ -645,6 +718,11 @@ void test_vector_algorithms(mt19937_64& gen) { test_mismatch_containers, vector>(); test_mismatch_containers, vector>(); + test_mismatch_sizes_and_alignments::test(); + test_mismatch_sizes_and_alignments::test(); + test_mismatch_sizes_and_alignments::test(); + test_mismatch_sizes_and_alignments::test(); + test_reverse(gen); test_reverse(gen); test_reverse(gen); From d253100db9df75aeee3d035f8d9606eadb273d91 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 24 Mar 2024 00:02:44 +0200 Subject: [PATCH 16/36] format --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 3b9d6c212f..ce10eb9711 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -453,7 +453,7 @@ namespace test_mismatch_sizes_and_alignments { }; #pragma pack(pop) - template + template char stack_array_various_alignments_impl() { with_pad a = {}; with_pad b = {}; @@ -498,7 +498,7 @@ namespace test_mismatch_sizes_and_alignments { for (size_t i = 0; i != range; ++i) { T* a = static_cast(calloc(i, sizeof(T))); T* b = static_cast(calloc(i, sizeof(T))); - assert(mismatch(a, a+i, b, b+i) == make_pair(a+i, b+i)); + assert(mismatch(a, a + i, b, b + i) == make_pair(a + i, b + i)); free(a); free(b); } @@ -512,7 +512,7 @@ namespace test_mismatch_sizes_and_alignments { a[i + 1] = 0; } } -} // namespace test_mismatchsizes_and_alignmnets +} // namespace test_mismatch_sizes_and_alignments template void test_mismatch_containers() { From fac47961b7b3057faaee224ee3732250928cbc32 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 24 Mar 2024 06:51:54 +0200 Subject: [PATCH 17/36] two --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index ce10eb9711..6bf7993353 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -503,13 +503,13 @@ namespace test_mismatch_sizes_and_alignments { free(b); } - // subarray from stack array. We would have wrong result if run out of the range (whole range plus one) - T a[range + 1] = {}; - T b[range + 1] = {}; + // subarray from stack array. We would have wrong result if run out of the range (whole range plus ontwoe) + T a[range + 2] = {}; + T b[range + 2] = {}; for (size_t i = 0; i != range; ++i) { - a[i + 1] = 1; + a[i + 2] = 1; assert(mismatch(a, a + i, b, b + i) == make_pair(a + i, b + i)); - a[i + 1] = 0; + a[i + 2] = 0; } } } // namespace test_mismatch_sizes_and_alignments From 1e6b258569bb8e75d4b6a931fe39f3fcf932151e Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 24 Mar 2024 07:48:30 +0200 Subject: [PATCH 18/36] really different --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 6bf7993353..5d1bfb6aa9 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -418,6 +418,7 @@ void test_mismatch(mt19937_64& gen) { for (size_t i = 0; i != shrinkCount && !input_b.empty(); ++i) { test_case_mismatch(input_a, input_b); test_case_mismatch(input_b, input_a); + input_b.pop_back(); } // actual mismatch (or maybe not, depending on random) From a0b05c8a2facd14bcc31dff4b2ed6998431c8fa0 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 24 Mar 2024 07:48:52 +0200 Subject: [PATCH 19/36] expand comment on overrun --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 5d1bfb6aa9..4bc89c3af6 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -504,7 +504,8 @@ namespace test_mismatch_sizes_and_alignments { free(b); } - // subarray from stack array. We would have wrong result if run out of the range (whole range plus ontwoe) + // subarray from stack array. We would have wrong result if run out of the range. + // The mismatch is at whole range plus two, and past the end is whole range plus one. T a[range + 2] = {}; T b[range + 2] = {}; for (size_t i = 0; i != range; ++i) { From 5253b7131d520945eb980a82ff6fa635b1639a3f Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 24 Mar 2024 08:26:20 +0200 Subject: [PATCH 20/36] off by one also lets test test instead of explaining it --- .../std/tests/VSO_0000000_vector_algorithms/test.cpp | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 4bc89c3af6..6996de38b1 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -505,13 +505,15 @@ namespace test_mismatch_sizes_and_alignments { } // subarray from stack array. We would have wrong result if run out of the range. - // The mismatch is at whole range plus two, and past the end is whole range plus one. - T a[range + 2] = {}; - T b[range + 2] = {}; + T a[range + 1] = {}; + T b[range + 1] = {}; for (size_t i = 0; i != range; ++i) { - a[i + 2] = 1; + a[i + 1] = 1; + // whole range mistmatch finds mismatch after past-the-end + assert(mismatch(a, a + i + 1, b, b + i + 1) == make_pair(a + i + 1, b + i + 1)); + // limited range mismatch gets to past-the-end assert(mismatch(a, a + i, b, b + i) == make_pair(a + i, b + i)); - a[i + 2] = 0; + a[i + 1] = 0; } } } // namespace test_mismatch_sizes_and_alignments From b9e80654742a8f93a6ff15718ce6a119ec19342a Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Sun, 24 Mar 2024 13:30:15 +0200 Subject: [PATCH 21/36] Whole range --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 6996de38b1..449ad1bc1c 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -510,7 +510,7 @@ namespace test_mismatch_sizes_and_alignments { for (size_t i = 0; i != range; ++i) { a[i + 1] = 1; // whole range mistmatch finds mismatch after past-the-end - assert(mismatch(a, a + i + 1, b, b + i + 1) == make_pair(a + i + 1, b + i + 1)); + assert(mismatch(a, a + range + 1, b, b + range + 1) == make_pair(a + i + 1, b + i + 1)); // limited range mismatch gets to past-the-end assert(mismatch(a, a + i, b, b + i) == make_pair(a + i, b + i)); a[i + 1] = 0; From 3b01b6342d46a92c2464a8f7ffc69ef2be38ffce Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 11:02:52 -0700 Subject: [PATCH 22/36] Fix preprocessor comments. --- stl/inc/algorithm | 4 ++-- stl/inc/xutility | 2 +- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index b14b288ff6..0c111526ef 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -685,7 +685,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI return {_First1, _First2}; } } -#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ +#endif // ^^^ _USE_STD_VECTOR_ALGORITHMS ^^^ while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) { ++_UFirst1; ++_UFirst2; @@ -749,7 +749,7 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( return {_First1, _First2}; } } -#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ +#endif // ^^^ _USE_STD_VECTOR_ALGORITHMS ^^^ while (_UFirst1 != _ULast1 && _Pred(*_UFirst1, *_UFirst2)) { ++_UFirst1; ++_UFirst2; diff --git a/stl/inc/xutility b/stl/inc/xutility index 996247cc94..02e2eac78d 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5510,7 +5510,7 @@ namespace ranges { return {_STD move(_First1 + _Pos), _STD move(_First2 + _Pos)}; } } -#endif // ^^^ !_USE_STD_VECTOR_ALGORITHMS ^^^ +#endif // ^^^ _USE_STD_VECTOR_ALGORITHMS ^^^ for (; _Count != 0; ++_First1, (void) ++_First2, --_Count) { if (!_STD invoke(_Pred, _STD invoke(_Proj1, *_First1), _STD invoke(_Proj2, *_First2))) { break; diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 449ad1bc1c..887a202626 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -532,7 +532,7 @@ void test_mismatch_containers() { const auto result_r = ranges::mismatch(a, b); assert(result_r.in1 == a.begin() + 5); assert(result_r.in2 == b.begin() + 5); -#endif // _HAS_CXX_20 +#endif // _HAS_CXX20 } template From 4771515f489b6810e51c0fb6b089f3c8b7f55d7b Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 11:03:44 -0700 Subject: [PATCH 23/36] Style: `inline` before return type. --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 02e2eac78d..7e21368100 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -299,7 +299,7 @@ auto __std_max(_Ty* const _First, _Ty* const _Last) noexcept { } template -size_t inline // TRANSITION, GH-4496 +inline size_t // TRANSITION, GH-4496 __std_mismatch(const void* const _First1, const void* const _First2, const size_t _Count) noexcept { if constexpr (_Element_size == 1) { return __std_mismatch_1(_First1, _First2, _Count); From 69ccaae0daf0831e07ef5ae072b82493302ab8ff Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 11:04:13 -0700 Subject: [PATCH 24/36] Drop unnecessary `_STD move()` calls. --- stl/inc/xutility | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/inc/xutility b/stl/inc/xutility index 7e21368100..7d5b817c96 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5507,7 +5507,7 @@ namespace ranges { const auto _Pos = static_cast(_STD __std_mismatch<_Elem_size>( _STD _To_address(_First1), _STD _To_address(_First2), static_cast(_Count))); - return {_STD move(_First1 + _Pos), _STD move(_First2 + _Pos)}; + return {_First1 + _Pos, _First2 + _Pos}; } } #endif // ^^^ _USE_STD_VECTOR_ALGORITHMS ^^^ From 929fcfc5745ab0206dfd55bb2f5d9446b7a84a8d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 11:22:46 -0700 Subject: [PATCH 25/36] Store `size_t _Pos`, cast to each difference type. --- stl/inc/algorithm | 16 ++++++++-------- stl/inc/xutility | 7 ++++--- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 0c111526ef..ff1f8b65e3 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -674,11 +674,11 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch(_InIt1 _First1, const _InI if (!_STD _Is_constant_evaluated()) { constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>); - const auto _Pos = static_cast(_STD __std_mismatch<_Elem_size>( - _STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast(_ULast1 - _UFirst1))); + const size_t _Pos = _STD __std_mismatch<_Elem_size>( + _STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast(_ULast1 - _UFirst1)); - _UFirst1 += _Pos; - _UFirst2 += _Pos; + _UFirst1 += static_cast<_Iter_diff_t<_InIt1>>(_Pos); + _UFirst2 += static_cast<_Iter_diff_t<_InIt2>>(_Pos); _STD _Seek_wrapped(_First2, _UFirst2); _STD _Seek_wrapped(_First1, _UFirst1); @@ -738,11 +738,11 @@ _NODISCARD _CONSTEXPR20 pair<_InIt1, _InIt2> mismatch( if (!_STD _Is_constant_evaluated()) { constexpr size_t _Elem_size = sizeof(_Iter_value_t<_InIt1>); - const auto _Pos = static_cast(_STD __std_mismatch<_Elem_size>( - _STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast(_Count))); + const size_t _Pos = _STD __std_mismatch<_Elem_size>( + _STD _To_address(_UFirst1), _STD _To_address(_UFirst2), static_cast(_Count)); - _UFirst1 += _Pos; - _UFirst2 += _Pos; + _UFirst1 += static_cast<_Iter_diff_t<_InIt1>>(_Pos); + _UFirst2 += static_cast<_Iter_diff_t<_InIt2>>(_Pos); _STD _Seek_wrapped(_First2, _UFirst2); _STD _Seek_wrapped(_First1, _UFirst1); diff --git a/stl/inc/xutility b/stl/inc/xutility index 7d5b817c96..f2e0482894 100644 --- a/stl/inc/xutility +++ b/stl/inc/xutility @@ -5504,10 +5504,11 @@ namespace ranges { if (!_STD is_constant_evaluated()) { constexpr size_t _Elem_size = sizeof(iter_value_t<_It1>); - const auto _Pos = static_cast(_STD __std_mismatch<_Elem_size>( - _STD _To_address(_First1), _STD _To_address(_First2), static_cast(_Count))); + const size_t _Pos = _STD __std_mismatch<_Elem_size>( + _STD _To_address(_First1), _STD _To_address(_First2), static_cast(_Count)); - return {_First1 + _Pos, _First2 + _Pos}; + return {_First1 + static_cast>(_Pos), + _First2 + static_cast>(_Pos)}; } } #endif // ^^^ _USE_STD_VECTOR_ALGORITHMS ^^^ From bdd374ecd8b023e5cd6aadf96390d9256e8da86f Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 11:53:34 -0700 Subject: [PATCH 26/36] Bugfix: Guard with `_Traits::_Sse_available()`. --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 5a21e389d7..33c5e11cf9 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2139,7 +2139,7 @@ namespace { if constexpr (sizeof(_Ty) >= 4) { return _Result; } - } else if (_Use_sse2()) { + } else if (_Traits::_Sse_available()) { const size_t _Count_bytes_sse = (_Count * sizeof(_Ty)) & ~size_t{0xF}; for (; _Result != _Count_bytes_sse; _Result += 0x10) { From 1284aa589203e51459226e7930776ff35a55a653 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 11:59:53 -0700 Subject: [PATCH 27/36] Style: `alignas` before `static constexpr`. --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 33c5e11cf9..869a532d14 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -93,7 +93,7 @@ namespace { constexpr unsigned int _Dx = 0; // All zeros 32 bit mask constexpr unsigned int _Ex = ~_Dx; // All ones 32 bit mask // clang-format off - static alignas(32) constexpr unsigned int _Tail_masks[8][8] = { + alignas(32) static constexpr unsigned int _Tail_masks[8][8] = { {_Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, {_Ex, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, {_Ex, _Ex, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, From b0fbdca3a1233553a9fa0ae78e88a6a9af0b306b Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 12:02:06 -0700 Subject: [PATCH 28/36] Add an empty line, then clang-format suppression can be removed. --- stl/src/vector_algorithms.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 869a532d14..b68d31b294 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -92,7 +92,7 @@ namespace { __m256i _Avx2_tail_mask_32(const size_t _Count_in_dwords) { constexpr unsigned int _Dx = 0; // All zeros 32 bit mask constexpr unsigned int _Ex = ~_Dx; // All ones 32 bit mask - // clang-format off + alignas(32) static constexpr unsigned int _Tail_masks[8][8] = { {_Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, {_Ex, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, @@ -103,7 +103,6 @@ namespace { {_Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Dx, _Dx}, {_Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Dx}, }; - // clang-format on return _mm256_load_si256(reinterpret_cast(_Tail_masks[_Count_in_dwords])); } } // unnamed namespace From afa152d92cd2c5e364d9b224834f0a21adefbd82 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 12:31:45 -0700 Subject: [PATCH 29/36] Save 200 bytes by using a sliding window. --- stl/src/vector_algorithms.cpp | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index b68d31b294..5fc2edcbb7 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -90,20 +90,9 @@ namespace { } __m256i _Avx2_tail_mask_32(const size_t _Count_in_dwords) { - constexpr unsigned int _Dx = 0; // All zeros 32 bit mask - constexpr unsigned int _Ex = ~_Dx; // All ones 32 bit mask - - alignas(32) static constexpr unsigned int _Tail_masks[8][8] = { - {_Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, - {_Ex, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, - {_Ex, _Ex, _Dx, _Dx, _Dx, _Dx, _Dx, _Dx}, - {_Ex, _Ex, _Ex, _Dx, _Dx, _Dx, _Dx, _Dx}, - {_Ex, _Ex, _Ex, _Ex, _Dx, _Dx, _Dx, _Dx}, - {_Ex, _Ex, _Ex, _Ex, _Ex, _Dx, _Dx, _Dx}, - {_Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Dx, _Dx}, - {_Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Ex, _Dx}, - }; - return _mm256_load_si256(reinterpret_cast(_Tail_masks[_Count_in_dwords])); + // _Count_in_dwords must be within [1, 7]. + static constexpr unsigned int _Tail_masks[14] = {~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, 0, 0, 0}; + return _mm256_loadu_si256(reinterpret_cast(_Tail_masks + (7 - _Count_in_dwords))); } } // unnamed namespace From d0a7ce125404b21a025b3dd05bd0ef6ee3b06176 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 13:04:07 -0700 Subject: [PATCH 30/36] Mark `_Avx2_tail_mask_32` as `noexcept`. --- stl/src/vector_algorithms.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index 5fc2edcbb7..faf8da6c04 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -89,7 +89,7 @@ namespace { _Target = static_cast(_Target) + _Offset; } - __m256i _Avx2_tail_mask_32(const size_t _Count_in_dwords) { + __m256i _Avx2_tail_mask_32(const size_t _Count_in_dwords) noexcept { // _Count_in_dwords must be within [1, 7]. static constexpr unsigned int _Tail_masks[14] = {~0u, ~0u, ~0u, ~0u, ~0u, ~0u, ~0u, 0, 0, 0, 0, 0, 0, 0}; return _mm256_loadu_si256(reinterpret_cast(_Tail_masks + (7 - _Count_in_dwords))); From 0d0e42357d19f6ca884fd196e7053e07473c5a5b Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 13:30:54 -0700 Subject: [PATCH 31/36] Add `const`. --- stl/src/vector_algorithms.cpp | 2 +- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp index faf8da6c04..a5a637695e 100644 --- a/stl/src/vector_algorithms.cpp +++ b/stl/src/vector_algorithms.cpp @@ -2105,7 +2105,7 @@ namespace { } } - size_t _Count_tail = _Count_bytes & size_t{0x1C}; + const size_t _Count_tail = _Count_bytes & size_t{0x1C}; if (_Count_tail != 0) { const __m256i _Tail_mask = _Avx2_tail_mask_32(_Count_tail >> 2); diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 887a202626..b6e5e115b6 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -426,8 +426,8 @@ void test_mismatch(mt19937_64& gen) { uniform_int_distribution mismatch_dis(0, input_a.size() - 1); for (size_t attempts = 0; attempts < mismatchCount; ++attempts) { - size_t possible_mismatch_pos = mismatch_dis(gen); - input_a[possible_mismatch_pos] = static_cast(dis(gen)); + const size_t possible_mismatch_pos = mismatch_dis(gen); + input_a[possible_mismatch_pos] = static_cast(dis(gen)); test_case_mismatch(input_a, input_b); test_case_mismatch(input_b, input_a); } From 90e5f37dc1779e446f27dfbc3f3d6a78ccca595d Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 13:31:16 -0700 Subject: [PATCH 32/36] Drop unnecessary `std::`. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index b6e5e115b6..a8126ecbec 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -384,7 +384,7 @@ auto last_known_good_mismatch(FwdIt first1, FwdIt last1, FwdIt first2, FwdIt las } } - return std::make_pair(first1, first2); + return make_pair(first1, first2); } template @@ -490,8 +490,8 @@ namespace test_mismatch_sizes_and_alignments { // vector with different sizes. ASan vector annotations would catch out-of-range reads for (size_t i = 0; i != range; ++i) { - std::vector a(i, 0); - std::vector b(i, 0); + vector a(i, 0); + vector b(i, 0); assert(mismatch(begin(a), end(a), begin(b), end(b)) == make_pair(end(a), end(b))); } From 82c651b575d6f9e9de5554eda62fddced2c41bc7 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 13:36:05 -0700 Subject: [PATCH 33/36] Include more headers. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index a8126ecbec..111ef96669 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include From d69c3d3741df652c5499d9a89cde0c72edf5af44 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 15:32:09 -0700 Subject: [PATCH 34/36] `PadsSizes` => `PadSizes` --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 111ef96669..5c676ab05b 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -463,9 +463,9 @@ namespace test_mismatch_sizes_and_alignments { return 0; } - template - void stack_array_various_alignments(index_sequence) { - char ignored[] = {stack_array_various_alignments_impl()...}; + template + void stack_array_various_alignments(index_sequence) { + char ignored[] = {stack_array_various_alignments_impl()...}; (void) ignored; } From eab5917cbabf02fefc6378d05b9fb52b1a0b8d3e Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 15:47:07 -0700 Subject: [PATCH 35/36] Comment cleanups. --- tests/std/tests/VSO_0000000_vector_algorithms/test.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 5c676ab05b..1e34a43e66 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -505,14 +505,14 @@ namespace test_mismatch_sizes_and_alignments { free(b); } - // subarray from stack array. We would have wrong result if run out of the range. + // subarray from stack array. We would have wrong results if we run out of the range. T a[range + 1] = {}; T b[range + 1] = {}; for (size_t i = 0; i != range; ++i) { a[i + 1] = 1; - // whole range mistmatch finds mismatch after past-the-end + // whole range mismatch finds mismatch after past-the-end of the subarray assert(mismatch(a, a + range + 1, b, b + range + 1) == make_pair(a + i + 1, b + i + 1)); - // limited range mismatch gets to past-the-end + // limited range mismatch gets to past-the-end of the subarray assert(mismatch(a, a + i, b, b + i) == make_pair(a + i, b + i)); a[i + 1] = 0; } From f52e2d9b67eeb7383b9648d92b3efd4662f8910a Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Wed, 27 Mar 2024 15:48:05 -0700 Subject: [PATCH 36/36] Move the definition of `test_mismatch_containers()`, no other changes. --- .../VSO_0000000_vector_algorithms/test.cpp | 34 +++++++++---------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp index 1e34a43e66..e263e59d62 100644 --- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp +++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp @@ -443,6 +443,23 @@ void test_mismatch(mt19937_64& gen) { } } +template +void test_mismatch_containers() { + C1 a{'m', 'e', 'o', 'w', ' ', 'C', 'A', 'T', 'S'}; + C2 b{'m', 'e', 'o', 'w', ' ', 'K', 'I', 'T', 'T', 'E', 'N', 'S'}; + const auto result_4 = mismatch(a.begin(), a.end(), b.begin(), b.end()); + const auto result_3 = mismatch(a.begin(), a.end(), b.begin()); + assert(get<0>(result_4) == a.begin() + 5); + assert(get<1>(result_4) == b.begin() + 5); + assert(get<0>(result_3) == a.begin() + 5); + assert(get<1>(result_3) == b.begin() + 5); +#if _HAS_CXX20 + const auto result_r = ranges::mismatch(a, b); + assert(result_r.in1 == a.begin() + 5); + assert(result_r.in2 == b.begin() + 5); +#endif // _HAS_CXX20 +} + namespace test_mismatch_sizes_and_alignments { constexpr size_t range = 33; constexpr size_t alignment = 32; @@ -519,23 +536,6 @@ namespace test_mismatch_sizes_and_alignments { } } // namespace test_mismatch_sizes_and_alignments -template -void test_mismatch_containers() { - C1 a{'m', 'e', 'o', 'w', ' ', 'C', 'A', 'T', 'S'}; - C2 b{'m', 'e', 'o', 'w', ' ', 'K', 'I', 'T', 'T', 'E', 'N', 'S'}; - const auto result_4 = mismatch(a.begin(), a.end(), b.begin(), b.end()); - const auto result_3 = mismatch(a.begin(), a.end(), b.begin()); - assert(get<0>(result_4) == a.begin() + 5); - assert(get<1>(result_4) == b.begin() + 5); - assert(get<0>(result_3) == a.begin() + 5); - assert(get<1>(result_3) == b.begin() + 5); -#if _HAS_CXX20 - const auto result_r = ranges::mismatch(a, b); - assert(result_r.in1 == a.begin() + 5); - assert(result_r.in2 == b.begin() + 5); -#endif // _HAS_CXX20 -} - template void last_known_good_reverse(BidIt first, BidIt last) { for (; first != last && first != --last; ++first) {