diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt
index 908b364001..178e3f6cb4 100644
--- a/benchmarks/CMakeLists.txt
+++ b/benchmarks/CMakeLists.txt
@@ -119,6 +119,7 @@ add_benchmark(random_integer_generation src/random_integer_generation.cpp)
 add_benchmark(remove src/remove.cpp)
 add_benchmark(replace src/replace.cpp)
 add_benchmark(reverse src/reverse.cpp)
+add_benchmark(rotate src/rotate.cpp)
 add_benchmark(search src/search.cpp)
 add_benchmark(search_n src/search_n.cpp)
 add_benchmark(std_copy src/std_copy.cpp)
diff --git a/benchmarks/src/rotate.cpp b/benchmarks/src/rotate.cpp
new file mode 100644
index 0000000000..9ba7c4b898
--- /dev/null
+++ b/benchmarks/src/rotate.cpp
@@ -0,0 +1,58 @@
+// Copyright (c) Microsoft Corporation.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include
+#include
+#include
+#include
+
+#include "skewed_allocator.hpp"
+#include "utility.hpp"
+
+using namespace std;
+
+enum class AlgType { Std, Rng }; // selects rotate() (Std) or ranges::rotate() (Rng) inside bm_rotate
+
+template
+void bm_rotate(benchmark::State& state) { // range(0): element count; range(1): offset of the rotation point
+    const auto size = static_cast(state.range(0));
+    const auto n = static_cast(state.range(1));
+
+    auto v = random_vector(size);
+    benchmark::DoNotOptimize(v);
+
+    for (auto _ : state) {
+        if constexpr (Alg == AlgType::Std) {
+            rotate(v.begin(), v.begin() + n, v.end());
+        } else {
+            ranges::rotate(v, v.begin() + n);
+        }
+        benchmark::DoNotOptimize(v); // keep the rotated vector observable so the call is not optimized away
+    }
+}
+
+void common_args(auto bm) { // adds the {size, rotation point} argument pairs to the benchmark passed in
+    bm->Args({3333, 2242})->Args({3332, 1666})->Args({3333, 1111})->Args({3333, 501});
+    bm->Args({3333, 3300})->Args({3333, 12})->Args({3333, 5})->Args({3333, 1});
+    bm->Args({333, 101})->Args({123, 32})->Args({23, 7})->Args({12, 5})->Args({3, 2});
+}
+
+struct color { // aggregate of three 16-bit fields
+    uint16_t h;
+    uint16_t s;
+    uint16_t l;
+};
+
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+
+BENCHMARK(bm_rotate)->Apply(common_args);
+BENCHMARK(bm_rotate)->Apply(common_args);
+
+BENCHMARK_MAIN();
diff --git a/stl/inc/algorithm b/stl/inc/algorithm
index 6d7e76ed4a..32ac49a8a7 100644
--- a/stl/inc/algorithm
+++ b/stl/inc/algorithm
@@ -5768,6 +5768,19 @@ namespace ranges {
             }
 
             if constexpr (bidirectional_iterator<_It>) {
+#if _USE_STD_VECTOR_ALGORITHMS
+                using _Elem = remove_reference_t>;
+
+                if constexpr (contiguous_iterator<_It> && sized_sentinel_for<_Se, _It>
+                              && conjunction_v<_Is_trivially_ranges_swappable<_Elem>, negation>>) {
+                    if (!_STD is_constant_evaluated()) { // the out-of-line helper cannot run during constant evaluation
+                        const _It _Last_it = _First + (_Last - _First); // end of the range as an iterator, not a sentinel
+                        ::__std_rotate(_STD to_address(_First), _STD to_address(_Mid), _STD to_address(_Last_it));
+                        return {_First + (_Last_it - _Mid), _Last_it}; // return iterators only: _Last may be a distinct sentinel type
+                    }
+                }
+#endif // _USE_STD_VECTOR_ALGORITHMS
+
                 _RANGES _Reverse_common(_First, _Mid);
                 auto _Final = _RANGES _Get_final_iterator_unwrapped<_It>(_Mid, _STD move(_Last));
                 _RANGES _Reverse_common(_Mid, _Final);
diff --git a/stl/inc/xutility b/stl/inc/xutility
index 6ceda19327..854ae81b26 100644
--- a/stl/inc/xutility
+++ b/stl/inc/xutility
@@ -79,6 +79,8 @@ __declspec(noalias) void __cdecl __std_reverse_trivially_swappable_8(void* _Firs
 __declspec(noalias) void __cdecl __std_swap_ranges_trivially_swappable_noalias(
     void* _First1, void* _Last1, void* _First2) noexcept;
 
+__declspec(noalias) void __stdcall __std_rotate(void* _First, void* _Mid, void* _Last) noexcept;
+
 __declspec(noalias) size_t __stdcall __std_count_trivial_1(
     const void* _First, const void* _Last, uint8_t _Val) noexcept;
 __declspec(noalias) size_t __stdcall __std_count_trivial_2(
@@ -6597,6 +6599,17 @@ _CONSTEXPR20 _FwdIt rotate(_FwdIt _First, _FwdIt _Mid, _FwdIt _Last) {
     }
 
     if constexpr (_Is_cpp17_random_iter_v<_FwdIt>) {
+#if _USE_STD_VECTOR_ALGORITHMS
+        using _Elem = remove_reference_t<_Iter_ref_t>;
+
+        if constexpr (conjunction_v>, +
_Is_trivially_swappable<_Elem>, negation>>) {
+            if (!_STD _Is_constant_evaluated()) { // the out-of-line helper cannot run during constant evaluation
+                ::__std_rotate(_STD _To_address(_UFirst), _STD _To_address(_UMid), _STD _To_address(_ULast));
+                return _First + (_Last - _Mid); // _First advanced by the length of [_Mid, _Last)
+            }
+        }
+#endif // _USE_STD_VECTOR_ALGORITHMS
         _STD reverse(_UFirst, _UMid);
         _STD reverse(_UMid, _ULast);
         _STD reverse(_UFirst, _ULast);
diff --git a/stl/src/vector_algorithms.cpp b/stl/src/vector_algorithms.cpp
index 12558f8f3b..fb20d8d71c 100644
--- a/stl/src/vector_algorithms.cpp
+++ b/stl/src/vector_algorithms.cpp
@@ -382,6 +382,107 @@ __declspec(noalias) void __cdecl __std_reverse_copy_trivially_copyable_8(
 
 } // extern "C"
 
+namespace {
+    namespace _Rotating {
+        // TRANSITION, GH-5506 "VCRuntime: memmove() is surprisingly slow for more than 8 KB on certain CPUs":
+        // As a workaround, the following code calls memmove() for 8 KB portions.
+        constexpr size_t _Portion_size = 8192;
+        constexpr size_t _Portion_mask = _Portion_size - 1;
+        static_assert((_Portion_size & _Portion_mask) == 0); // _Portion_size must be a power of two for the masking below
+
+        void _Move_to_lower_address(void* _Dest, const void* _Src, const size_t _Size) noexcept { // moves _Size bytes in ascending address order, one portion at a time
+            const size_t _Whole_portions_size = _Size & ~_Portion_mask; // _Size rounded down to a multiple of _Portion_size
+
+            void* _Dest_end = _Dest;
+            _Advance_bytes(_Dest_end, _Whole_portions_size);
+
+            while (_Dest != _Dest_end) {
+                memmove(_Dest, _Src, _Portion_size);
+                _Advance_bytes(_Dest, _Portion_size);
+                _Advance_bytes(_Src, _Portion_size);
+            }
+
+            if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { // bytes left after the last whole portion
+                memmove(_Dest, _Src, _Tail);
+            }
+        }
+
+        void _Move_to_higher_address(void* const _Dest, const void* const _Src, const size_t _Size) noexcept { // moves _Size bytes in descending address order: partial tail first, then whole portions
+            const size_t _Whole_portions_size = _Size & ~_Portion_mask; // _Size rounded down to a multiple of _Portion_size
+
+            void* _Dest_end = _Dest;
+            _Advance_bytes(_Dest_end, _Whole_portions_size);
+            const void* _Src_end = _Src;
+            _Advance_bytes(_Src_end, _Whole_portions_size);
+
+            if (const size_t _Tail = _Size - _Whole_portions_size; _Tail != 0) { // the tail sits past the whole portions, so it goes first
+                memmove(_Dest_end, _Src_end, _Tail);
+            }
+
+            while (_Dest_end != _Dest) { // walk the whole portions from the highest address down
+                _Rewind_bytes(_Dest_end, _Portion_size);
+                _Rewind_bytes(_Src_end, _Portion_size);
+                memmove(_Dest_end, _Src_end, _Portion_size);
+            }
+        }
+
+        constexpr size_t _Buf_size = 512; // byte size of the local buffer declared in __std_rotate
+
+        bool _Use_buffer(const size_t _Smaller, const size_t _Larger) noexcept { // true when _Smaller fits the buffer and is at most 128 or at most half of _Larger
+            return _Smaller <= _Buf_size && (_Smaller <= 128 || _Larger >= _Smaller * 2);
+        }
+    } // namespace _Rotating
+} // unnamed namespace
+
+extern "C" {
+
+__declspec(noalias) void __stdcall __std_rotate(void* _First, void* const _Mid, void* _Last) noexcept { // rotates the bytes of [_First, _Last) so that the byte at _Mid comes first
+    unsigned char _Buf[_Rotating::_Buf_size]; // scratch space for the smaller of the two parts
+
+    for (;;) { // each pass either finishes through the buffer or swaps one part into place and shrinks the range
+        const size_t _Left = _Byte_length(_First, _Mid);
+        const size_t _Right = _Byte_length(_Mid, _Last);
+
+        if (_Left <= _Right) {
+            if (_Left == 0) {
+                break;
+            }
+
+            if (_Rotating::_Use_buffer(_Left, _Right)) { // save the left part, slide the right part down, append the saved bytes
+                memcpy(_Buf, _First, _Left);
+                _Rotating::_Move_to_lower_address(_First, _Mid, _Right);
+                _Advance_bytes(_First, _Right);
+                memcpy(_First, _Buf, _Left);
+                break;
+            }
+
+            void* _Mid2 = _Last;
+            _Rewind_bytes(_Mid2, _Left);
+            __std_swap_ranges_trivially_swappable_noalias(_Mid2, _Last, _First); // the left part now occupies the last _Left bytes
+            _Last = _Mid2; // continue with [_First, _Mid2), still split at _Mid
+        } else {
+            if (_Right == 0) {
+                break;
+            }
+
+            if (_Rotating::_Use_buffer(_Right, _Left)) { // save the right part, slide the left part up, prepend the saved bytes
+                _Rewind_bytes(_Last, _Right);
+                memcpy(_Buf, _Last, _Right);
+                void* _Mid2 = _First;
+                _Advance_bytes(_Mid2, _Right);
+                _Rotating::_Move_to_higher_address(_Mid2, _First, _Left);
+                memcpy(_First, _Buf, _Right);
+                break;
+            }
+
+            __std_swap_ranges_trivially_swappable_noalias(_Mid, _Last, _First); // the right part now occupies the first _Right bytes
+            _Advance_bytes(_First, _Right); // continue with the remainder, still split at _Mid
+        }
+    }
+}
+
+} // extern "C"
+
 namespace {
     namespace _Sorting {
         enum _Min_max_mode {
diff --git a/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp b/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp
index dd00180318..6e54f8390a 100644
--- a/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp
+++ b/tests/std/tests/GH_005421_vector_algorithms_integer_class_type_iterator/test.cpp
@@ -98,7
+98,7 @@ int main() {
     picky_contiguous_iterator float_arr_begin(begin(float_arr));
     picky_contiguous_iterator float_arr_end(end(float_arr));
 
-    transform(arr_begin, arr_end, float_arr_begin, [](int v) { return static_cast(v); });
+    transform(arr_begin, arr_end, float_arr_begin, [](const int v) { return static_cast(v); });
 
     assert(ranges::min(ranges::subrange(float_arr_begin, float_arr_end)) == 200.0);
     assert(ranges::max(ranges::subrange(float_arr_begin, float_arr_end)) == 390.0);
@@ -196,6 +196,30 @@ int main() {
         ranges::reverse(temp_begin, temp_end);
         assert(ranges::equal(temp_begin, temp_end, begin(reverse_expected), end(reverse_expected)));
     }
+    {
+        const int rotate_expected[] = {
+            250, 270, 280, 290, 300, 310, 320, 250, 340, 250, 250, 370, 380, 390, 200, 210, 220, 250, 240, 250};
+
+        const _Signed128 rotate_pos = 6; // rotation offset, held in an integer-class type
+
+        auto rot_copy_it = rotate_copy(arr_begin, arr_begin + rotate_pos, arr_end, temp_begin);
+        assert(equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
+        assert(rot_copy_it == temp_end);
+
+        copy(arr_begin, arr_end, temp_begin); // reload the input before rotating in place
+        auto rot_it = rotate(temp_begin, temp_begin + rotate_pos, temp_end);
+        assert(equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
+        assert(rot_it == temp_end - rotate_pos);
+
+        auto r_rot_copy_it = ranges::rotate_copy(arr_begin, arr_begin + rotate_pos, arr_end, temp_begin).out;
+        assert(ranges::equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
+        assert(r_rot_copy_it == temp_end);
+
+        ranges::copy(arr_begin, arr_end, temp_begin); // reload the input before rotating in place
+        auto r_rot_it = begin(ranges::rotate(temp_begin, temp_begin + rotate_pos, temp_end));
+        assert(ranges::equal(temp_begin, temp_end, begin(rotate_expected), end(rotate_expected)));
+        assert(r_rot_it == temp_end - rotate_pos);
+    }
     {
         // Out of replace family, only replace for 32-bit and 64-bit elements is manually vectorized,
         // replace_copy is auto vectorized (along with replace_copy_if)
diff --git a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp
index 35dac73c83..1232bde41a 100644
--- a/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp
+++ b/tests/std/tests/VSO_0000000_vector_algorithms/test.cpp
@@ -743,6 +743,69 @@ void test_reverse_copy(mt19937_64& gen) {
     }
 }
 
+template
+void last_known_good_rotate( // reference rotate: buffers the smaller part in tmp and shifts the larger part
+    RanIt first, RanIt mid, RanIt last, vector::value_type>& tmp) {
+    const auto size_left = mid - first;
+    const auto size_right = last - mid;
+    if (size_left == 0 || size_right == 0) {
+        return; // nothing moves; also keeps the overlapping moves below within their preconditions
+    }
+
+    if (size_left <= size_right) {
+        tmp.assign(first, mid);
+        move(mid, last, first); // shifting to lower addresses: forward move, destination start is before mid
+        move(tmp.begin(), tmp.end(), last - size_left);
+    } else {
+        tmp.assign(mid, last);
+        move_backward(first, mid, last); // shifting to higher addresses: backward move, destination end is past mid
+        move(tmp.begin(), tmp.end(), first);
+    }
+}
+
+template
+void test_case_rotate( // rotates all three vectors at pos and checks rotate / ranges::rotate against the reference
+    vector& actual, vector& actual_r, vector& expected, const ptrdiff_t pos, vector& tmp) {
+    const ptrdiff_t shift = static_cast(expected.size()) - pos; // expected offset of the returned iterator
+    last_known_good_rotate(expected.begin(), expected.begin() + pos, expected.end(), tmp);
+    const auto it = rotate(actual.begin(), actual.begin() + pos, actual.end());
+    assert(expected == actual);
+    assert(it == actual.begin() + shift);
+#if _HAS_CXX20
+    const auto rng = ranges::rotate(actual_r.begin(), actual_r.begin() + pos, actual_r.end());
+    assert(expected == actual_r);
+    assert(begin(rng) == actual_r.begin() + shift);
+    assert(end(rng) == actual_r.end());
+#else // ^^^ _HAS_CXX20 / !_HAS_CXX20 vvv
+    (void) actual_r;
+#endif // ^^^ !_HAS_CXX20 ^^^
+}
+
+template
+void test_rotate(mt19937_64& gen) { // grows the vectors one random element at a time, trying 5 random rotation points per size
+    vector actual;
+    vector actual_r;
+    vector expected;
+    vector tmp;
+    actual.reserve(dataCount);
+    actual_r.reserve(dataCount);
+    expected.reserve(dataCount);
+    tmp.reserve(dataCount);
+    test_case_rotate(actual, actual_r, expected, 0, tmp); // empty range
+    for (size_t attempts = 0; attempts < dataCount; ++attempts) {
+        const T val = static_cast(gen()); // intentionally narrows
+        actual.push_back(val);
+        actual_r.push_back(val);
+        expected.push_back(val);
+
+        uniform_int_distribution dis_pos(0, static_cast(attempts) + 1); // any position from begin to end inclusive
+
+        for (size_t pos_count = 0; pos_count != 5; ++pos_count) {
+            test_case_rotate(actual, actual_r, expected, dis_pos(gen), tmp);
+        }
+    }
+}
+
 template
 FwdIt2 last_known_good_swap_ranges(FwdIt1 first1, const FwdIt1 last1, FwdIt2 dest) {
     for (; first1 != last1; ++first1, ++dest) {
@@ -1182,6 +1245,19 @@ void test_vector_algorithms(mt19937_64& gen) {
     test_reverse_copy(gen);
     test_reverse_copy(gen);
 
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+    test_rotate(gen);
+
     test_remove(gen);
     test_remove(gen);
     test_remove(gen);