diff --git a/benchmarks/CMakeLists.txt b/benchmarks/CMakeLists.txt index 893042036f1..6403f725754 100644 --- a/benchmarks/CMakeLists.txt +++ b/benchmarks/CMakeLists.txt @@ -128,8 +128,10 @@ add_benchmark(remove src/remove.cpp) add_benchmark(replace src/replace.cpp) add_benchmark(reverse src/reverse.cpp) add_benchmark(rotate src/rotate.cpp) +add_benchmark(sample src/sample.cpp) add_benchmark(search src/search.cpp) add_benchmark(search_n src/search_n.cpp) +add_benchmark(shuffle src/shuffle.cpp) add_benchmark(std_copy src/std_copy.cpp) add_benchmark(sv_equal src/sv_equal.cpp) add_benchmark(swap_ranges src/swap_ranges.cpp) diff --git a/benchmarks/src/sample.cpp b/benchmarks/src/sample.cpp new file mode 100644 index 00000000000..e27776f2420 --- /dev/null +++ b/benchmarks/src/sample.cpp @@ -0,0 +1,53 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +enum class alg_type { std_fn, rng }; + +template +void bm_sample(benchmark::State& state) { + static_assert(is_unsigned_v, "T must be unsigned so iota() doesn't have to worry about overflow."); + + const auto population_size = static_cast(state.range(0)); + const auto sampled_size = static_cast(state.range(1)); + + vector population(population_size); + vector sampled(sampled_size); + iota(population.begin(), population.end(), T{0}); + mt19937_64 urbg; + + for (auto _ : state) { + benchmark::DoNotOptimize(population); + if constexpr (Alg == alg_type::rng) { + ranges::sample(population, sampled.begin(), sampled_size, urbg); + } else { + sample(population.begin(), population.end(), sampled.begin(), sampled_size, urbg); + } + benchmark::DoNotOptimize(sampled); + } +} + +void common_args(auto bm) { + bm->Args({1 << 20, 1 << 15}); +} + +BENCHMARK(bm_sample)->Apply(common_args); +BENCHMARK(bm_sample)->Apply(common_args); +BENCHMARK(bm_sample)->Apply(common_args); +BENCHMARK(bm_sample)->Apply(common_args); + +BENCHMARK(bm_sample)->Apply(common_args); +BENCHMARK(bm_sample)->Apply(common_args); +BENCHMARK(bm_sample)->Apply(common_args); +BENCHMARK(bm_sample)->Apply(common_args); + +BENCHMARK_MAIN(); diff --git a/benchmarks/src/shuffle.cpp b/benchmarks/src/shuffle.cpp new file mode 100644 index 00000000000..b87f7f9ee89 --- /dev/null +++ b/benchmarks/src/shuffle.cpp @@ -0,0 +1,49 @@ +// Copyright (c) Microsoft Corporation. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include +#include +#include +#include +#include +#include +#include +#include +using namespace std; + +enum class alg_type { std_fn, rng }; + +template +void bm_shuffle(benchmark::State& state) { + static_assert(is_unsigned_v, "T must be unsigned so iota() doesn't have to worry about overflow."); + + const auto n = static_cast(state.range(0)); + vector v(n); + iota(v.begin(), v.end(), T{0}); + mt19937_64 urbg; + + for (auto _ : state) { + benchmark::DoNotOptimize(v); + if constexpr (Alg == alg_type::rng) { + ranges::shuffle(v, urbg); + } else { + shuffle(v.begin(), v.end(), urbg); + } + } +} + +void common_args(auto bm) { + bm->Arg(1 << 20); +} + +BENCHMARK(bm_shuffle)->Apply(common_args); +BENCHMARK(bm_shuffle)->Apply(common_args); +BENCHMARK(bm_shuffle)->Apply(common_args); +BENCHMARK(bm_shuffle)->Apply(common_args); + +BENCHMARK(bm_shuffle)->Apply(common_args); +BENCHMARK(bm_shuffle)->Apply(common_args); +BENCHMARK(bm_shuffle)->Apply(common_args); +BENCHMARK(bm_shuffle)->Apply(common_args); + +BENCHMARK_MAIN(); diff --git a/stl/inc/algorithm b/stl/inc/algorithm index 876d57a1bd2..8bcb62b24e7 100644 --- a/stl/inc/algorithm +++ b/stl/inc/algorithm @@ -8,6 +8,7 @@ #include #if _STL_COMPILER_PREPROCESSOR #include <__msvc_heap_algorithms.hpp> +#include <__msvc_int128.hpp> #include <__msvc_minmax.hpp> #include @@ -6019,6 +6020,141 @@ private: _Udiff _Bmask; // 2^_Bits - 1 }; +template +class _Rng_from_urng_v2 { // wrap a URNG as an RNG +public: + using _Ty0 = make_unsigned_t<_Diff>; + using _Ty1 = _Invoke_result_t<_Urng&>; + + using _Udiff = conditional_t; + static constexpr unsigned int _Udiff_bits = sizeof(_Udiff) * CHAR_BIT; + using _Uprod = conditional_t<_Udiff_bits <= 16, uint32_t, conditional_t<_Udiff_bits <= 32, uint64_t, _Unsigned128>>; + + explicit _Rng_from_urng_v2(_Urng& _Func) noexcept : _Ref(_Func) {} + + _Diff operator()(_Diff _Index) { // adapt _Urng closed range to [0, _Index) + // From Daniel Lemire, "Fast Random Integer Generation in an Interval", + // ACM Trans. Model. Comput. Simul. 29 (1), 2019. + // + // Algorithm 5 <-> This Code: + // m <-> _Product + // l <-> _Rem + // s <-> _Index + // t <-> _Threshold + // L <-> _Generated_bits + // 2^L - 1 <-> _Mask + + _Udiff _Mask = _Bmask; + unsigned int _Niter = 1; + + if constexpr (_Bits < _Udiff_bits) { + while (_Mask < static_cast<_Udiff>(_Index - 1)) { + _Mask <<= _Bits; + _Mask |= _Bmask; + ++_Niter; + } + } + + // x <- random integer in [0, 2^L) + // m <- x * s + auto _Product = _Get_random_product(_Index, _Niter); + // l <- m mod 2^L + auto _Rem = static_cast<_Udiff>(_Product) & _Mask; + + if (_Rem < static_cast<_Udiff>(_Index)) { + // t <- (2^L - s) mod s + const auto _Threshold = (_Mask - _Index + 1) % _Index; + while (_Rem < _Threshold) { + _Product = _Get_random_product(_Index, _Niter); + _Rem = static_cast<_Udiff>(_Product) & _Mask; + } + } + + unsigned int _Generated_bits; + if constexpr (_Bits < _Udiff_bits) { + _Generated_bits = static_cast(_Popcount(_Mask)); + } else { + _Generated_bits = _Udiff_bits; + } + + // m / 2^L + return static_cast<_Diff>(_Product >> _Generated_bits); + } + + _Udiff _Get_all_bits() { + _Udiff _Ret = _Get_bits(); + + if constexpr (_Bits < _Udiff_bits) { + for (unsigned int _Num = _Bits; _Num < _Udiff_bits; _Num += _Bits) { // don't mask away any bits + _Ret <<= _Bits; + _Ret |= _Get_bits(); + } + } + + return _Ret; + } + + _Rng_from_urng_v2(const _Rng_from_urng_v2&) = delete; + _Rng_from_urng_v2& operator=(const _Rng_from_urng_v2&) = delete; + +private: + _Udiff _Get_bits() { // return a random value within [0, _Bmask] + constexpr auto _Urng_min = (_Urng::min) (); + for (;;) { // repeat until random value is in range + const _Udiff _Val = static_cast<_Udiff>(_Ref() - _Urng_min); + + if (_Val <= _Bmask) { + return _Val; + } + } + } + + static constexpr size_t _Calc_bits() { + auto _Bits_local = _Udiff_bits; + auto _Bmask_local = static_cast<_Udiff>(-1); + for (; static_cast<_Udiff>((_Urng::max) () - (_Urng::min) ()) < _Bmask_local; _Bmask_local >>= 1) { + --_Bits_local; + } + + return _Bits_local; + } + + _Uprod _Get_random_product(const _Diff _Index, unsigned int _Niter) { + _Udiff _Ret = _Get_bits(); + if constexpr (_Bits < _Udiff_bits) { + while (--_Niter > 0) { + _Ret <<= _Bits; + _Ret |= _Get_bits(); + } + } + + if constexpr (is_same_v<_Udiff, uint64_t>) { + uint64_t _High; + const auto _Low = _Base128::_UMul128(_Ret, static_cast<_Udiff>(_Index), _High); + return _Uprod{_Low, _High}; + } else { + return _Uprod{_Ret} * static_cast<_Uprod>(_Index); + } + } + + _Urng& _Ref; // reference to URNG + static constexpr size_t _Bits = _Calc_bits(); // number of random bits generated by _Get_bits() + static constexpr _Udiff _Bmask = static_cast<_Udiff>(-1) >> (_Udiff_bits - _Bits); // 2^_Bits - 1 +}; + +template +constexpr bool _Has_static_min_max = false; + +// This checks a requirement of N4981 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt +// to implement the whole concept - we just need to distinguish Standard machinery from tr1 machinery. +template +constexpr bool _Has_static_min_max<_Gen, void_t::value)>> = + true; + +template +using _Rng_from_urng_v1_or_v2 = + conditional_t<_Has_static_min_max<_Urng>, _Rng_from_urng_v2<_Diff, _Urng>, _Rng_from_urng<_Diff, _Urng>>; + #if _HAS_CXX17 template _SampleIt _Sample_reservoir_unchecked( @@ -6076,7 +6212,7 @@ _SampleIt sample(_PopIt _First, _PopIt _Last, _SampleIt _Dest, _Diff _Count, _Ur auto _UFirst = _STD _Get_unwrapped(_First); auto _ULast = _STD _Get_unwrapped(_Last); using _PopDiff = _Iter_diff_t<_PopIt>; - _Rng_from_urng<_PopDiff, remove_reference_t<_Urng>> _RngFunc(_Func); + _Rng_from_urng_v1_or_v2<_PopDiff, remove_reference_t<_Urng>> _RngFunc(_Func); if constexpr (_Is_ranges_fwd_iter_v<_PopIt>) { // source is forward: use selection sampling (stable) using _CT = common_type_t<_Diff, _PopDiff>; @@ -6119,7 +6255,7 @@ namespace ranges { return _Output; } - _Rng_from_urng, remove_reference_t<_Urng>> _RngFunc(_Func); + _Rng_from_urng_v1_or_v2, remove_reference_t<_Urng>> _RngFunc(_Func); if constexpr (forward_iterator<_It>) { auto _UFirst = _RANGES _Unwrap_iter<_Se>(_STD move(_First)); auto _Pop_size = _RANGES distance(_UFirst, _RANGES _Unwrap_sent<_It>(_STD move(_Last))); @@ -6140,7 +6276,7 @@ namespace ranges { return _Output; } - _Rng_from_urng, remove_reference_t<_Urng>> _RngFunc(_Func); + _Rng_from_urng_v1_or_v2, remove_reference_t<_Urng>> _RngFunc(_Func); if constexpr (forward_range<_Rng>) { auto _UFirst = _Ubegin(_Range); auto _Pop_size = _RANGES distance(_UFirst, _Uend(_Range)); @@ -6243,7 +6379,7 @@ void _Random_shuffle1(_RanIt _First, _RanIt _Last, _RngFn& _RngFunc) { _EXPORT_STD template void shuffle(_RanIt _First, _RanIt _Last, _Urng&& _Func) { // shuffle [_First, _Last) using URNG _Func using _Urng0 = remove_reference_t<_Urng>; - _Rng_from_urng<_Iter_diff_t<_RanIt>, _Urng0> _RngFunc(_Func); + _Rng_from_urng_v1_or_v2<_Iter_diff_t<_RanIt>, _Urng0> _RngFunc(_Func); _STD _Random_shuffle1(_First, _Last, _RngFunc); } @@ -6256,7 +6392,7 @@ namespace ranges { _STATIC_CALL_OPERATOR _It operator()(_It _First, _Se _Last, _Urng&& _Func) _CONST_CALL_OPERATOR { _STD _Adl_verify_range(_First, _Last); - _Rng_from_urng, remove_reference_t<_Urng>> _RngFunc(_Func); + _Rng_from_urng_v1_or_v2, remove_reference_t<_Urng>> _RngFunc(_Func); auto _UResult = _Shuffle_unchecked( _RANGES _Unwrap_iter<_Se>(_STD move(_First)), _RANGES _Unwrap_sent<_It>(_STD move(_Last)), _RngFunc); @@ -6267,7 +6403,7 @@ namespace ranges { template requires permutable> && uniform_random_bit_generator> _STATIC_CALL_OPERATOR borrowed_iterator_t<_Rng> operator()(_Rng&& _Range, _Urng&& _Func) _CONST_CALL_OPERATOR { - _Rng_from_urng, remove_reference_t<_Urng>> _RngFunc(_Func); + _Rng_from_urng_v1_or_v2, remove_reference_t<_Urng>> _RngFunc(_Func); return _RANGES _Rewrap_iterator(_Range, _Shuffle_unchecked(_Ubegin(_Range), _Uend(_Range), _RngFunc)); } @@ -6313,11 +6449,11 @@ void random_shuffle(_RanIt _First, _RanIt _Last, _RngFn&& _RngFunc) { struct _Rand_urng_from_func { // wrap rand() as a URNG using result_type = unsigned int; - static result_type(min)() { // return minimum possible generated value + static constexpr result_type(min)() { // return minimum possible generated value return 0; } - static result_type(max)() { // return maximum possible generated value + static constexpr result_type(max)() { // return maximum possible generated value return RAND_MAX; } diff --git a/stl/inc/random b/stl/inc/random index 7a033d12e4c..4b689be0b07 100644 --- a/stl/inc/random +++ b/stl/inc/random @@ -403,15 +403,6 @@ bool _Nrand_for_tr1( return false; } -template -constexpr bool _Has_static_min_max = false; - -// This checks a requirement of N4981 [rand.req.urng] `concept uniform_random_bit_generator` but doesn't attempt -// to implement the whole concept - we just need to distinguish Standard machinery from tr1 machinery. -template -constexpr bool _Has_static_min_max<_Gen, void_t::value)>> = - true; - template _NODISCARD _Real _Nrand_impl(_Gen& _Gx) { // build a floating-point value from random sequence _RNG_REQUIRE_REALTYPE(_Nrand_impl, _Real); @@ -2115,128 +2106,6 @@ private: double _Scale; }; -template -class _Rng_from_urng_v2 { // wrap a URNG as an RNG -public: - using _Ty0 = make_unsigned_t<_Diff>; - using _Ty1 = _Invoke_result_t<_Urng&>; - - using _Udiff = conditional_t; - static constexpr unsigned int _Udiff_bits = sizeof(_Udiff) * CHAR_BIT; - using _Uprod = conditional_t<_Udiff_bits <= 16, uint32_t, conditional_t<_Udiff_bits <= 32, uint64_t, _Unsigned128>>; - - explicit _Rng_from_urng_v2(_Urng& _Func) noexcept : _Ref(_Func) {} - - _Diff operator()(_Diff _Index) { // adapt _Urng closed range to [0, _Index) - // From Daniel Lemire, "Fast Random Integer Generation in an Interval", - // ACM Trans. Model. Comput. Simul. 29 (1), 2019. - // - // Algorithm 5 <-> This Code: - // m <-> _Product - // l <-> _Rem - // s <-> _Index - // t <-> _Threshold - // L <-> _Generated_bits - // 2^L - 1 <-> _Mask - - _Udiff _Mask = _Bmask; - unsigned int _Niter = 1; - - if constexpr (_Bits < _Udiff_bits) { - while (_Mask < static_cast<_Udiff>(_Index - 1)) { - _Mask <<= _Bits; - _Mask |= _Bmask; - ++_Niter; - } - } - - // x <- random integer in [0, 2^L) - // m <- x * s - auto _Product = _Get_random_product(_Index, _Niter); - // l <- m mod 2^L - auto _Rem = static_cast<_Udiff>(_Product) & _Mask; - - if (_Rem < _Index) { - // t <- (2^L - s) mod s - const auto _Threshold = (_Mask - _Index + 1) % _Index; - while (_Rem < _Threshold) { - _Product = _Get_random_product(_Index, _Niter); - _Rem = static_cast<_Udiff>(_Product) & _Mask; - } - } - - unsigned int _Generated_bits; - if constexpr (_Bits < _Udiff_bits) { - _Generated_bits = static_cast(_Popcount(_Mask)); - } else { - _Generated_bits = _Udiff_bits; - } - - // m / 2^L - return static_cast<_Diff>(_Product >> _Generated_bits); - } - - _Udiff _Get_all_bits() { - _Udiff _Ret = _Get_bits(); - - if constexpr (_Bits < _Udiff_bits) { - for (unsigned int _Num = _Bits; _Num < _Udiff_bits; _Num += _Bits) { // don't mask away any bits - _Ret <<= _Bits; - _Ret |= _Get_bits(); - } - } - - return _Ret; - } - - _Rng_from_urng_v2(const _Rng_from_urng_v2&) = delete; - _Rng_from_urng_v2& operator=(const _Rng_from_urng_v2&) = delete; - -private: - _Udiff _Get_bits() { // return a random value within [0, _Bmask] - static constexpr auto _Urng_min = (_Urng::min) (); - for (;;) { // repeat until random value is in range - const _Udiff _Val = _Ref() - _Urng_min; - - if (_Val <= _Bmask) { - return _Val; - } - } - } - - static constexpr size_t _Calc_bits() { - auto _Bits_local = _Udiff_bits; - auto _Bmask_local = static_cast<_Udiff>(-1); - for (; (_Urng::max) () - (_Urng::min) () < _Bmask_local; _Bmask_local >>= 1) { - --_Bits_local; - } - - return _Bits_local; - } - - _Uprod _Get_random_product(const _Diff _Index, unsigned int _Niter) { - _Udiff _Ret = _Get_bits(); - if constexpr (_Bits < _Udiff_bits) { - while (--_Niter > 0) { - _Ret <<= _Bits; - _Ret |= _Get_bits(); - } - } - - if constexpr (is_same_v<_Udiff, uint64_t>) { - uint64_t _High; - const auto _Low = _Base128::_UMul128(_Ret, static_cast<_Udiff>(_Index), _High); - return _Uprod{_Low, _High}; - } else { - return _Uprod{_Ret} * _Uprod{_Index}; - } - } - - _Urng& _Ref; // reference to URNG - static constexpr size_t _Bits = _Calc_bits(); // number of random bits generated by _Get_bits() - static constexpr _Udiff _Bmask = static_cast<_Udiff>(-1) >> (_Udiff_bits - _Bits); // 2^_Bits - 1 -}; - #if _HAS_TR1_NAMESPACE template class _DEPRECATE_TR1_RANDOM uniform_int { // uniform integer distribution @@ -2352,8 +2221,7 @@ private: template result_type _Eval(_Engine& _Eng, _Ty _Min, _Ty _Max) const { // compute next value in range [_Min, _Max] - conditional_t<_Has_static_min_max<_Engine>, _Rng_from_urng_v2<_Uty, _Engine>, _Rng_from_urng<_Uty, _Engine>> - _Generator(_Eng); + _Rng_from_urng_v1_or_v2<_Uty, _Engine> _Generator(_Eng); const _Uty _Umin = _Adjust(static_cast<_Uty>(_Min)); const _Uty _Umax = _Adjust(static_cast<_Uty>(_Max)); diff --git a/tests/std/tests/P0896R4_ranges_alg_shuffle/test.cpp b/tests/std/tests/P0896R4_ranges_alg_shuffle/test.cpp index 3582326e02b..6c2bb00b8a3 100644 --- a/tests/std/tests/P0896R4_ranges_alg_shuffle/test.cpp +++ b/tests/std/tests/P0896R4_ranges_alg_shuffle/test.cpp @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include @@ -53,14 +54,15 @@ struct instantiator { void test_urbg() { // COMPILE-ONLY struct RandGen { - static constexpr bool min() { - return false; + using result_type = uint16_t; // N5014 [rand.req.urng]/3 + static constexpr result_type min() { + return 3; } - static constexpr bool max() { - return true; + static constexpr result_type max() { + return 1729; } - bool operator()() & { - return false; + result_type operator()() & { + return 4; } }; diff --git a/tests/tr1/tests/algorithm/test.cpp b/tests/tr1/tests/algorithm/test.cpp index ca9ee9c9d38..215a792b0ee 100644 --- a/tests/tr1/tests/algorithm/test.cpp +++ b/tests/tr1/tests/algorithm/test.cpp @@ -304,11 +304,11 @@ struct rand_gen { // uniform random number generator return mt() & 0xfffff; } - static result_type(min)() { // get minimum value + static constexpr result_type(min)() { // get minimum value return 0; } - static result_type(max)() { // get maximum value + static constexpr result_type(max)() { // get maximum value return 0xfffff; } };