From 3d8706f69d14c89ed6a2bacb61458e6b998fca31 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 4 Aug 2020 09:21:50 +0300 Subject: [PATCH 1/5] cache HW thread count in resolves #1134 (conservatively) revert to what there were before atomic wait --- stl/inc/execution | 70 +++++++++++++++++---------------- stl/src/parallel_algorithms.cpp | 8 +++- 2 files changed, 43 insertions(+), 35 deletions(-) diff --git a/stl/inc/execution b/stl/inc/execution index 7fea657d087..1759459df45 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -44,6 +44,8 @@ using __std_PTP_WORK = __std_TP_WORK*; using __std_PTP_CALLBACK_INSTANCE = __std_TP_CALLBACK_INSTANCE*; using __std_PTP_CALLBACK_ENVIRON = __std_TP_CALLBACK_ENVIRON*; +_NODISCARD unsigned int __stdcall __std_parallel_algorithms_hw_threads() noexcept; + using __std_PTP_WORK_CALLBACK = void(__stdcall*)( _Inout_ __std_PTP_CALLBACK_INSTANCE, _Inout_opt_ void*, _Inout_ __std_PTP_WORK); @@ -1113,7 +1115,7 @@ struct _Static_partitioned_all_of_family2 { // all_of/any_of/none_of task schedu template bool _All_of_family_parallel(_FwdIt _First, const _FwdIt _Last, _Pr _Pred) { // test if all elements in [_First, _Last) satisfy _Pred (or !_Pred if _Invert is true) in parallel - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_First, _Last); if (_Count >= 2) { // ... with at least 2 elements @@ -1224,7 +1226,7 @@ void for_each(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Fn _Func) noexcept /* termi auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... 
auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { // ... with at least 2 elements @@ -1271,7 +1273,7 @@ _FwdIt for_each_n(_ExPo&&, _FwdIt _First, const _Diff _Count_raw, _Fn _Func) noe if (0 < _Count) { auto _UFirst = _Get_unwrapped_n(_First, _Count); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1 && _Count >= 2) { // parallelize on multiprocessor machines with at least 2 elements _TRY_BEGIN auto _Passed_fn = _Pass_fn(_Func); @@ -1347,7 +1349,7 @@ template _FwdIt _Find_parallel_unchecked(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, const _Find_fx _Fx) { // find first matching _Val, potentially in parallel if (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_First, _Last); if (_Count >= 2) { @@ -1562,7 +1564,7 @@ _NODISCARD _FwdIt1 find_end(_ExPo&&, _FwdIt1 _First1, const _FwdIt1 _Last1, cons const auto _UFirst2 = _Get_unwrapped(_First2); const auto _ULast2 = _Get_unwrapped(_Last2); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { if constexpr (_Is_bidi_iter_v<_FwdIt1>) { const auto _Partition_start = @@ -1682,7 +1684,7 @@ _NODISCARD _FwdIt adjacent_find(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Pr _Pred) auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = static_cast<_Iter_diff_t<_FwdIt>>(_STD 
distance(_UFirst, _ULast) - 1); if (_Count >= 2) { @@ -1743,7 +1745,7 @@ _NODISCARD _Iter_diff_t<_FwdIt> count_if(_ExPo&&, const _FwdIt _First, const _Fw auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { @@ -1907,7 +1909,7 @@ _NODISCARD pair<_FwdIt1, _FwdIt2> mismatch( const auto _UFirst1 = _Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst1, _ULast1); const auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); @@ -1952,7 +1954,7 @@ _NODISCARD pair<_FwdIt1, _FwdIt2> mismatch( const auto _UFirst2 = _Get_unwrapped(_First2); const auto _ULast2 = _Get_unwrapped(_Last2); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = static_cast<_Iter_diff_t<_FwdIt1>>(_Distance_min(_UFirst1, _ULast1, _UFirst2, _ULast2)); if (_Count >= 2) { @@ -2027,7 +2029,7 @@ _NODISCARD bool equal(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, cons const auto _UFirst1 = _Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst1, _ULast1); const auto _UFirst2 = _Get_unwrapped_n(_First2, 
_Count); @@ -2063,7 +2065,7 @@ _NODISCARD bool equal(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, cons const auto _UFirst2 = _Get_unwrapped(_First2); const auto _ULast2 = _Get_unwrapped(_Last2); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = _Distance_any(_UFirst1, _ULast1, _UFirst2, _ULast2); if (_Count >= 2) { @@ -2153,7 +2155,7 @@ _NODISCARD _FwdItHaystack search(_ExPo&&, const _FwdItHaystack _First1, _FwdItHa const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { _Iter_diff_t<_FwdItHaystack> _Count; if constexpr (_Is_random_iter_v<_FwdItHaystack> && _Is_random_iter_v<_FwdItPat>) { @@ -2284,7 +2286,7 @@ _NODISCARD _FwdIt search_n(_ExPo&&, const _FwdIt _First, _FwdIt _Last, const _Di auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Haystack_count = _STD distance(_UFirst, _ULast); if (_Count > _Haystack_count) { @@ -2352,7 +2354,7 @@ _FwdIt2 transform(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _D auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... 
const auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -2432,7 +2434,7 @@ _FwdIt3 transform(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, const _F const auto _UFirst1 = _Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_UFirst1, _ULast1); const auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); @@ -2612,7 +2614,7 @@ _NODISCARD _FwdIt remove_if(_ExPo&&, _FwdIt _First, const _FwdIt _Last, _Pr _Pre auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { @@ -2754,7 +2756,7 @@ void sort(_ExPo&&, const _RanIt _First, const _RanIt _Last, _Pr _Pred) noexcept const _Iter_diff_t<_RanIt> _Ideal = _ULast - _UFirst; if constexpr (remove_reference_t<_ExPo>::_Parallelize) { size_t _Threads; - if (_Ideal > _ISORT_MAX && (_Threads = thread::hardware_concurrency()) > 1) { + if (_Ideal > _ISORT_MAX && (_Threads = __std_parallel_algorithms_hw_threads()) > 1) { // parallelize when input is large enough and we aren't on a uniprocessor machine _TRY_BEGIN _Sort_operation _Operation(_UFirst, _Pass_fn(_Pred), _Threads, _Ideal); // throws @@ -3018,7 +3020,7 @@ void stable_sort(_ExPo&&, const _BidIt _First, const _BidIt _Last, _Pr _Pred) no size_t _Hw_threads; bool _Attempt_parallelism; if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - _Hw_threads = thread::hardware_concurrency(); + _Hw_threads = 
__std_parallel_algorithms_hw_threads(); _Attempt_parallelism = _Hw_threads > 1; } else { _Attempt_parallelism = false; @@ -3099,7 +3101,7 @@ _NODISCARD _FwdIt is_sorted_until(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Pr _Pre const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 3) { // ... with at least 3 elements @@ -3254,7 +3256,7 @@ _NODISCARD bool is_partitioned(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { // ... with at least 2 elements @@ -3327,7 +3329,7 @@ _NODISCARD _RanIt is_heap_until(_ExPo&&, _RanIt _First, _RanIt _Last, _Pr _Pred) const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _ULast - _UFirst; if (_Count >= 3) { // ... 
with at least 3 elements @@ -3576,7 +3578,7 @@ _FwdIt partition(_ExPo&&, _FwdIt _First, const _FwdIt _Last, _Pr _Pred) noexcept const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { const auto _Count = _STD distance(_UFirst, _ULast); if (_Count >= 2) { @@ -3918,7 +3920,7 @@ _FwdIt3 set_intersection(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt2 _Firs if constexpr (remove_reference_t<_ExPo>::_Parallelize && _Is_random_iter_v<_FwdIt1> && _Is_random_iter_v<_FwdIt2> && _Is_random_iter_v<_FwdIt3>) { // only parallelize if desired, and all of the iterators given are random access - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const _Diff _Count1 = _ULast1 - _UFirst1; const _Diff _Count2 = _ULast2 - _UFirst2; @@ -4009,7 +4011,7 @@ _FwdIt3 set_difference(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt2 _First2 if constexpr (remove_reference_t<_ExPo>::_Parallelize && _Is_random_iter_v<_FwdIt1> && _Is_random_iter_v<_FwdIt2> && _Is_random_iter_v<_FwdIt3>) { // only parallelize if desired, and all of the iterators given are random access - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const _Diff _Count = _ULast1 - _UFirst1; if (_Count >= 2) { // ... 
with at least 2 elements in [_First1, _Last1) @@ -4103,7 +4105,7 @@ _NODISCARD _Ty reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, _Ty _Val auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_UFirst, _ULast); const auto _Chunks = _Get_least2_chunked_work_chunk_count(_Hw_threads, _Count); @@ -4205,7 +4207,7 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt auto _UFirst1 = _Get_unwrapped(_First1); const auto _ULast1 = _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_UFirst1, _ULast1); auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); @@ -4300,7 +4302,7 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... 
const auto _Count = _STD distance(_UFirst, _ULast); const auto _Chunks = _Get_least2_chunked_work_chunk_count(_Hw_threads, _Count); @@ -4452,7 +4454,7 @@ _FwdIt2 exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdI const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4597,7 +4599,7 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_First, _Last); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4640,7 +4642,7 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4785,7 +4787,7 @@ _FwdIt2 transform_exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr 
(remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4932,7 +4934,7 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -4978,7 +4980,7 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L const auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); auto _UDest = _Get_unwrapped_n(_Dest, _Count); @@ -5080,7 +5082,7 @@ _FwdIt2 adjacent_difference(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, auto _UFirst = _Get_unwrapped(_First); const auto _ULast = _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { - const size_t _Hw_threads = thread::hardware_concurrency(); + const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines auto _Count = _STD distance(_UFirst, _ULast); const auto _UDest = _Get_unwrapped_n(_Dest, _Count); diff --git 
a/stl/src/parallel_algorithms.cpp b/stl/src/parallel_algorithms.cpp index b661116d312..b4e4013eee6 100644 --- a/stl/src/parallel_algorithms.cpp +++ b/stl/src/parallel_algorithms.cpp @@ -25,7 +25,13 @@ extern "C" { // TRANSITION, ABI _NODISCARD unsigned int __stdcall __std_parallel_algorithms_hw_threads() noexcept { - return _STD thread::hardware_concurrency(); + static int _Cached_hw_concurrency = -1; + unsigned int _Hw_concurrency = __iso_volatile_load32(&_Cached_hw_concurrency); + if (_Hw_concurrency == -1) { + _Hw_concurrency = _STD thread::hardware_concurrency(); + __iso_volatile_store32(&_Cached_hw_concurrency, _Hw_concurrency); + } + return _Hw_concurrency; } _NODISCARD PTP_WORK __stdcall __std_create_threadpool_work( From 91c60db619aea7e2ae838d469d266d2f5c33466b Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Tue, 4 Aug 2020 19:24:54 +0300 Subject: [PATCH 2/5] Update stl/src/parallel_algorithms.cpp Co-authored-by: Casey Carter --- stl/src/parallel_algorithms.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/src/parallel_algorithms.cpp b/stl/src/parallel_algorithms.cpp index b4e4013eee6..514b47dad5c 100644 --- a/stl/src/parallel_algorithms.cpp +++ b/stl/src/parallel_algorithms.cpp @@ -23,7 +23,6 @@ namespace { extern "C" { -// TRANSITION, ABI _NODISCARD unsigned int __stdcall __std_parallel_algorithms_hw_threads() noexcept { static int _Cached_hw_concurrency = -1; unsigned int _Hw_concurrency = __iso_volatile_load32(&_Cached_hw_concurrency); From e20b2695357ab43158f01ff7559b0a36153af4da Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Wed, 5 Aug 2020 06:18:30 +0300 Subject: [PATCH 3/5] Update stl/src/parallel_algorithms.cpp Co-authored-by: Billy O'Neal --- stl/src/parallel_algorithms.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/stl/src/parallel_algorithms.cpp b/stl/src/parallel_algorithms.cpp index 514b47dad5c..ec1603b46eb 100644 --- a/stl/src/parallel_algorithms.cpp +++ b/stl/src/parallel_algorithms.cpp @@ -25,12 
+25,14 @@ extern "C" { _NODISCARD unsigned int __stdcall __std_parallel_algorithms_hw_threads() noexcept { static int _Cached_hw_concurrency = -1; - unsigned int _Hw_concurrency = __iso_volatile_load32(&_Cached_hw_concurrency); + int _Hw_concurrency = __iso_volatile_load32(&_Cached_hw_concurrency); if (_Hw_concurrency == -1) { - _Hw_concurrency = _STD thread::hardware_concurrency(); + _Hw_concurrency = static_cast<int>(_STD thread::hardware_concurrency()); __iso_volatile_store32(&_Cached_hw_concurrency, _Hw_concurrency); } - return _Hw_concurrency; + + return static_cast<unsigned int>(_Hw_concurrency); + } _NODISCARD PTP_WORK __stdcall __std_create_threadpool_work( From 867ad867d482fc963d60e3411091a3bfa82ec805 Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Wed, 5 Aug 2020 06:19:40 +0300 Subject: [PATCH 4/5] #exclude --- stl/inc/execution | 1 - 1 file changed, 1 deletion(-) diff --git a/stl/inc/execution b/stl/inc/execution index 1759459df45..427ea5d0dac 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -18,7 +18,6 @@ #include #include #include -#include #include #include From e1a0db07a8efcfc00c59099b295ac5a9671dd20b Mon Sep 17 00:00:00 2001 From: Alex Guteniev Date: Wed, 5 Aug 2020 06:21:03 +0300 Subject: [PATCH 5/5] clang format --- stl/src/parallel_algorithms.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/stl/src/parallel_algorithms.cpp b/stl/src/parallel_algorithms.cpp index ec1603b46eb..47c2bc4f552 100644 --- a/stl/src/parallel_algorithms.cpp +++ b/stl/src/parallel_algorithms.cpp @@ -25,14 +25,13 @@ extern "C" { _NODISCARD unsigned int __stdcall __std_parallel_algorithms_hw_threads() noexcept { static int _Cached_hw_concurrency = -1; - int _Hw_concurrency = __iso_volatile_load32(&_Cached_hw_concurrency); + int _Hw_concurrency = __iso_volatile_load32(&_Cached_hw_concurrency); if (_Hw_concurrency == -1) { _Hw_concurrency = static_cast<int>(_STD thread::hardware_concurrency()); __iso_volatile_store32(&_Cached_hw_concurrency, _Hw_concurrency); } return
static_cast<unsigned int>(_Hw_concurrency); - } _NODISCARD PTP_WORK __stdcall __std_create_threadpool_work(