From 1969309e0bf5834bc2ecc5966da3890a2ccbe0bf Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 14 Feb 2024 20:29:02 +0800 Subject: [PATCH 1/7] `_STD`- and `_RANGES`-qualify internal function calls --- stl/inc/execution | 300 +++++++++++++++++++++++----------------------- stl/inc/numeric | 132 ++++++++++---------- stl/inc/vector | 2 +- stl/inc/xmemory | 4 +- 4 files changed, 219 insertions(+), 219 deletions(-) diff --git a/stl/inc/execution b/stl/inc/execution index 5f26f95a7e..aedd8c2c51 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -295,7 +295,7 @@ struct _Parallelism_allocator { void deallocate(_Ty* const _Ptr, const size_t _Count) { // no overflow check on the following multiply; we assume _Allocate did that check - _Deallocate<_New_alignof<_Ty>>(_Ptr, sizeof(_Ty) * _Count); + _STD _Deallocate<_New_alignof<_Ty>>(_Ptr, sizeof(_Ty) * _Count); } template @@ -327,9 +327,9 @@ struct _Generalized_sum_drop { // drop off point for GENERALIZED_SUM intermediat ~_Generalized_sum_drop() noexcept { // pre: the caller has synchronized with all threads that modify _Data. 
- _Destroy_range(begin(), end()); + _STD _Destroy_range(begin(), end()); // no overflow check on the following multiply; we assume _Allocate did that check - _Deallocate<_New_alignof<_Ty>>(_Data, sizeof(_Ty) * _Slots); + _STD _Deallocate<_New_alignof<_Ty>>(_Data, sizeof(_Ty) * _Slots); } template @@ -337,7 +337,7 @@ struct _Generalized_sum_drop { // drop off point for GENERALIZED_SUM intermediat // constructs a _Ty in place with _Vals parameters perfectly forwarded // pre: the number of results added is less than the size the drop was constructed with const size_t _Target = _Frontier++; - _Construct_in_place(_Data[_Target], _STD forward<_Args>(_Vals)...); + _STD _Construct_in_place(_Data[_Target], _STD forward<_Args>(_Vals)...); } _Ty* begin() { @@ -3602,7 +3602,7 @@ struct _Scan_decoupled_lookback { template void _Apply_exclusive_predecessor(_Ty& _Preceding, _FwdIt _First, const _FwdIt _Last, _BinOp _Reduce_op) { // apply _Preceding to [_First, _Last) and _Sum._Ref(), using _Reduce_op - _Construct_in_place(_Sum._Ref(), _Reduce_op(_Preceding, _Local._Ref())); + _STD _Construct_in_place(_Sum._Ref(), _Reduce_op(_Preceding, _Local._Ref())); _State.store(_Local_available | _Sum_available); *_First = _Preceding; @@ -3615,7 +3615,7 @@ struct _Scan_decoupled_lookback { template void _Apply_inclusive_predecessor(_Ty& _Preceding, _FwdIt _First, const _FwdIt _Last, _BinOp _Reduce_op) { // apply _Preceding to [_First, _Last) and _Sum._Ref(), using _Reduce_op - _Construct_in_place(_Sum._Ref(), _Reduce_op(_Preceding, _Local._Ref())); + _STD _Construct_in_place(_Sum._Ref(), _Reduce_op(_Preceding, _Local._Ref())); _State.store(_Local_available | _Sum_available); #pragma loop(ivdep) @@ -3627,11 +3627,11 @@ struct _Scan_decoupled_lookback { ~_Scan_decoupled_lookback() { const auto _State_bits = _State.load(memory_order_relaxed); if (_State_bits & _Sum_available) { - _Destroy_in_place(_Sum._Ref()); + _STD _Destroy_in_place(_Sum._Ref()); } if (_State_bits & _Local_available) { - 
_Destroy_in_place(_Local._Ref()); + _STD _Destroy_in_place(_Local._Ref()); } } }; @@ -4057,7 +4057,7 @@ struct _Static_partitioned_reduce2 { auto _Key = _This->_Team._Get_next_key(); if (_Key) { auto _Chunk = _This->_Basis._Get_chunk(_Key); - auto _Local_result = _Reduce_at_least_two<_Ty>(_Chunk._First, _Chunk._Last, _This->_Reduce_op); + auto _Local_result = _STD _Reduce_at_least_two<_Ty>(_Chunk._First, _Chunk._Last, _This->_Reduce_op); while ((_Key = _This->_Team._Get_next_key())) { _Chunk = _This->_Basis._Get_chunk(_Key); _Local_result = _STD reduce(_Chunk._First, _Chunk._Last, _STD move(_Local_result), _This->_Reduce_op); @@ -4074,9 +4074,9 @@ _NODISCARD _Ty reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, _Ty _Val /* terminates */ { // return commutative and associative reduction of _Val and [_First, _Last), using _Reduce_op _REQUIRE_PARALLEL_ITERATOR(_FwdIt); - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... 
@@ -4084,7 +4084,7 @@ _NODISCARD _Ty reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, _Ty _Val const auto _Chunks = _Get_least2_chunked_work_chunk_count(_Hw_threads, _Count); if (_Chunks > 1) { _TRY_BEGIN - auto _Passed_fn = _Pass_fn(_Reduce_op); + auto _Passed_fn = _STD _Pass_fn(_Reduce_op); _Static_partitioned_reduce2 _Operation{ _Count, _Chunks, _UFirst, _Passed_fn}; { @@ -4095,12 +4095,12 @@ _NODISCARD _Ty reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, _Ty _Val _Work._Submit_for_chunks(_Hw_threads, _Chunks); while (const auto _Stolen_key = _Operation._Team._Get_next_key()) { auto _Chunk = _Operation._Basis._Get_chunk(_Stolen_key); - _Val = _STD reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), _Pass_fn(_Reduce_op)); + _Val = _STD reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), _STD _Pass_fn(_Reduce_op)); } } // join with _Work_ptr threads auto& _Results = _Operation._Results; - return _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _Pass_fn(_Reduce_op)); + return _STD _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END @@ -4108,7 +4108,7 @@ _NODISCARD _Ty reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, _Ty _Val } } - return _STD reduce(_UFirst, _ULast, _STD move(_Val), _Pass_fn(_Reduce_op)); + return _STD reduce(_UFirst, _ULast, _STD move(_Val), _STD _Pass_fn(_Reduce_op)); } template @@ -4172,19 +4172,19 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt // return commutative and associative transform-reduction of sequences, using _Reduce_op and _Transform_op _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_PARALLEL_ITERATOR(_FwdIt2); - _Adl_verify_range(_First1, _Last1); - auto _UFirst1 = _Get_unwrapped(_First1); - const auto _ULast1 = _Get_unwrapped(_Last1); + _STD _Adl_verify_range(_First1, _Last1); + auto _UFirst1 = _STD 
_Get_unwrapped(_First1); + const auto _ULast1 = _STD _Get_unwrapped(_Last1); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... const auto _Count = _STD distance(_UFirst1, _ULast1); - auto _UFirst2 = _Get_unwrapped_n(_First2, _Count); + auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _Count); const auto _Chunks = _Get_least2_chunked_work_chunk_count(_Hw_threads, _Count); if (_Chunks > 1) { _TRY_BEGIN - auto _Passed_reduce = _Pass_fn(_Reduce_op); - auto _Passed_transform = _Pass_fn(_Transform_op); + auto _Passed_reduce = _STD _Pass_fn(_Reduce_op); + auto _Passed_transform = _STD _Pass_fn(_Transform_op); _Static_partitioned_transform_reduce_binary2 _Operation{_Count, _Chunks, _UFirst1, _UFirst2, _Passed_reduce, _Passed_transform}; @@ -4199,25 +4199,25 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt _Val = _STD transform_reduce(_Chunk1._First, _Chunk1._Last, _Operation._Basis2._Get_first( _Chunk_number, _Operation._Team._Get_chunk_offset(_Chunk_number)), - _STD move(_Val), _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op)); + _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op)); } } // join with _Work_ptr threads auto& _Results = _Operation._Results; // note: already transformed - return _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _Pass_fn(_Reduce_op)); + return _STD _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } return _STD transform_reduce( - _UFirst1, _ULast1, _UFirst2, _STD move(_Val), _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op)); + _UFirst1, _ULast1, _UFirst2, _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op)); } } return _STD transform_reduce(_UFirst1, _ULast1, - 
_Get_unwrapped_n(_First2, _Idl_distance<_FwdIt1>(_UFirst1, _ULast1)), _STD move(_Val), _Pass_fn(_Reduce_op), - _Pass_fn(_Transform_op)); + _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_FwdIt1>(_UFirst1, _ULast1)), _STD move(_Val), _STD _Pass_fn(_Reduce_op), + _STD _Pass_fn(_Transform_op)); } template @@ -4268,9 +4268,9 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last _UnaryOp _Transform_op) noexcept /* terminates */ { // return commutative and associative reduction of transformed sequence, using _Reduce_op and _Transform_op _REQUIRE_PARALLEL_ITERATOR(_FwdIt); - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines... 
@@ -4278,8 +4278,8 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last const auto _Chunks = _Get_least2_chunked_work_chunk_count(_Hw_threads, _Count); if (_Chunks > 1) { _TRY_BEGIN - auto _Passed_reduce = _Pass_fn(_Reduce_op); - auto _Passed_transform = _Pass_fn(_Transform_op); + auto _Passed_reduce = _STD _Pass_fn(_Reduce_op); + auto _Passed_transform = _STD _Pass_fn(_Transform_op); _Static_partitioned_transform_reduce2 _Operation{_Count, _Chunks, _UFirst, _Passed_reduce, _Passed_transform}; @@ -4290,13 +4290,13 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last while (auto _Stolen_key = _Operation._Team._Get_next_key()) { // keep processing remaining chunks to comply with N4950 [intro.progress]/14 auto _Chunk = _Operation._Basis._Get_chunk(_Stolen_key); - _Val = _STD transform_reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), _Pass_fn(_Reduce_op), - _Pass_fn(_Transform_op)); + _Val = _STD transform_reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), _STD _Pass_fn(_Reduce_op), + _STD _Pass_fn(_Transform_op)); } } // join with _Work_ptr threads auto& _Results = _Operation._Results; // note: already transformed - return _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _Pass_fn(_Reduce_op)); + return _STD _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END @@ -4304,7 +4304,7 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last } } - return _STD transform_reduce(_UFirst, _ULast, _STD move(_Val), _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op)); + return _STD transform_reduce(_UFirst, _ULast, _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op)); } struct _No_init_tag { @@ -4316,7 +4316,7 @@ _FwdIt2 _Exclusive_scan_per_chunk(_FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _ // local-sum 
for parallel exclusive_scan; writes local sums into [_Dest + 1, _Dest + (_Last - _First)) and stores // successor sum in _Val // pre: _Val is *uninitialized* && _First != _Last - _Construct_in_place(_Val, *_First); + _STD _Construct_in_place(_Val, *_First); for (;;) { ++_First; ++_Dest; @@ -4336,7 +4336,7 @@ void _Exclusive_scan_per_chunk_complete( // Sum for parallel exclusive_scan with predecessor available, into [_Dest, _Dest + (_Last - _First)) and stores // successor sum in _Val. // Pre: _Val is *uninitialized* && _First != _Last && predecessor sum is in _Init - _Construct_in_place(_Val, _Reduce_op(_Init, *_First)); + _STD _Construct_in_place(_Val, _Reduce_op(_Init, *_First)); *_Dest = _Init; while (++_First != _Last) { ++_Dest; @@ -4375,16 +4375,16 @@ struct _Static_partitioned_exclusive_scan2 { // Run local exclusive_scan on this chunk const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same - _Exclusive_scan_per_chunk_complete( + _STD _Exclusive_scan_per_chunk_complete( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Sum._Ref(), _Initial); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } - const auto _Prev_chunk = _Prev_iter(_Chunk); + const auto _Prev_chunk = _STD _Prev_iter(_Chunk); if (_Prev_chunk->_State.load() & _Sum_available) { // if predecessor sum already complete, we can incorporate its value directly for 1 pass - _Exclusive_scan_per_chunk_complete( + _STD _Exclusive_scan_per_chunk_complete( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Sum._Ref(), _Prev_chunk->_Sum._Ref()); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; @@ -4392,14 +4392,14 @@ struct _Static_partitioned_exclusive_scan2 { // Calculate local sum and publish to other threads const auto _Last = - _Exclusive_scan_per_chunk(_In_range._First, 
_In_range._Last, _Dest, _Reduce_op, _Chunk->_Local._Ref()); + _STD _Exclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Local._Ref()); _Chunk->_Store_available_state(_Local_available); // Apply the predecessor overall sum to current overall sum and elements if (_Prev_chunk->_Get_available_state() & _Sum_available) { // predecessor overall sum done, use directly _Chunk->_Apply_exclusive_predecessor(_Prev_chunk->_Sum._Ref(), _Dest, _Last, _Reduce_op); } else { - auto _Tmp = _Get_lookback_sum(_Prev_chunk, _Reduce_op); + auto _Tmp = _STD _Get_lookback_sum(_Prev_chunk, _Reduce_op); _Chunk->_Apply_exclusive_predecessor(_Tmp, _Dest, _Last, _Reduce_op); } @@ -4408,7 +4408,7 @@ struct _Static_partitioned_exclusive_scan2 { static void __stdcall _Threadpool_callback( __std_PTP_CALLBACK_INSTANCE, void* const _Context, __std_PTP_WORK) noexcept /* terminates */ { - _Run_available_chunked_work(*static_cast<_Static_partitioned_exclusive_scan2*>(_Context)); + _STD _Run_available_chunked_work(*static_cast<_Static_partitioned_exclusive_scan2*>(_Context)); } }; @@ -4419,37 +4419,37 @@ _FwdIt2 exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdI // set each value in [_Dest, _Dest + (_Last - _First)) to the associative reduction of predecessors and _Val _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_CPP17_MUTABLE_ITERATOR(_FwdIt2); - _Adl_verify_range(_First, _Last); - const auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + const auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); - const auto _UDest = _Get_unwrapped_n(_Dest, _Count); + const auto _UDest = _STD 
_Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... with at least 2 elements _TRY_BEGIN _Static_partitioned_exclusive_scan2 _Operation{ - _Hw_threads, _Count, _UFirst, _Val, _Pass_fn(_Reduce_op), _UDest}; - _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); + _Hw_threads, _Count, _UFirst, _Val, _STD _Pass_fn(_Reduce_op), _UDest}; + _STD _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); // Note that _Val is used as temporary storage by whichever thread runs the first chunk. // If any thread starts any chunk, initialization is complete, so we can't enter the // catch or serial fallback below, so that's OK. - _Run_chunked_parallel_work(_Hw_threads, _Operation); + _STD _Run_chunked_parallel_work(_Hw_threads, _Operation); return _Dest; _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } - _Seek_wrapped(_Dest, _STD exclusive_scan(_UFirst, _ULast, _UDest, _STD move(_Val), _Pass_fn(_Reduce_op))); + _STD _Seek_wrapped(_Dest, _STD exclusive_scan(_UFirst, _ULast, _UDest, _STD move(_Val), _STD _Pass_fn(_Reduce_op))); return _Dest; } } - _Seek_wrapped( - _Dest, _STD exclusive_scan(_UFirst, _ULast, _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _STD move(_Val), _Pass_fn(_Reduce_op))); + _STD _Seek_wrapped( + _Dest, _STD exclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD move(_Val), _STD _Pass_fn(_Reduce_op))); return _Dest; } @@ -4460,9 +4460,9 @@ _FwdIt2 _Inclusive_scan_per_chunk( // _Val. 
// pre: _Val is *uninitialized* && _First != _Last if constexpr (is_same_v<_No_init_tag, remove_const_t>>) { - _Construct_in_place(_Val, *_First); + _STD _Construct_in_place(_Val, *_First); } else { - _Construct_in_place(_Val, _Reduce_op(_STD forward<_Ty_fwd>(_Predecessor), *_First)); + _STD _Construct_in_place(_Val, _Reduce_op(_STD forward<_Ty_fwd>(_Predecessor), *_First)); } for (;;) { @@ -4504,23 +4504,23 @@ struct _Static_partitioned_inclusive_scan2 { // Run local inclusive_scan on this chunk const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same - _Inclusive_scan_per_chunk( + _STD _Inclusive_scan_per_chunk( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Sum._Ref(), _STD move(_Initial)); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } - const auto _Prev_chunk = _Prev_iter(_Chunk); + const auto _Prev_chunk = _STD _Prev_iter(_Chunk); if (_Prev_chunk->_State.load() & _Sum_available) { // if predecessor sum already complete, we can incorporate its value directly for 1 pass - _Inclusive_scan_per_chunk( + _STD _Inclusive_scan_per_chunk( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Sum._Ref(), _Prev_chunk->_Sum._Ref()); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } // Calculate local sum and publish to other threads - const auto _Last = _Inclusive_scan_per_chunk( + const auto _Last = _STD _Inclusive_scan_per_chunk( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Local._Ref(), _No_init_tag{}); _Chunk->_Store_available_state(_Local_available); @@ -4528,7 +4528,7 @@ struct _Static_partitioned_inclusive_scan2 { if (_Prev_chunk->_Get_available_state() & _Sum_available) { // predecessor overall sum done, use directly _Chunk->_Apply_inclusive_predecessor(_Prev_chunk->_Sum._Ref(), _Dest, _Last, _Reduce_op); } else { - auto 
_Tmp = _Get_lookback_sum(_Prev_chunk, _Reduce_op); + auto _Tmp = _STD _Get_lookback_sum(_Prev_chunk, _Reduce_op); _Chunk->_Apply_inclusive_predecessor(_Tmp, _Dest, _Last, _Reduce_op); } @@ -4537,7 +4537,7 @@ struct _Static_partitioned_inclusive_scan2 { static void __stdcall _Threadpool_callback( __std_PTP_CALLBACK_INSTANCE, void* const _Context, __std_PTP_WORK) noexcept /* terminates */ { - _Run_available_chunked_work(*static_cast<_Static_partitioned_inclusive_scan2*>(_Context)); + _STD _Run_available_chunked_work(*static_cast<_Static_partitioned_inclusive_scan2*>(_Context)); } }; @@ -4548,40 +4548,40 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B // compute partial noncommutative and associative reductions including _Val into _Dest, using _Reduce_op _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_CPP17_MUTABLE_ITERATOR(_FwdIt2); - _Adl_verify_range(_First, _Last); - const auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + const auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_First, _Last); - auto _UDest = _Get_unwrapped_n(_Dest, _Count); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... 
with at least 2 elements _TRY_BEGIN - auto _Passed_op = _Pass_fn(_Reduce_op); + auto _Passed_op = _STD _Pass_fn(_Reduce_op); _Static_partitioned_inclusive_scan2<_Ty, _Ty, _Unwrapped_t, decltype(_UDest), decltype(_Passed_op)> _Operation{_Hw_threads, _Count, _Passed_op, _Val}; _Operation._Basis1._Populate(_Operation._Team, _UFirst); - _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); + _STD _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); // Note that _Val is moved from by whichever thread runs the first chunk. // If any thread starts any chunk, initialization is complete, so we can't enter the // catch or serial fallback below. - _Run_chunked_parallel_work(_Hw_threads, _Operation); + _STD _Run_chunked_parallel_work(_Hw_threads, _Operation); return _Dest; _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } - _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, _UDest, _Pass_fn(_Reduce_op), _STD move(_Val))); + _STD _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Reduce_op), _STD move(_Val))); return _Dest; } } - _Seek_wrapped( - _Dest, _STD inclusive_scan(_UFirst, _ULast, _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _Pass_fn(_Reduce_op), _STD move(_Val))); + _STD _Seek_wrapped( + _Dest, _STD inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD _Pass_fn(_Reduce_op), _STD move(_Val))); return _Dest; } @@ -4592,37 +4592,37 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B // compute partial noncommutative and associative reductions into _Dest, using _Reduce_op _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_CPP17_MUTABLE_ITERATOR(_FwdIt2); - _Adl_verify_range(_First, _Last); - const auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + const 
auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); - auto _UDest = _Get_unwrapped_n(_Dest, _Count); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... with at least 2 elements _TRY_BEGIN _No_init_tag _Tag; - auto _Passed_op = _Pass_fn(_Reduce_op); + auto _Passed_op = _STD _Pass_fn(_Reduce_op); _Static_partitioned_inclusive_scan2<_Iter_value_t<_FwdIt1>, _No_init_tag, _Unwrapped_t, decltype(_UDest), decltype(_Passed_op)> _Operation{_Hw_threads, _Count, _Passed_op, _Tag}; _Operation._Basis1._Populate(_Operation._Team, _UFirst); - _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); - _Run_chunked_parallel_work(_Hw_threads, _Operation); + _STD _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); + _STD _Run_chunked_parallel_work(_Hw_threads, _Operation); return _Dest; _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } - _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, _UDest, _Pass_fn(_Reduce_op))); + _STD _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Reduce_op))); return _Dest; } } - _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, - _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _Pass_fn(_Reduce_op))); + _STD _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, + _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _STD _Pass_fn(_Reduce_op))); return _Dest; } @@ -4632,7 +4632,7 @@ _FwdIt2 _Transform_exclusive_scan_per_chunk( // Local-sum for parallel transform_exclusive_scan; writes local sums into [_Dest + 1, _Dest + (_Last - _First)) and // stores successor sum in 
_Val. // pre: _Val is *uninitialized* && _First != _Last - _Construct_in_place(_Val, _Transform_op(*_First)); + _STD _Construct_in_place(_Val, _Transform_op(*_First)); for (;;) { ++_First; ++_Dest; @@ -4652,7 +4652,7 @@ void _Transform_exclusive_scan_per_chunk_complete(_FwdIt1 _First, const _FwdIt1 // Sum for parallel transform_exclusive_scan with predecessor available, into [_Dest, _Dest + (_Last - _First)) and // stores successor sum in _Val. // pre: _Val is *uninitialized* && _First != _Last && predecessor sum is in _Init - _Construct_in_place(_Val, _Reduce_op(_Init, _Transform_op(*_First))); + _STD _Construct_in_place(_Val, _Reduce_op(_Init, _Transform_op(*_First))); *_Dest = _Init; while (++_First != _Last) { ++_Dest; @@ -4692,23 +4692,23 @@ struct _Static_partitioned_transform_exclusive_scan2 { // Run local transform_exclusive_scan on this chunk const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same - _Transform_exclusive_scan_per_chunk_complete( + _STD _Transform_exclusive_scan_per_chunk_complete( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Sum._Ref(), _Initial); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } - const auto _Prev_chunk = _Prev_iter(_Chunk); + const auto _Prev_chunk = _STD _Prev_iter(_Chunk); if (_Prev_chunk->_State.load() & _Sum_available) { // if predecessor sum already complete, we can incorporate its value directly for 1 pass - _Transform_exclusive_scan_per_chunk_complete(_In_range._First, _In_range._Last, _Dest, _Reduce_op, + _STD _Transform_exclusive_scan_per_chunk_complete(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Sum._Ref(), _Prev_chunk->_Sum._Ref()); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } // Calculate local sum and publish to other threads - const auto 
_Last = _Transform_exclusive_scan_per_chunk( + const auto _Last = _STD _Transform_exclusive_scan_per_chunk( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Local._Ref()); _Chunk->_Store_available_state(_Local_available); @@ -4716,7 +4716,7 @@ struct _Static_partitioned_transform_exclusive_scan2 { if (_Prev_chunk->_Get_available_state() & _Sum_available) { // predecessor overall sum done, use directly _Chunk->_Apply_exclusive_predecessor(_Prev_chunk->_Sum._Ref(), _Dest, _Last, _Reduce_op); } else { - auto _Tmp = _Get_lookback_sum(_Prev_chunk, _Reduce_op); + auto _Tmp = _STD _Get_lookback_sum(_Prev_chunk, _Reduce_op); _Chunk->_Apply_exclusive_predecessor(_Tmp, _Dest, _Last, _Reduce_op); } @@ -4725,7 +4725,7 @@ struct _Static_partitioned_transform_exclusive_scan2 { static void __stdcall _Threadpool_callback( __std_PTP_CALLBACK_INSTANCE, void* const _Context, __std_PTP_WORK) noexcept /* terminates */ { - _Run_available_chunked_work(*static_cast<_Static_partitioned_transform_exclusive_scan2*>(_Context)); + _STD _Run_available_chunked_work(*static_cast<_Static_partitioned_transform_exclusive_scan2*>(_Context)); } }; @@ -4736,38 +4736,38 @@ _FwdIt2 transform_exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L // set each value in [_Dest, _Dest + (_Last - _First)) to the associative reduction of transformed predecessors _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_CPP17_MUTABLE_ITERATOR(_FwdIt2); - _Adl_verify_range(_First, _Last); - const auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + const auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); - const auto _UDest = 
_Get_unwrapped_n(_Dest, _Count); + const auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... with at least 2 elements _TRY_BEGIN _Static_partitioned_transform_exclusive_scan2 _Operation{ - _Hw_threads, _Count, _UFirst, _Val, _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op), _UDest}; - _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); + _Hw_threads, _Count, _UFirst, _Val, _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op), _UDest}; + _STD _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); // Note that _Val is used as temporary storage by whichever thread runs the first chunk. // If any thread starts any chunk, initialization is complete, so we can't enter the // catch or serial fallback below, so that's OK. - _Run_chunked_parallel_work(_Hw_threads, _Operation); + _STD _Run_chunked_parallel_work(_Hw_threads, _Operation); return _Dest; _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } - _Seek_wrapped(_Dest, _STD transform_exclusive_scan(_UFirst, _ULast, _UDest, _STD move(_Val), - _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op))); + _STD _Seek_wrapped(_Dest, _STD transform_exclusive_scan(_UFirst, _ULast, _UDest, _STD move(_Val), + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); return _Dest; } } - _Seek_wrapped(_Dest, - _STD transform_exclusive_scan(_UFirst, _ULast, _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _STD move(_Val), _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op))); + _STD _Seek_wrapped(_Dest, + _STD transform_exclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); return _Dest; } @@ -4778,9 +4778,9 @@ _FwdIt2 _Transform_inclusive_scan_per_chunk(_FwdIt1 _First, const _FwdIt1 _Last, // sum in _Val // pre: _Val is *uninitialized* && _First != _Last if constexpr 
(is_same_v<_No_init_tag, remove_const_t>>) { - _Construct_in_place(_Val, _Transform_op(*_First)); + _STD _Construct_in_place(_Val, _Transform_op(*_First)); } else { - _Construct_in_place(_Val, _Reduce_op(_STD forward<_Ty_fwd>(_Predecessor), _Transform_op(*_First))); + _STD _Construct_in_place(_Val, _Reduce_op(_STD forward<_Ty_fwd>(_Predecessor), _Transform_op(*_First))); } for (;;) { @@ -4823,23 +4823,23 @@ struct _Static_partitioned_transform_inclusive_scan2 { // Run local transform_inclusive_scan on this chunk const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same - _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, + _STD _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Sum._Ref(), _STD move(_Initial)); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } - const auto _Prev_chunk = _Prev_iter(_Chunk); + const auto _Prev_chunk = _STD _Prev_iter(_Chunk); if (_Prev_chunk->_State.load() & _Sum_available) { // if predecessor sum already complete, we can incorporate its value directly for 1 pass - _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, + _STD _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Sum._Ref(), _Prev_chunk->_Sum._Ref()); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } // Calculate local sum and publish to other threads - const auto _Last = _Transform_inclusive_scan_per_chunk( + const auto _Last = _STD _Transform_inclusive_scan_per_chunk( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Local._Ref(), _No_init_tag{}); _Chunk->_Store_available_state(_Local_available); @@ -4847,7 +4847,7 @@ 
struct _Static_partitioned_transform_inclusive_scan2 { if (_Prev_chunk->_Get_available_state() & _Sum_available) { // predecessor overall sum done, use directly _Chunk->_Apply_inclusive_predecessor(_Prev_chunk->_Sum._Ref(), _Dest, _Last, _Reduce_op); } else { - auto _Tmp = _Get_lookback_sum(_Prev_chunk, _Reduce_op); + auto _Tmp = _STD _Get_lookback_sum(_Prev_chunk, _Reduce_op); _Chunk->_Apply_inclusive_predecessor(_Tmp, _Dest, _Last, _Reduce_op); } @@ -4856,7 +4856,7 @@ struct _Static_partitioned_transform_inclusive_scan2 { static void __stdcall _Threadpool_callback( __std_PTP_CALLBACK_INSTANCE, void* const _Context, __std_PTP_WORK) noexcept /* terminates */ { - _Run_available_chunked_work(*static_cast<_Static_partitioned_transform_inclusive_scan2*>(_Context)); + _STD _Run_available_chunked_work(*static_cast<_Static_partitioned_transform_inclusive_scan2*>(_Context)); } }; @@ -4867,42 +4867,42 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L // compute partial noncommutative and associative transformed reductions including _Val into _Dest _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_CPP17_MUTABLE_ITERATOR(_FwdIt2); - _Adl_verify_range(_First, _Last); - const auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + const auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); - auto _UDest = _Get_unwrapped_n(_Dest, _Count); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... 
with at least 2 elements _TRY_BEGIN - auto _Passed_reduce = _Pass_fn(_Reduce_op); - auto _Passed_transform = _Pass_fn(_Transform_op); + auto _Passed_reduce = _STD _Pass_fn(_Reduce_op); + auto _Passed_transform = _STD _Pass_fn(_Transform_op); _Static_partitioned_transform_inclusive_scan2<_Ty, _Ty, _Unwrapped_t, decltype(_UDest), decltype(_Passed_reduce), decltype(_Passed_transform)> _Operation{_Hw_threads, _Count, _Passed_reduce, _Passed_transform, _Val}; _Operation._Basis1._Populate(_Operation._Team, _UFirst); - _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); + _STD _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); // Note that _Val is moved from by whichever thread runs the first chunk. // If any thread starts any chunk, initialization is complete, so we can't enter the // catch or serial fallback below. - _Run_chunked_parallel_work(_Hw_threads, _Operation); + _STD _Run_chunked_parallel_work(_Hw_threads, _Operation); return _Dest; _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } - _Seek_wrapped(_Dest, _STD transform_inclusive_scan(_UFirst, _ULast, _UDest, _Pass_fn(_Reduce_op), - _Pass_fn(_Transform_op), _STD move(_Val))); + _STD _Seek_wrapped(_Dest, _STD transform_inclusive_scan(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Reduce_op), + _STD _Pass_fn(_Transform_op), _STD move(_Val))); return _Dest; } } - _Seek_wrapped(_Dest, - _STD transform_inclusive_scan(_UFirst, _ULast, _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op), _STD move(_Val))); + _STD _Seek_wrapped(_Dest, + _STD transform_inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op), _STD move(_Val))); return _Dest; } @@ -4913,42 +4913,42 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L // compute 
partial noncommutative and associative transformed reductions into _Dest _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_CPP17_MUTABLE_ITERATOR(_FwdIt2); - _Adl_verify_range(_First, _Last); - const auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + const auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines const auto _Count = _STD distance(_UFirst, _ULast); - auto _UDest = _Get_unwrapped_n(_Dest, _Count); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... with at least 2 elements _TRY_BEGIN _No_init_tag _Tag; - auto _Passed_reduce = _Pass_fn(_Reduce_op); - auto _Passed_transform = _Pass_fn(_Transform_op); + auto _Passed_reduce = _STD _Pass_fn(_Reduce_op); + auto _Passed_transform = _STD _Pass_fn(_Transform_op); using _Intermediate_t = decay_t; _Static_partitioned_transform_inclusive_scan2<_Intermediate_t, _No_init_tag, _Unwrapped_t, decltype(_UDest), decltype(_Passed_reduce), decltype(_Passed_transform)> _Operation{_Hw_threads, _Count, _Passed_reduce, _Passed_transform, _Tag}; _Operation._Basis1._Populate(_Operation._Team, _UFirst); - _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); - _Run_chunked_parallel_work(_Hw_threads, _Operation); + _STD _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); + _STD _Run_chunked_parallel_work(_Hw_threads, _Operation); return _Dest; _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } - _Seek_wrapped(_Dest, - _STD transform_inclusive_scan(_UFirst, _ULast, _UDest, _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op))); + _STD _Seek_wrapped(_Dest, + _STD transform_inclusive_scan(_UFirst, _ULast, _UDest, _STD 
_Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); return _Dest; } } - _Seek_wrapped(_Dest, - _STD transform_inclusive_scan(_UFirst, _ULast, _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _Pass_fn(_Reduce_op), _Pass_fn(_Transform_op))); + _STD _Seek_wrapped(_Dest, + _STD transform_inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); return _Dest; } @@ -4989,7 +4989,7 @@ struct _Static_partitioned_adjacent_difference2 { static void __stdcall _Threadpool_callback( __std_PTP_CALLBACK_INSTANCE, void* const _Context, __std_PTP_WORK) noexcept /* terminates */ { - _Run_available_chunked_work(*static_cast<_Static_partitioned_adjacent_difference2*>(_Context)); + _STD _Run_available_chunked_work(*static_cast<_Static_partitioned_adjacent_difference2*>(_Context)); } }; @@ -5015,41 +5015,41 @@ _FwdIt2 adjacent_difference(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, // compute adjacent differences into _Dest _REQUIRE_PARALLEL_ITERATOR(_FwdIt1); _REQUIRE_CPP17_MUTABLE_ITERATOR(_FwdIt2); - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (remove_reference_t<_ExPo>::_Parallelize) { const size_t _Hw_threads = __std_parallel_algorithms_hw_threads(); if (_Hw_threads > 1) { // parallelize on multiprocessor machines auto _Count = _STD distance(_UFirst, _ULast); - const auto _UDest = _Get_unwrapped_n(_Dest, _Count); + const auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... 
with at least 2 elements _TRY_BEGIN --_Count; // note unusual offset partitioning _Static_partitioned_adjacent_difference2 _Operation{ - _Hw_threads, _Count, _UFirst, _Pass_fn(_Diff_op), _UDest}; - auto _Result = _Operation._Basis2._Populate(_Operation._Team, _Next_iter(_UDest)); + _Hw_threads, _Count, _UFirst, _STD _Pass_fn(_Diff_op), _UDest}; + auto _Result = _Operation._Basis2._Populate(_Operation._Team, _STD _Next_iter(_UDest)); const _Work_ptr _Work_op{_Operation}; // setup complete, hereafter nothrow or terminate _Work_op._Submit_for_chunks(_Hw_threads, _Operation._Team._Chunks); // must be done after setup is complete to avoid duplicate assign in serial fallback: *_UDest = *_UFirst; - _Run_available_chunked_work(_Operation); - _Seek_wrapped(_Dest, _Result); + _STD _Run_available_chunked_work(_Operation); + _STD _Seek_wrapped(_Dest, _Result); return _Dest; _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END } - _Seek_wrapped(_Dest, _Adjacent_difference_seq(_UFirst, _ULast, _UDest, _Pass_fn(_Diff_op))); + _STD _Seek_wrapped(_Dest, _STD _Adjacent_difference_seq(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Diff_op))); return _Dest; } } // Don't call serial adjacent_difference because it's described as creating a temporary we can avoid - _Seek_wrapped(_Dest, _Adjacent_difference_seq(_UFirst, _ULast, - _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _Pass_fn(_Diff_op))); + _STD _Seek_wrapped(_Dest, _STD _Adjacent_difference_seq(_UFirst, _ULast, + _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _STD _Pass_fn(_Diff_op))); return _Dest; } diff --git a/stl/inc/numeric b/stl/inc/numeric index ffd6ba0538..2581be10d4 100644 --- a/stl/inc/numeric +++ b/stl/inc/numeric @@ -27,9 +27,9 @@ _STD_BEGIN _EXPORT_STD template _NODISCARD _CONSTEXPR20 _Ty accumulate(const _InIt _First, const _InIt _Last, _Ty _Val, _Fn _Reduce_op) { // return noncommutative and nonassociative reduction of _Val and 
all in [_First, _Last), using _Reduce_op - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); for (; _UFirst != _ULast; ++_UFirst) { #if _HAS_CXX20 _Val = _Reduce_op(_STD move(_Val), *_UFirst); @@ -73,15 +73,15 @@ inline constexpr bool _Plus_on_arithmetic_ranges_reduction_v = false; _EXPORT_STD template _NODISCARD _CONSTEXPR20 _Ty reduce(const _InIt _First, const _InIt _Last, _Ty _Val, _BinOp _Reduce_op) { // return commutative and associative reduction of _Val and [_First, _Last), using _Reduce_op - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); if constexpr (_Plus_on_arithmetic_ranges_reduction_v<_Unwrapped_t, _Ty, _BinOp>) { #if _HAS_CXX20 if (!_STD is_constant_evaluated()) #endif // _HAS_CXX20 { - return _Reduce_plus_arithmetic_ranges(_UFirst, _ULast, _Val); + return _STD _Reduce_plus_arithmetic_ranges(_UFirst, _ULast, _Val); } } @@ -128,10 +128,10 @@ _NODISCARD _CONSTEXPR20 _Ty inner_product( _InIt1 _First1, _InIt1 _Last1, _InIt2 _First2, _Ty _Val, _BinOp1 _Reduce_op, _BinOp2 _Transform_op) { // return noncommutative and nonassociative transform-reduction of sequences, using // _Reduce_op and _Transform_op - _Adl_verify_range(_First1, _Last1); - auto _UFirst1 = _Get_unwrapped(_First1); - const auto _ULast1 = _Get_unwrapped(_Last1); - auto _UFirst2 = _Get_unwrapped_n(_First2, _Idl_distance<_InIt1>(_UFirst1, _ULast1)); + _STD _Adl_verify_range(_First1, _Last1); + auto _UFirst1 = _STD _Get_unwrapped(_First1); + const auto _ULast1 = _STD _Get_unwrapped(_Last1); + auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1)); 
for (; _UFirst1 != _ULast1; ++_UFirst1, (void) ++_UFirst2) { #if _HAS_CXX20 _Val = _Reduce_op(_STD move(_Val), _Transform_op(*_UFirst1, *_UFirst2)); // Requirement missing from N4950 @@ -178,10 +178,10 @@ _NODISCARD _CONSTEXPR20 _Ty transform_reduce( _InIt1 _First1, _InIt1 _Last1, _InIt2 _First2, _Ty _Val, _BinOp1 _Reduce_op, _BinOp2 _Transform_op) { // return commutative and associative transform-reduction of sequences, using // _Reduce_op and _Transform_op - _Adl_verify_range(_First1, _Last1); - auto _UFirst1 = _Get_unwrapped(_First1); - const auto _ULast1 = _Get_unwrapped(_Last1); - auto _UFirst2 = _Get_unwrapped_n(_First2, _Idl_distance<_InIt1>(_UFirst1, _ULast1)); + _STD _Adl_verify_range(_First1, _Last1); + auto _UFirst1 = _STD _Get_unwrapped(_First1); + const auto _ULast1 = _STD _Get_unwrapped(_Last1); + auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1)); if constexpr (_Default_ops_transform_reduce_v<_Unwrapped_t, _Unwrapped_t, _Ty, _BinOp1, _BinOp2>) { #if _HAS_CXX20 @@ -189,7 +189,7 @@ _NODISCARD _CONSTEXPR20 _Ty transform_reduce( if (!_STD is_constant_evaluated()) #endif // _HAS_CXX20 { - return _Transform_reduce_arithmetic_defaults(_UFirst1, _ULast1, _UFirst2, _STD move(_Val)); + return _STD _Transform_reduce_arithmetic_defaults(_UFirst1, _ULast1, _UFirst2, _STD move(_Val)); } } @@ -210,9 +210,9 @@ _NODISCARD _CONSTEXPR20 _Ty transform_reduce( const _InIt _First, const _InIt _Last, _Ty _Val, _BinOp _Reduce_op, _UnaryOp _Transform_op) { // return commutative and associative reduction of transformed sequence, using // _Reduce_op and _Transform_op - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); for (; _UFirst != _ULast; ++_UFirst) { _Val = _Reduce_op(_STD move(_Val), _Transform_op(*_UFirst)); // Requirement 
missing from N4950 } @@ -242,10 +242,10 @@ _NODISCARD _Ty transform_reduce(_ExPo&& _Exec, _FwdIt _First1, _FwdIt _Last1, _T _EXPORT_STD template _CONSTEXPR20 _OutIt partial_sum(const _InIt _First, const _InIt _Last, _OutIt _Dest, _BinOp _Reduce_op) { // compute partial noncommutative and nonassociative reductions into _Dest, using _Reduce_op - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); - auto _UDest = _Get_unwrapped_n(_Dest, _Idl_distance<_InIt>(_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); if (_UFirst != _ULast) { _Iter_value_t<_InIt> _Val(*_UFirst); @@ -264,7 +264,7 @@ _CONSTEXPR20 _OutIt partial_sum(const _InIt _First, const _InIt _Last, _OutIt _D } } - _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -278,10 +278,10 @@ _CONSTEXPR20 _OutIt partial_sum(_InIt _First, _InIt _Last, _OutIt _Dest) { _EXPORT_STD template _CONSTEXPR20 _OutIt exclusive_scan(const _InIt _First, const _InIt _Last, _OutIt _Dest, _Ty _Val, _BinOp _Reduce_op) { // set each value in [_Dest, _Dest + (_Last - _First)) to the associative reduction of predecessors and _Val - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); - auto _UDest = _Get_unwrapped_n(_Dest, _Idl_distance<_InIt>(_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); if (_UFirst != _ULast) { for (;;) { _Ty _Tmp(_Reduce_op(_Val, *_UFirst)); // temp to enable _First == _Dest, also requirement missing @@ -296,7 +296,7 @@ _CONSTEXPR20 _OutIt exclusive_scan(const 
_InIt _First, const _InIt _Last, _OutIt } } - _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -321,27 +321,27 @@ _FwdIt2 exclusive_scan(_ExPo&& _Exec, const _FwdIt1 _First, const _FwdIt1 _Last, _EXPORT_STD template _CONSTEXPR20 _OutIt inclusive_scan(const _InIt _First, const _InIt _Last, _OutIt _Dest, _BinOp _Reduce_op, _Ty _Val) { // compute partial noncommutative and associative reductions including _Val into _Dest, using _Reduce_op - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); - auto _UDest = _Get_unwrapped_n(_Dest, _Idl_distance<_InIt>(_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); for (; _UFirst != _ULast; ++_UFirst) { _Val = _Reduce_op(_STD move(_Val), *_UFirst); // Requirement missing from N4950 *_UDest = _Val; ++_UDest; } - _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } _EXPORT_STD template _CONSTEXPR20 _OutIt inclusive_scan(const _InIt _First, const _InIt _Last, _OutIt _Dest, _BinOp _Reduce_op) { // compute partial noncommutative and associative reductions into _Dest, using _Reduce_op - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); - auto _UDest = _Get_unwrapped_n(_Dest, _Idl_distance<_InIt>(_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); if (_UFirst != _ULast) { _Iter_value_t<_InIt> _Val(*_UFirst); // Requirement missing from N4950 for (;;) { @@ -356,7 +356,7 @@ _CONSTEXPR20 _OutIt inclusive_scan(const _InIt _First, const _InIt _Last, _OutIt } } 
- _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -386,10 +386,10 @@ _EXPORT_STD template (_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); if (_UFirst != _ULast) { for (;;) { _Ty _Tmp(_Reduce_op(_Val, _Transform_op(*_UFirst))); // temp to enable _First == _Dest @@ -404,7 +404,7 @@ _CONSTEXPR20 _OutIt transform_exclusive_scan( } } - _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -417,17 +417,17 @@ _EXPORT_STD template (_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); for (; _UFirst != _ULast; ++_UFirst) { _Val = _Reduce_op(_STD move(_Val), _Transform_op(*_UFirst)); // Requirement missing from N4950 *_UDest = _Val; ++_UDest; } - _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -435,10 +435,10 @@ _EXPORT_STD template _CONSTEXPR20 _OutIt transform_inclusive_scan( const _InIt _First, const _InIt _Last, _OutIt _Dest, _BinOp _Reduce_op, _UnaryOp _Transform_op) { // compute partial noncommutative and associative transformed reductions into _Dest - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); - auto _UDest = _Get_unwrapped_n(_Dest, _Idl_distance<_InIt>(_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); if (_UFirst != _ULast) { auto _Val = _Transform_op(*_UFirst); // Requirement missing from N4950, also type to 
use unclear for (;;) { @@ -453,7 +453,7 @@ _CONSTEXPR20 _OutIt transform_inclusive_scan( } } - _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -471,10 +471,10 @@ _FwdIt2 transform_inclusive_scan(_ExPo&& _Exec, _FwdIt1 _First, _FwdIt1 _Last, _ _EXPORT_STD template _CONSTEXPR20 _OutIt adjacent_difference(const _InIt _First, const _InIt _Last, _OutIt _Dest, _BinOp _Func) { // compute adjacent differences into _Dest - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); - auto _UDest = _Get_unwrapped_n(_Dest, _Idl_distance<_InIt>(_UFirst, _ULast)); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); + auto _UDest = _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_InIt>(_UFirst, _ULast)); if (_UFirst != _ULast) { _Iter_value_t<_InIt> _Val(*_UFirst); *_UDest = _Val; @@ -491,7 +491,7 @@ _CONSTEXPR20 _OutIt adjacent_difference(const _InIt _First, const _InIt _Last, _ ++_UDest; } - _Seek_wrapped(_Dest, _UDest); + _STD _Seek_wrapped(_Dest, _UDest); return _Dest; } @@ -517,9 +517,9 @@ _FwdIt2 adjacent_difference(_ExPo&& _Exec, const _FwdIt1 _First, const _FwdIt1 _ _EXPORT_STD template _CONSTEXPR20 void iota(_FwdIt _First, _FwdIt _Last, _Ty _Val) { // compute increasing sequence into [_First, _Last) - _Adl_verify_range(_First, _Last); - auto _UFirst = _Get_unwrapped(_First); - const auto _ULast = _Get_unwrapped(_Last); + _STD _Adl_verify_range(_First, _Last); + auto _UFirst = _STD _Get_unwrapped(_First); + const auto _ULast = _STD _Get_unwrapped(_Last); for (; _UFirst != _ULast; ++_UFirst, (void) ++_Val) { *_UFirst = _Val; } @@ -536,9 +536,9 @@ namespace ranges { requires indirectly_writable<_It, const _Ty&> _STATIC_CALL_OPERATOR constexpr iota_result<_It, _Ty> operator()( _It _First, _Se _Last, _Ty _Val) _CONST_CALL_OPERATOR { - _Adl_verify_range(_First, _Last); - _Seek_wrapped( - _First, 
_Iota_impl(_Unwrap_iter<_Se>(_STD move(_First)), _Unwrap_sent<_It>(_STD move(_Last)), _Val)); + _STD _Adl_verify_range(_First, _Last); + _STD _Seek_wrapped( + _First, _Iota_impl(_RANGES _Unwrap_iter<_Se>(_STD move(_First)), _RANGES _Unwrap_sent<_It>(_STD move(_Last)), _Val)); return {_STD move(_First), _STD move(_Val)}; } @@ -546,7 +546,7 @@ namespace ranges { _STATIC_CALL_OPERATOR constexpr iota_result, _Ty> operator()( _Rng&& _Range, _Ty _Val) _CONST_CALL_OPERATOR { auto _First = _RANGES begin(_Range); - _Seek_wrapped(_First, _Iota_impl(_Unwrap_range_iter<_Rng>(_STD move(_First)), _Uend(_Range), _Val)); + _STD _Seek_wrapped(_First, _Iota_impl(_RANGES _Unwrap_range_iter<_Rng>(_STD move(_First)), _Uend(_Range), _Val)); return {_STD move(_First), _STD move(_Val)}; } diff --git a/stl/inc/vector b/stl/inc/vector index e5ae481fd1..0a5187d055 100644 --- a/stl/inc/vector +++ b/stl/inc/vector @@ -1990,7 +1990,7 @@ private: _STL_INTERNAL_CHECK(!_Myfirst && !_Mylast && !_Myend); // check that *this is tidy _STL_INTERNAL_CHECK(0 < _Newcapacity && _Newcapacity <= max_size()); - const pointer _Newvec = _Allocate_at_least_helper(_Getal(), _Newcapacity); + const pointer _Newvec = _STD _Allocate_at_least_helper(_Getal(), _Newcapacity); _Myfirst = _Newvec; _Mylast = _Newvec; _Myend = _Newvec + _Newcapacity; diff --git a/stl/inc/xmemory b/stl/inc/xmemory index 10930b229d..b4bffb8f45 100644 --- a/stl/inc/xmemory +++ b/stl/inc/xmemory @@ -1811,12 +1811,12 @@ public: _Uninitialized_backout_al& operator=(const _Uninitialized_backout_al&) = delete; _CONSTEXPR20 ~_Uninitialized_backout_al() { - _Destroy_range(_First, _Last, _Al); + _STD _Destroy_range(_First, _Last, _Al); } template _CONSTEXPR20 void _Emplace_back(_Types&&... 
_Vals) { // construct a new element at *_Last and increment - allocator_traits<_Alloc>::construct(_Al, _Unfancy(_Last), _STD forward<_Types>(_Vals)...); + allocator_traits<_Alloc>::construct(_Al, _STD _Unfancy(_Last), _STD forward<_Types>(_Vals)...); ++_Last; } From 20762d96a04ae0429c6caef99c659ff010286ebf Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 14 Feb 2024 20:29:49 +0800 Subject: [PATCH 2/7] Use `data()` instead of `begin()` to avoid undesired ADL --- stl/inc/execution | 8 ++++---- stl/inc/vector | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/stl/inc/execution b/stl/inc/execution index aedd8c2c51..3ddf4c3d70 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -4373,7 +4373,7 @@ struct _Static_partitioned_exclusive_scan2 { const auto _In_range = _Basis1._Get_chunk(_Key); const auto _Dest = _Basis2._Get_first(_Chunk_number, _Team._Get_chunk_offset(_Chunk_number)); // Run local exclusive_scan on this chunk - const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); + const auto _Chunk = _Lookback.data() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same _STD _Exclusive_scan_per_chunk_complete( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Sum._Ref(), _Initial); @@ -4502,7 +4502,7 @@ struct _Static_partitioned_inclusive_scan2 { const auto _In_range = _Basis1._Get_chunk(_Key); const auto _Dest = _Basis2._Get_first(_Chunk_number, _Team._Get_chunk_offset(_Chunk_number)); // Run local inclusive_scan on this chunk - const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); + const auto _Chunk = _Lookback.data() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same _STD _Inclusive_scan_per_chunk( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Chunk->_Sum._Ref(), _STD move(_Initial)); @@ -4690,7 +4690,7 @@ struct 
_Static_partitioned_transform_exclusive_scan2 { const auto _In_range = _Basis1._Get_chunk(_Key); const auto _Dest = _Basis2._Get_first(_Chunk_number, _Team._Get_chunk_offset(_Chunk_number)); // Run local transform_exclusive_scan on this chunk - const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); + const auto _Chunk = _Lookback.data() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same _STD _Transform_exclusive_scan_per_chunk_complete( _In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Sum._Ref(), _Initial); @@ -4821,7 +4821,7 @@ struct _Static_partitioned_transform_inclusive_scan2 { const auto _In_range = _Basis1._Get_chunk(_Key); const auto _Dest = _Basis2._Get_first(_Chunk_number, _Team._Get_chunk_offset(_Chunk_number)); // Run local transform_inclusive_scan on this chunk - const auto _Chunk = _Lookback.begin() + static_cast(_Chunk_number); + const auto _Chunk = _Lookback.data() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same _STD _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, _Chunk->_Sum._Ref(), _STD move(_Initial)); diff --git a/stl/inc/vector b/stl/inc/vector index 0a5187d055..1548197f53 100644 --- a/stl/inc/vector +++ b/stl/inc/vector @@ -1788,11 +1788,11 @@ public: } _NODISCARD _CONSTEXPR20 _Ty* data() noexcept { - return _Unfancy_maybe_null(_Mypair._Myval2._Myfirst); + return _STD _Unfancy_maybe_null(_Mypair._Myval2._Myfirst); } _NODISCARD _CONSTEXPR20 const _Ty* data() const noexcept { - return _Unfancy_maybe_null(_Mypair._Myval2._Myfirst); + return _STD _Unfancy_maybe_null(_Mypair._Myval2._Myfirst); } _NODISCARD _CONSTEXPR20 iterator begin() noexcept { From a9210ba5674087c0fe9070048282004ea4809833 Mon Sep 17 00:00:00 2001 From: "A. 
Jiang" Date: Wed, 14 Feb 2024 20:35:19 +0800 Subject: [PATCH 3/7] Handle `_STD_VECTORIZE_WITH_FLOAT_CONTROL == 0` cases for Clang --- stl/inc/numeric | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/stl/inc/numeric b/stl/inc/numeric index 2581be10d4..9e1a105374 100644 --- a/stl/inc/numeric +++ b/stl/inc/numeric @@ -76,6 +76,7 @@ _NODISCARD _CONSTEXPR20 _Ty reduce(const _InIt _First, const _InIt _Last, _Ty _V _STD _Adl_verify_range(_First, _Last); auto _UFirst = _STD _Get_unwrapped(_First); const auto _ULast = _STD _Get_unwrapped(_Last); +#if _STD_VECTORIZE_WITH_FLOAT_CONTROL if constexpr (_Plus_on_arithmetic_ranges_reduction_v<_Unwrapped_t, _Ty, _BinOp>) { #if _HAS_CXX20 if (!_STD is_constant_evaluated()) @@ -84,6 +85,7 @@ _NODISCARD _CONSTEXPR20 _Ty reduce(const _InIt _First, const _InIt _Last, _Ty _V return _STD _Reduce_plus_arithmetic_ranges(_UFirst, _ULast, _Val); } } +#endif // _STD_VECTORIZE_WITH_FLOAT_CONTROL for (; _UFirst != _ULast; ++_UFirst) { _Val = _Reduce_op(_STD move(_Val), *_UFirst); // Requirement missing from N4950 @@ -182,6 +184,7 @@ _NODISCARD _CONSTEXPR20 _Ty transform_reduce( auto _UFirst1 = _STD _Get_unwrapped(_First1); const auto _ULast1 = _STD _Get_unwrapped(_Last1); auto _UFirst2 = _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_InIt1>(_UFirst1, _ULast1)); +#if _STD_VECTORIZE_WITH_FLOAT_CONTROL if constexpr (_Default_ops_transform_reduce_v<_Unwrapped_t, _Unwrapped_t, _Ty, _BinOp1, _BinOp2>) { #if _HAS_CXX20 @@ -192,6 +195,7 @@ _NODISCARD _CONSTEXPR20 _Ty transform_reduce( return _STD _Transform_reduce_arithmetic_defaults(_UFirst1, _ULast1, _UFirst2, _STD move(_Val)); } } +#endif // _STD_VECTORIZE_WITH_FLOAT_CONTROL for (; _UFirst1 != _ULast1; ++_UFirst1, (void) ++_UFirst2) { _Val = _Reduce_op(_STD move(_Val), _Transform_op(*_UFirst1, *_UFirst2)); // Requirement missing from N4950 From 4aaf30de85f4451d95c3b2a7a31d838b66ef6a9c Mon Sep 17 00:00:00 2001 From: "A. 
Jiang" Date: Wed, 14 Feb 2024 20:44:15 +0800 Subject: [PATCH 4/7] Consistently `_STD`-qualify `_Get_lookback_sum` There're 2 occurrences where unqualified calls are always safe. --- stl/inc/execution | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/stl/inc/execution b/stl/inc/execution index 3ddf4c3d70..17511e178f 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -3693,7 +3693,7 @@ void _Surrender_elements_to_next_chunk(const size_t _Chunk_number, // that will be placed in _Lookback by adding two of the previous sums together is the total number of elements // in the result. Assuming that _Dest has enough space for the result, the value produced by adding two previous // sums should fit inside _Diff. - _Prev_chunk_sum = _Get_lookback_sum(_Prev_chunk_lookback_data, _Casty_plus<_Diff>{}); + _Prev_chunk_sum = _STD _Get_lookback_sum(_Prev_chunk_lookback_data, _Casty_plus<_Diff>{}); } _Chunk_lookback_data->_Sum._Ref() = _Prev_chunk_sum; @@ -3813,7 +3813,7 @@ struct _Static_partitioned_set_subtraction { // Predecessor overall sum is done, use directly. _Prev_chunk_sum = _Prev_chunk_lookback_data->_Sum._Ref(); } else { - _Prev_chunk_sum = _Get_lookback_sum(_Prev_chunk_lookback_data, _Casty_plus<_Diff>{}); + _Prev_chunk_sum = _STD _Get_lookback_sum(_Prev_chunk_lookback_data, _Casty_plus<_Diff>{}); } _Chunk_lookback_data->_Sum._Ref() = static_cast<_Diff>(_Num_results + _Prev_chunk_sum); From 866936e1446b21b38036b8b3882c4a4cf1170309 Mon Sep 17 00:00:00 2001 From: "A. 
Jiang" Date: Wed, 14 Feb 2024 20:47:26 +0800 Subject: [PATCH 5/7] Clang-format --- stl/inc/execution | 88 ++++++++++++++++++++++++++--------------------- stl/inc/numeric | 7 ++-- 2 files changed, 52 insertions(+), 43 deletions(-) diff --git a/stl/inc/execution b/stl/inc/execution index 17511e178f..1a28c1aa40 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -4095,12 +4095,13 @@ _NODISCARD _Ty reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last, _Ty _Val _Work._Submit_for_chunks(_Hw_threads, _Chunks); while (const auto _Stolen_key = _Operation._Team._Get_next_key()) { auto _Chunk = _Operation._Basis._Get_chunk(_Stolen_key); - _Val = _STD reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), _STD _Pass_fn(_Reduce_op)); + _Val = _STD reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), _STD _Pass_fn(_Reduce_op)); } } // join with _Work_ptr threads auto& _Results = _Operation._Results; - return _STD _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); + return _STD _Reduce_move_unchecked( + _Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END @@ -4204,7 +4205,8 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt } // join with _Work_ptr threads auto& _Results = _Operation._Results; // note: already transformed - return _STD _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); + return _STD _Reduce_move_unchecked( + _Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END @@ -4216,8 +4218,8 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt } return _STD transform_reduce(_UFirst1, _ULast1, - _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_FwdIt1>(_UFirst1, 
_ULast1)), _STD move(_Val), _STD _Pass_fn(_Reduce_op), - _STD _Pass_fn(_Transform_op)); + _STD _Get_unwrapped_n(_First2, _STD _Idl_distance<_FwdIt1>(_UFirst1, _ULast1)), _STD move(_Val), + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op)); } template @@ -4290,13 +4292,14 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last while (auto _Stolen_key = _Operation._Team._Get_next_key()) { // keep processing remaining chunks to comply with N4950 [intro.progress]/14 auto _Chunk = _Operation._Basis._Get_chunk(_Stolen_key); - _Val = _STD transform_reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), _STD _Pass_fn(_Reduce_op), - _STD _Pass_fn(_Transform_op)); + _Val = _STD transform_reduce(_Chunk._First, _Chunk._Last, _STD move(_Val), + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op)); } } // join with _Work_ptr threads auto& _Results = _Operation._Results; // note: already transformed - return _STD _Reduce_move_unchecked(_Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); + return _STD _Reduce_move_unchecked( + _Results.begin(), _Results.end(), _STD move(_Val), _STD _Pass_fn(_Reduce_op)); _CATCH(const _Parallelism_resources_exhausted&) // fall through to serial case below _CATCH_END @@ -4304,7 +4307,8 @@ _NODISCARD _Ty transform_reduce(_ExPo&&, const _FwdIt _First, const _FwdIt _Last } } - return _STD transform_reduce(_UFirst, _ULast, _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op)); + return _STD transform_reduce( + _UFirst, _ULast, _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op)); } struct _No_init_tag { @@ -4442,14 +4446,15 @@ _FwdIt2 exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdI _CATCH_END } - _STD _Seek_wrapped(_Dest, _STD exclusive_scan(_UFirst, _ULast, _UDest, _STD move(_Val), _STD _Pass_fn(_Reduce_op))); + _STD _Seek_wrapped( + _Dest, _STD exclusive_scan(_UFirst, _ULast, _UDest, _STD move(_Val), _STD 
_Pass_fn(_Reduce_op))); return _Dest; } } - _STD _Seek_wrapped( - _Dest, _STD exclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _STD move(_Val), _STD _Pass_fn(_Reduce_op))); + _STD _Seek_wrapped(_Dest, + _STD exclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD move(_Val), _STD _Pass_fn(_Reduce_op))); return _Dest; } @@ -4574,14 +4579,15 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B _CATCH_END } - _STD _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Reduce_op), _STD move(_Val))); + _STD _Seek_wrapped( + _Dest, _STD inclusive_scan(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Reduce_op), _STD move(_Val))); return _Dest; } } - _STD _Seek_wrapped( - _Dest, _STD inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _STD _Pass_fn(_Reduce_op), _STD move(_Val))); + _STD _Seek_wrapped(_Dest, + _STD inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD _Pass_fn(_Reduce_op), _STD move(_Val))); return _Dest; } @@ -4621,8 +4627,9 @@ _FwdIt2 inclusive_scan(_ExPo&&, _FwdIt1 _First, _FwdIt1 _Last, _FwdIt2 _Dest, _B } } - _STD _Seek_wrapped(_Dest, _STD inclusive_scan(_UFirst, _ULast, - _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _STD _Pass_fn(_Reduce_op))); + _STD _Seek_wrapped(_Dest, + _STD inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD _Pass_fn(_Reduce_op))); return _Dest; } @@ -4746,8 +4753,8 @@ _FwdIt2 transform_exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L const auto _UDest = _STD _Get_unwrapped_n(_Dest, _Count); if (_Count >= 2) { // ... 
with at least 2 elements _TRY_BEGIN - _Static_partitioned_transform_exclusive_scan2 _Operation{ - _Hw_threads, _Count, _UFirst, _Val, _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op), _UDest}; + _Static_partitioned_transform_exclusive_scan2 _Operation{_Hw_threads, _Count, _UFirst, _Val, + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op), _UDest}; _STD _Seek_wrapped(_Dest, _Operation._Basis2._Populate(_Operation._Team, _UDest)); // Note that _Val is used as temporary storage by whichever thread runs the first chunk. // If any thread starts any chunk, initialization is complete, so we can't enter the @@ -4760,14 +4767,14 @@ _FwdIt2 transform_exclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L } _STD _Seek_wrapped(_Dest, _STD transform_exclusive_scan(_UFirst, _ULast, _UDest, _STD move(_Val), - _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); return _Dest; } } - _STD _Seek_wrapped(_Dest, - _STD transform_exclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); + _STD _Seek_wrapped(_Dest, _STD transform_exclusive_scan(_UFirst, _ULast, + _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD move(_Val), _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); return _Dest; } @@ -4823,8 +4830,8 @@ struct _Static_partitioned_transform_inclusive_scan2 { // Run local transform_inclusive_scan on this chunk const auto _Chunk = _Lookback.data() + static_cast(_Chunk_number); if (_Chunk_number == 0) { // chunk 0 is special as it has no predecessor; its local and total sums are the same - _STD _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, - _Chunk->_Sum._Ref(), _STD move(_Initial)); + _STD _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, 
_Reduce_op, + _Transform_op, _Chunk->_Sum._Ref(), _STD move(_Initial)); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } @@ -4832,8 +4839,8 @@ struct _Static_partitioned_transform_inclusive_scan2 { const auto _Prev_chunk = _STD _Prev_iter(_Chunk); if (_Prev_chunk->_State.load() & _Sum_available) { // if predecessor sum already complete, we can incorporate its value directly for 1 pass - _STD _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, _Transform_op, - _Chunk->_Sum._Ref(), _Prev_chunk->_Sum._Ref()); + _STD _Transform_inclusive_scan_per_chunk(_In_range._First, _In_range._Last, _Dest, _Reduce_op, + _Transform_op, _Chunk->_Sum._Ref(), _Prev_chunk->_Sum._Ref()); _Chunk->_Store_available_state(_Sum_available); return _Cancellation_status::_Running; } @@ -4895,14 +4902,14 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L } _STD _Seek_wrapped(_Dest, _STD transform_inclusive_scan(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Reduce_op), - _STD _Pass_fn(_Transform_op), _STD move(_Val))); + _STD _Pass_fn(_Transform_op), _STD move(_Val))); return _Dest; } } - _STD _Seek_wrapped(_Dest, - _STD transform_inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op), _STD move(_Val))); + _STD _Seek_wrapped(_Dest, _STD transform_inclusive_scan(_UFirst, _ULast, + _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op), _STD move(_Val))); return _Dest; } @@ -4940,15 +4947,15 @@ _FwdIt2 transform_inclusive_scan(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _L _CATCH_END } - _STD _Seek_wrapped(_Dest, - _STD transform_inclusive_scan(_UFirst, _ULast, _UDest, _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); + _STD _Seek_wrapped(_Dest, _STD transform_inclusive_scan(_UFirst, _ULast, _UDest, 
_STD _Pass_fn(_Reduce_op), + _STD _Pass_fn(_Transform_op))); return _Dest; } } - _STD _Seek_wrapped(_Dest, - _STD transform_inclusive_scan(_UFirst, _ULast, _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), - _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); + _STD _Seek_wrapped(_Dest, _STD transform_inclusive_scan(_UFirst, _ULast, + _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), + _STD _Pass_fn(_Reduce_op), _STD _Pass_fn(_Transform_op))); return _Dest; } @@ -5048,8 +5055,9 @@ _FwdIt2 adjacent_difference(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, } // Don't call serial adjacent_difference because it's described as creating a temporary we can avoid - _STD _Seek_wrapped(_Dest, _STD _Adjacent_difference_seq(_UFirst, _ULast, - _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _STD _Pass_fn(_Diff_op))); + _STD _Seek_wrapped(_Dest, + _STD _Adjacent_difference_seq(_UFirst, _ULast, + _STD _Get_unwrapped_n(_Dest, _STD _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _STD _Pass_fn(_Diff_op))); return _Dest; } diff --git a/stl/inc/numeric b/stl/inc/numeric index 9e1a105374..e11b83a51d 100644 --- a/stl/inc/numeric +++ b/stl/inc/numeric @@ -541,8 +541,8 @@ namespace ranges { _STATIC_CALL_OPERATOR constexpr iota_result<_It, _Ty> operator()( _It _First, _Se _Last, _Ty _Val) _CONST_CALL_OPERATOR { _STD _Adl_verify_range(_First, _Last); - _STD _Seek_wrapped( - _First, _Iota_impl(_RANGES _Unwrap_iter<_Se>(_STD move(_First)), _RANGES _Unwrap_sent<_It>(_STD move(_Last)), _Val)); + _STD _Seek_wrapped(_First, _Iota_impl(_RANGES _Unwrap_iter<_Se>(_STD move(_First)), + _RANGES _Unwrap_sent<_It>(_STD move(_Last)), _Val)); return {_STD move(_First), _STD move(_Val)}; } @@ -550,7 +550,8 @@ namespace ranges { _STATIC_CALL_OPERATOR constexpr iota_result, _Ty> operator()( _Rng&& _Range, _Ty _Val) _CONST_CALL_OPERATOR { auto _First = _RANGES begin(_Range); - _STD _Seek_wrapped(_First, _Iota_impl(_RANGES 
_Unwrap_range_iter<_Rng>(_STD move(_First)), _Uend(_Range), _Val)); + _STD _Seek_wrapped( + _First, _Iota_impl(_RANGES _Unwrap_range_iter<_Rng>(_STD move(_First)), _Uend(_Range), _Val)); return {_STD move(_First), _STD move(_Val)}; } From ca86f9148fcaf5ada16de9d600ca3f1bf42721d4 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 14 Feb 2024 20:47:41 +0800 Subject: [PATCH 6/7] Test coverage --- .../test.compile.pass.cpp | 88 +++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp b/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp index 20454468b7..10bbe8a507 100644 --- a/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp +++ b/tests/std/tests/GH_001596_adl_proof_algorithms/test.compile.pass.cpp @@ -9,6 +9,7 @@ #endif // _HAS_CXX17 #include #include +#include #include #include @@ -429,6 +430,55 @@ void test_algorithms() { // (void) std::prev_permutation(varr, varr); // requires Cpp17ValueSwappable (void) std::prev_permutation(iarr, iarr, validating_less{}); + (void) std::accumulate(varr, varr, validator{}, simple_left_selector{}); + (void) std::accumulate(iarr, iarr, 0, validating_left_selector{}); + +#if _HAS_CXX17 + (void) std::reduce(varr, varr, validator{}, simple_left_selector{}); + (void) std::reduce(iarr, iarr, 0, validating_left_selector{}); +#endif // _HAS_CXX17 + + (void) std::inner_product(varr, varr, varr, validator{}, simple_left_selector{}, simple_left_selector{}); + (void) std::inner_product(iarr, iarr, iarr, 0, validating_left_selector{}, validating_left_selector{}); + +#if _HAS_CXX17 + (void) std::transform_reduce(varr, varr, varr, validator{}, simple_left_selector{}, simple_left_selector{}); + (void) std::transform_reduce(iarr, iarr, iarr, 0, validating_left_selector{}, validating_left_selector{}); + (void) std::transform_reduce(varr, varr, validator{}, simple_left_selector{}, simple_identity{}); + (void) std::transform_reduce(iarr, 
iarr, 0, validating_left_selector{}, simple_identity{}); +#endif // _HAS_CXX17 + + (void) std::partial_sum(varr, varr, varr2, simple_left_selector{}); + (void) std::partial_sum(iarr, iarr, iarr2, validating_left_selector{}); + +#if _HAS_CXX17 + (void) std::exclusive_scan(varr, varr, varr2, validator{}, simple_left_selector{}); + (void) std::exclusive_scan(iarr, iarr, iarr2, 0, validating_left_selector{}); + + (void) std::inclusive_scan(varr, varr, varr2, simple_left_selector{}); + (void) std::inclusive_scan(iarr, iarr, iarr2, validating_left_selector{}); + (void) std::inclusive_scan(varr, varr, varr2, simple_left_selector{}, validator{}); + (void) std::inclusive_scan(iarr, iarr, iarr2, validating_left_selector{}, 0); + + (void) std::transform_exclusive_scan(varr, varr, varr2, validator{}, simple_left_selector{}, simple_identity{}); + (void) std::transform_exclusive_scan(iarr, iarr, iarr2, 0, validating_left_selector{}, validating_identity{}); + + (void) std::transform_inclusive_scan(varr, varr, varr2, simple_left_selector{}, simple_identity{}); + (void) std::transform_inclusive_scan(iarr, iarr, iarr2, validating_left_selector{}, validating_identity{}); + (void) std::transform_inclusive_scan(varr, varr, varr2, simple_left_selector{}, simple_identity{}, validator{}); + (void) std::transform_inclusive_scan(iarr, iarr, iarr2, validating_left_selector{}, validating_identity{}, 0); +#endif // _HAS_CXX17 + + (void) std::adjacent_difference(varr, varr, varr2, simple_left_selector{}); + (void) std::adjacent_difference(iarr, iarr, iarr2, validating_left_selector{}); + + validator* pvarr[1]{}; + std::iota(pvarr, pvarr, +varr); + +#if _HAS_CXX20 + (void) std::midpoint(+varr, +varr); +#endif // _HAS_CXX20 + validating_nontrivial narr[1]{}; validating_nontrivial narr2[1]{}; @@ -700,6 +750,40 @@ void test_per_execution_policy() { (void) std::lexicographical_compare(ExecutionPolicy, varr, varr, varr, varr); (void) std::lexicographical_compare(ExecutionPolicy, iarr, iarr, iarr, 
iarr, validating_less{}); + (void) std::reduce(ExecutionPolicy, varr, varr, validator{}, simple_left_selector{}); + (void) std::reduce(ExecutionPolicy, iarr, iarr, 0, simple_left_selector{}); + + (void) std::transform_reduce( + ExecutionPolicy, varr, varr, varr, validator{}, simple_left_selector{}, simple_left_selector{}); + (void) std::transform_reduce( + ExecutionPolicy, iarr, iarr, iarr, 0, validating_left_selector{}, validating_left_selector{}); + (void) std::transform_reduce(ExecutionPolicy, varr, varr, validator{}, simple_left_selector{}, simple_identity{}); + (void) std::transform_reduce(ExecutionPolicy, iarr, iarr, 0, validating_left_selector{}, simple_identity{}); + + (void) std::exclusive_scan(ExecutionPolicy, varr, varr, varr2, validator{}, simple_left_selector{}); + (void) std::exclusive_scan(ExecutionPolicy, iarr, iarr, iarr2, 0, validating_left_selector{}); + + (void) std::inclusive_scan(ExecutionPolicy, varr, varr, varr2, simple_left_selector{}); + (void) std::inclusive_scan(ExecutionPolicy, iarr, iarr, iarr2, validating_left_selector{}); + (void) std::inclusive_scan(ExecutionPolicy, varr, varr, varr2, simple_left_selector{}, validator{}); + (void) std::inclusive_scan(ExecutionPolicy, iarr, iarr, iarr2, validating_left_selector{}, 0); + + (void) std::transform_exclusive_scan( + ExecutionPolicy, varr, varr, varr2, validator{}, simple_left_selector{}, simple_identity{}); + (void) std::transform_exclusive_scan( + ExecutionPolicy, iarr, iarr, iarr2, 0, validating_left_selector{}, validating_identity{}); + + (void) std::transform_inclusive_scan(ExecutionPolicy, varr, varr, varr2, simple_left_selector{}, simple_identity{}); + (void) std::transform_inclusive_scan( + ExecutionPolicy, iarr, iarr, iarr2, validating_left_selector{}, validating_identity{}); + (void) std::transform_inclusive_scan( + ExecutionPolicy, varr, varr, varr2, simple_left_selector{}, simple_identity{}, validator{}); + (void) std::transform_inclusive_scan( + ExecutionPolicy, iarr, iarr, 
iarr2, validating_left_selector{}, validating_identity{}, 0); + + (void) std::adjacent_difference(ExecutionPolicy, varr, varr, varr2, simple_left_selector{}); + (void) std::adjacent_difference(ExecutionPolicy, iarr, iarr, iarr2, validating_left_selector{}); + validating_nontrivial narr[1]{}; validating_nontrivial narr2[1]{}; @@ -847,6 +931,10 @@ void test_ranges_non_projected_algorithms() { (void) shift_right(varr, varr, 0); (void) shift_right(varr, 0); + + validator* pvarr[1]{}; + (void) iota(pvarr, pvarr, +varr); + (void) iota(pvarr, +varr); #endif // _HAS_CXX23 validating_nontrivial narr[1]{}; From 25fb864923b4d2086075915eb087fa6e94e6b228 Mon Sep 17 00:00:00 2001 From: "Stephan T. Lavavej" Date: Thu, 15 Feb 2024 14:29:22 -0800 Subject: [PATCH 7/7] Code review feedback. Always use `_STD_VECTORIZE_WITH_FLOAT_CONTROL` guards around `_Plus_on_arithmetic_ranges_reduction_v` and `_Default_ops_transform_reduce_v` machinery, drop their `false` fallbacks (which were weird and non-conventional). Use arrow comments for consistency. `_STD` qualify two more calls to `_Reduce_plus_arithmetic_ranges` in `<execution>`.
--- stl/inc/execution | 14 ++++++++++---- stl/inc/numeric | 15 ++++----------- 2 files changed, 14 insertions(+), 15 deletions(-) diff --git a/stl/inc/execution b/stl/inc/execution index 1a28c1aa40..0d51ba3b54 100644 --- a/stl/inc/execution +++ b/stl/inc/execution @@ -4009,9 +4009,12 @@ _FwdIt3 set_difference(_ExPo&&, _FwdIt1 _First1, _FwdIt1 _Last1, _FwdIt2 _First2 template _Ty _Reduce_move_unchecked(_InIt _First, const _InIt _Last, _Ty _Val, _BinOp _Reduce_op) { // return reduction, choose optimization +#if _STD_VECTORIZE_WITH_FLOAT_CONTROL if constexpr (_Plus_on_arithmetic_ranges_reduction_v<_Unwrapped_t, _Ty, _BinOp>) { - return _Reduce_plus_arithmetic_ranges(_First, _Last, _Val); - } else { + return _STD _Reduce_plus_arithmetic_ranges(_First, _Last, _Val); + } else +#endif // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ + { for (; _First != _Last; ++_First) { _Val = _Reduce_op(_STD move(_Val), _STD move(*_First)); // Requirement missing from N4950 } @@ -4024,9 +4027,12 @@ template _Ty _Reduce_at_least_two(const _FwdIt _First, const _FwdIt _Last, _BinOp _Reduce_op) { // return reduction with no initial value // pre: distance(_First, _Last) >= 2 +#if _STD_VECTORIZE_WITH_FLOAT_CONTROL if constexpr (_Plus_on_arithmetic_ranges_reduction_v<_FwdIt, _Ty, _BinOp>) { - return _Reduce_plus_arithmetic_ranges(_First, _Last, _Ty{0}); - } else { + return _STD _Reduce_plus_arithmetic_ranges(_First, _Last, _Ty{0}); + } else +#endif // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ + { auto _Next = _First; _Ty _Val = _Reduce_op(*_First, *++_Next); while (++_Next != _Last) { diff --git a/stl/inc/numeric b/stl/inc/numeric index e11b83a51d..f10480a3d8 100644 --- a/stl/inc/numeric +++ b/stl/inc/numeric @@ -64,11 +64,7 @@ _Ty _Reduce_plus_arithmetic_ranges(_InIt _First, const _InIt _Last, _Ty _Val) { return _Val; } #pragma float_control(pop) - -#else // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL / !_STD_VECTORIZE_WITH_FLOAT_CONTROL vvv -template -inline constexpr bool 
_Plus_on_arithmetic_ranges_reduction_v = false; -#endif // ^^^ !_STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ +#endif // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ _EXPORT_STD template _NODISCARD _CONSTEXPR20 _Ty reduce(const _InIt _First, const _InIt _Last, _Ty _Val, _BinOp _Reduce_op) { @@ -85,7 +81,7 @@ _NODISCARD _CONSTEXPR20 _Ty reduce(const _InIt _First, const _InIt _Last, _Ty _V return _STD _Reduce_plus_arithmetic_ranges(_UFirst, _ULast, _Val); } } -#endif // _STD_VECTORIZE_WITH_FLOAT_CONTROL +#endif // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ for (; _UFirst != _ULast; ++_UFirst) { _Val = _Reduce_op(_STD move(_Val), *_UFirst); // Requirement missing from N4950 @@ -170,10 +166,7 @@ _Ty _Transform_reduce_arithmetic_defaults(_InIt1 _First1, const _InIt1 _Last1, _ return _Val; } #pragma float_control(pop) -#else // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL / !_STD_VECTORIZE_WITH_FLOAT_CONTROL vvv -template -inline constexpr bool _Default_ops_transform_reduce_v = false; -#endif // ^^^ !_STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ +#endif // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ _EXPORT_STD template _NODISCARD _CONSTEXPR20 _Ty transform_reduce( @@ -195,7 +188,7 @@ _NODISCARD _CONSTEXPR20 _Ty transform_reduce( return _STD _Transform_reduce_arithmetic_defaults(_UFirst1, _ULast1, _UFirst2, _STD move(_Val)); } } -#endif // _STD_VECTORIZE_WITH_FLOAT_CONTROL +#endif // ^^^ _STD_VECTORIZE_WITH_FLOAT_CONTROL ^^^ for (; _UFirst1 != _ULast1; ++_UFirst1, (void) ++_UFirst2) { _Val = _Reduce_op(_STD move(_Val), _Transform_op(*_UFirst1, *_UFirst2)); // Requirement missing from N4950