Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
83 changes: 47 additions & 36 deletions stl/inc/execution
Original file line number Diff line number Diff line change
Expand Up @@ -75,42 +75,71 @@ constexpr size_t _Still_active = static_cast<size_t>(-1);

// EXECUTION POLICIES
namespace execution {
class sequenced_policy { // request for sequential execution with termination
class sequenced_policy {
    // execution policy type stating that only sequential execution is supported; an exception escaping during
    // execution requests termination
public:
    // false: the `else if constexpr (..._Ivdep)` branches of for_each / for_each_n are not selected for this policy
    static constexpr bool _Ivdep = false;
    // false: NOTE(review): presumably consulted by the algorithms to decide on parallel dispatch - confirm at use sites
    static constexpr bool _Parallelize = false;
    using _Standard_execution_policy   = int;
};

inline constexpr sequenced_policy seq{/* unspecified */};

class parallel_policy { // request for parallel execution with termination
class parallel_policy {
    // execution policy type stating that element access functions support parallel execution with parallel
    // forward progress guarantees; an exception escaping during execution requests termination
public:
    // true: loop bodies may be run through the `_Ivdep` (independent iterations) loops of the for_each family
    static constexpr bool _Ivdep = true;
    // true: NOTE(review): presumably consulted by the algorithms to decide on parallel dispatch - confirm at use sites
    static constexpr bool _Parallelize = true;
    using _Standard_execution_policy   = int;
};

inline constexpr parallel_policy par{/* unspecified */};

class parallel_unsequenced_policy {
    // execution policy type stating that element access functions support parallel execution with weakly parallel
    // forward progress guarantees (i.e. without relying on thread identity); an exception escaping during
    // execution requests termination
    //
    // (currently carries the same _Parallelize / _Ivdep settings as parallel_policy, so the two behave alike)
public:
    static constexpr bool _Ivdep       = true;
    static constexpr bool _Parallelize = true;
    using _Standard_execution_policy   = int;
};

inline constexpr parallel_unsequenced_policy par_unseq{/* unspecified */};

#if _HAS_CXX20
class unsequenced_policy {
    // execution policy type stating that element access functions support weakly parallel forward progress
    // guarantees and may be executed interleaved on a single thread; an exception escaping during execution
    // requests termination
    //
    // (currently differs from sequenced_policy only in _Ivdep, which affects just the for_each family)
public:
    // true: selects the `else if constexpr (..._Ivdep)` branches of for_each / for_each_n
    static constexpr bool _Ivdep = true;
    // false: same setting as sequenced_policy
    static constexpr bool _Parallelize = false;
    using _Standard_execution_policy   = int;
};

inline constexpr unsequenced_policy unseq{/* unspecified */};
#endif // _HAS_CXX20

} // namespace execution

// All of the above are execution policies:
template <>
struct is_execution_policy<execution::sequenced_policy> : true_type {}; // sequenced_policy is an execution policy
struct is_execution_policy<execution::sequenced_policy> : true_type {};

template <>
struct is_execution_policy<execution::parallel_policy> : true_type {}; // parallel_policy is an execution policy
struct is_execution_policy<execution::parallel_policy> : true_type {};

template <>
struct is_execution_policy<execution::parallel_unsequenced_policy> : true_type {
}; // parallel_unsequenced_policy is an execution policy
struct is_execution_policy<execution::parallel_unsequenced_policy> : true_type {};

#if _HAS_CXX20
template <>
struct is_execution_policy<execution::unsequenced_policy> : true_type {};
#endif // _HAS_CXX20

// STRUCT _Parallelism_resources_exhausted
struct _Parallelism_resources_exhausted : exception {
Expand Down Expand Up @@ -1216,6 +1245,8 @@ void for_each(_ExPo&&, _FwdIt _First, _FwdIt _Last, _Fn _Func) noexcept /* termi
}
}

_For_each_ivdep(_UFirst, _ULast, _Pass_fn(_Func));
} else if constexpr (remove_reference_t<_ExPo>::_Ivdep) {
_For_each_ivdep(_UFirst, _ULast, _Pass_fn(_Func));
} else {
for (; _UFirst != _ULast; ++_UFirst) {
Expand Down Expand Up @@ -1258,6 +1289,8 @@ _FwdIt for_each_n(_ExPo&&, _FwdIt _First, const _Diff _Count_raw, _Fn _Func) noe
_CATCH_END
}

_Seek_wrapped(_First, _For_each_n_ivdep(_UFirst, _Count, _Pass_fn(_Func)));
} else if constexpr (remove_reference_t<_ExPo>::_Ivdep) {
_Seek_wrapped(_First, _For_each_n_ivdep(_UFirst, _Count, _Pass_fn(_Func)));
} else {
for (; 0 < _Count; --_Count, (void) ++_UFirst) {
Expand Down Expand Up @@ -2281,17 +2314,6 @@ _NODISCARD _FwdIt search_n(_ExPo&&, const _FwdIt _First, _FwdIt _Last, const _Di
}

// PARALLEL FUNCTION TEMPLATE transform
template <class _FwdIt1, class _FwdIt2, class _Fn>
_FwdIt2 _Transform_ivdep(_FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _Dest, _Fn _Func) {
// unary op transform with independent loop bodies
#pragma loop(ivdep)
for (; _First != _Last; ++_First, (void) ++_Dest) {
*_Dest = _Func(*_First);
}

return _Dest;
}

template <class _FwdIt1, class _FwdIt2, class _Fn>
struct _Static_partitioned_unary_transform2 {
using _Diff = _Common_diff_t<_FwdIt1, _FwdIt2>;
Expand All @@ -2311,7 +2333,7 @@ struct _Static_partitioned_unary_transform2 {
const auto _Key = _Team._Get_next_key();
if (_Key) {
const auto _Source = _Source_basis._Get_chunk(_Key);
_Transform_ivdep(_Source._First, _Source._Last, _Dest_basis._Get_chunk(_Key)._First, _Func);
_STD transform(_Source._First, _Source._Last, _Dest_basis._Get_chunk(_Key)._First, _Func);
return _Cancellation_status::_Running;
}

Expand Down Expand Up @@ -2349,12 +2371,12 @@ _FwdIt2 transform(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _D
_CATCH_END
}

_Seek_wrapped(_Dest, _Transform_ivdep(_UFirst, _ULast, _UDest, _Pass_fn(_Func)));
_Seek_wrapped(_Dest, _STD transform(_UFirst, _ULast, _UDest, _Pass_fn(_Func)));
return _Dest;
} else {
_Seek_wrapped(
_Dest, _Transform_ivdep(_UFirst, _ULast,
_Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)), _Pass_fn(_Func)));
_Dest, _STD transform(_UFirst, _ULast, _Get_unwrapped_n(_Dest, _Idl_distance<_FwdIt1>(_UFirst, _ULast)),
_Pass_fn(_Func)));
return _Dest;
}
} else {
Expand All @@ -2364,17 +2386,6 @@ _FwdIt2 transform(_ExPo&&, const _FwdIt1 _First, const _FwdIt1 _Last, _FwdIt2 _D
}
}

template <class _FwdIt1, class _FwdIt2, class _FwdIt3, class _Fn>
_FwdIt3 _Transform_ivdep(_FwdIt1 _First1, const _FwdIt1 _Last1, _FwdIt2 _First2, _FwdIt3 _Dest, _Fn _Func) {
// binary op transform with independent loop bodies
#pragma loop(ivdep)
for (; _First1 != _Last1; ++_First1, (void) ++_First2, ++_Dest) {
*_Dest = _Func(*_First1, *_First2);
}

return _Dest;
}

template <class _FwdIt1, class _FwdIt2, class _FwdIt3, class _Fn>
struct _Static_partitioned_binary_transform2 {
using _Diff = _Common_diff_t<_FwdIt1, _FwdIt2, _FwdIt3>;
Expand All @@ -2396,7 +2407,7 @@ struct _Static_partitioned_binary_transform2 {
const auto _Key = _Team._Get_next_key();
if (_Key) {
const auto _Source1 = _Source1_basis._Get_chunk(_Key);
_Transform_ivdep(_Source1._First, _Source1._Last, _Source2_basis._Get_chunk(_Key)._First,
_STD transform(_Source1._First, _Source1._Last, _Source2_basis._Get_chunk(_Key)._First,
_Dest_basis._Get_chunk(_Key)._First, _Func);
return _Cancellation_status::_Running;
}
Expand Down Expand Up @@ -2442,11 +2453,11 @@ _FwdIt3 transform(_ExPo&&, const _FwdIt1 _First1, const _FwdIt1 _Last1, const _F
_CATCH_END
}

_Seek_wrapped(_Dest, _Transform_ivdep(_UFirst1, _ULast1, _UFirst2, _UDest, _Pass_fn(_Func)));
_Seek_wrapped(_Dest, _STD transform(_UFirst1, _ULast1, _UFirst2, _UDest, _Pass_fn(_Func)));
return _Dest;
} else {
const auto _Count = _Idl_distance<_FwdIt1>(_UFirst1, _ULast1);
_Seek_wrapped(_Dest, _Transform_ivdep(_UFirst1, _ULast1, _Get_unwrapped_n(_First2, _Count),
_Seek_wrapped(_Dest, _STD transform(_UFirst1, _ULast1, _Get_unwrapped_n(_First2, _Count),
_Get_unwrapped_n(_Dest, _Count), _Pass_fn(_Func)));
return _Dest;
}
Expand Down
18 changes: 14 additions & 4 deletions stl/inc/yvals_core.h
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@
// (partially implemented, missing noop coroutines)
// P0919R3 Heterogeneous Lookup For Unordered Containers
// P0966R1 string::reserve() Should Not Shrink
// P1001R2 execution::unseq
// P1006R1 constexpr For pointer_traits<T*>::pointer_to()
// P1023R0 constexpr For std::array Comparisons
// P1024R3 Enhancing span Usability
Expand Down Expand Up @@ -225,6 +226,10 @@
// C++ allows an implementation to implement parallel algorithms as calls to the serial algorithms.
// This implementation parallelizes several common algorithm calls, but not all.
//
// std::execution::unseq has no direct analogue in any optimizer we target as of 2020-07-29. We map it to
// #pragma loop(ivdep) for the for_each algorithms only, because these are the only algorithms where the library
// does not need to introduce inter-loop-body dependencies to accomplish the algorithm's goals.
//
// The following algorithms are parallelized.
// * adjacent_difference
// * adjacent_find
Expand Down Expand Up @@ -1091,10 +1096,7 @@
#if _HAS_STD_BYTE
#define __cpp_lib_byte 201603L
#endif // _HAS_STD_BYTE
#define __cpp_lib_clamp 201603L
#ifndef _M_CEE
#define __cpp_lib_execution 201603L
#endif // _M_CEE
#define __cpp_lib_clamp 201603L
#define __cpp_lib_filesystem 201703L
#define __cpp_lib_gcd_lcm 201606L
#define __cpp_lib_hardware_interference_size 201703L
Expand Down Expand Up @@ -1184,6 +1186,14 @@
#define __cpp_lib_unwrap_ref 201811L
#endif // _HAS_CXX20

#ifndef _M_CEE
#if _HAS_CXX20
#define __cpp_lib_execution 201902L // P1001R2 execution::unseq
#elif _HAS_CXX17
#define __cpp_lib_execution 201603L // P0024R2 Parallel Algorithms
#endif // language mode
#endif // _M_CEE

#if _HAS_CXX20
#define __cpp_lib_array_constexpr 201811L // P1032R1 Miscellaneous constexpr
#elif _HAS_CXX17 // ^^^ _HAS_CXX20 / _HAS_CXX17 vvv
Expand Down
18 changes: 18 additions & 0 deletions tests/std/include/instantiate_algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ namespace std_testing {
test_exec_fwd1_fwd2(std::execution::seq, fwd1, fwd2);
test_exec_fwd1_fwd2(std::execution::par, fwd1, fwd2);
test_exec_fwd1_fwd2(std::execution::par_unseq, fwd1, fwd2);
#if _HAS_CXX20
test_exec_fwd1_fwd2(std::execution::unseq, fwd1, fwd2);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

(void) std::find_end(fwd1, fwd1, fwd2, fwd2);
Expand Down Expand Up @@ -503,6 +506,9 @@ namespace std_testing {
test_exec_fwd1(std::execution::seq, fwd1);
test_exec_fwd1(std::execution::par, fwd1);
test_exec_fwd1(std::execution::par_unseq, fwd1);
#if _HAS_CXX20
test_exec_fwd1(std::execution::unseq, fwd1);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

test_fwd1_fwd2(fwd1, FWDIT);
Expand Down Expand Up @@ -592,6 +598,9 @@ namespace std_testing {
test_exec_bid1_bid2_xxx_backward(std::execution::seq, bid1, bid2);
test_exec_bid1_bid2_xxx_backward(std::execution::par, bid1, bid2);
test_exec_bid1_bid2_xxx_backward(std::execution::par_unseq, bid1, bid2);
#if _HAS_CXX20
test_exec_bid1_bid2_xxx_backward(std::execution::unseq, bid1, bid2);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

std::copy_backward(bid1, bid1, bid2);
Expand All @@ -615,6 +624,9 @@ namespace std_testing {
test_exec_bid1_fwd1(std::execution::seq, bid1, fwd1);
test_exec_bid1_fwd1(std::execution::par, bid1, fwd1);
test_exec_bid1_fwd1(std::execution::par_unseq, bid1, fwd1);
#if _HAS_CXX20
test_exec_bid1_fwd1(std::execution::unseq, bid1, fwd1);
#endif // _HAS_CXX20
}

template <typename Bid1, typename ExecutionPolicy>
Expand Down Expand Up @@ -653,6 +665,9 @@ namespace std_testing {
test_exec_bid1(std::execution::seq, bid1);
test_exec_bid1(std::execution::par, bid1);
test_exec_bid1(std::execution::par_unseq, bid1);
#if _HAS_CXX20
test_exec_bid1(std::execution::unseq, bid1);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

std::reverse(bid1, bid1);
Expand Down Expand Up @@ -700,6 +715,9 @@ namespace std_testing {
test_exec_ran(std::execution::seq, ran);
test_exec_ran(std::execution::par, ran);
test_exec_ran(std::execution::par_unseq, ran);
#if _HAS_CXX20
test_exec_ran(std::execution::unseq, ran);
#endif // _HAS_CXX20
#endif // HAS_PARALLEL_ALGORITHMS

#if _HAS_AUTO_PTR_ETC
Expand Down
48 changes: 31 additions & 17 deletions tests/std/tests/P0024R2_parallel_algorithms_for_each/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -48,26 +48,40 @@ const auto call_only_once = [](atomic<bool>& b) { assert(!b.exchange(true)); };
const auto atomic_identity = [](atomic<bool>& b) { return b.load(); };

template <template <class...> class Container>
void test_case_for_each_parallel(const size_t testSize) {
Container<atomic<bool>> c(testSize);
for_each(par, c.begin(), c.end(), call_only_once);
assert(all_of(c.begin(), c.end(), atomic_identity));
}
struct test_case_for_each_parallel {
template <typename ExecutionPolicy>
void operator()(const size_t testSize, const ExecutionPolicy& exec) {
Container<atomic<bool>> c(testSize);
for_each(exec, c.begin(), c.end(), call_only_once);
assert(all_of(c.begin(), c.end(), atomic_identity));
}
};

template <template <class...> class Container>
void test_case_for_each_n_parallel(const size_t testSize) {
Container<atomic<bool>> c(testSize);
auto result = for_each_n(par, c.begin(), testSize, call_only_once);
assert(result == c.end());
assert(all_of(c.begin(), c.end(), atomic_identity));
}
struct test_case_for_each_n_parallel {
template <typename ExecutionPolicy>
void operator()(const size_t testSize, const ExecutionPolicy& exec) {
Container<atomic<bool>> c(testSize);
auto result = for_each_n(exec, c.begin(), testSize, call_only_once);
assert(result == c.end());
assert(all_of(c.begin(), c.end(), atomic_identity));
}
};

int main() {
test_case_for_each_n();
parallel_test_case(test_case_for_each_parallel<forward_list>);
parallel_test_case(test_case_for_each_parallel<list>);
parallel_test_case(test_case_for_each_parallel<vector>);
parallel_test_case(test_case_for_each_n_parallel<forward_list>);
parallel_test_case(test_case_for_each_n_parallel<list>);
parallel_test_case(test_case_for_each_n_parallel<vector>);
parallel_test_case(test_case_for_each_parallel<forward_list>{}, par);
parallel_test_case(test_case_for_each_parallel<list>{}, par);
parallel_test_case(test_case_for_each_parallel<vector>{}, par);
parallel_test_case(test_case_for_each_n_parallel<forward_list>{}, par);
parallel_test_case(test_case_for_each_n_parallel<list>{}, par);
parallel_test_case(test_case_for_each_n_parallel<vector>{}, par);
#if _HAS_CXX20
parallel_test_case(test_case_for_each_parallel<forward_list>{}, unseq);
parallel_test_case(test_case_for_each_parallel<list>{}, unseq);
parallel_test_case(test_case_for_each_parallel<vector>{}, unseq);
parallel_test_case(test_case_for_each_n_parallel<forward_list>{}, unseq);
parallel_test_case(test_case_for_each_n_parallel<list>{}, unseq);
parallel_test_case(test_case_for_each_n_parallel<vector>{}, unseq);
#endif // _HAS_CXX20
}
10 changes: 9 additions & 1 deletion tests/std/tests/VSO_0157762_feature_test_macros/test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,15 @@ STATIC_ASSERT(__cpp_lib_erase_if == 202002L);
STATIC_ASSERT(__cpp_lib_exchange_function == 201304L);
#endif

#if _HAS_CXX17 && !defined(_M_CEE)
#if _HAS_CXX20 && !defined(_M_CEE)
#ifndef __cpp_lib_execution
#error __cpp_lib_execution is not defined
#elif __cpp_lib_execution != 201902L
#error __cpp_lib_execution is not 201902L
#else
STATIC_ASSERT(__cpp_lib_execution == 201902L);
#endif
#elif _HAS_CXX17 && !defined(_M_CEE)
#ifndef __cpp_lib_execution
#error __cpp_lib_execution is not defined
#elif __cpp_lib_execution != 201603L
Expand Down