diff --git a/README.md b/README.md index f489398..395d319 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,12 @@ NYYYY is document NXXXX reformatted as a PDTS draft document. * Apply the proposed wording in [P0214R9](https://wg21.link/P0214R9) (Data-Parallel Vector Types & Operations). +## Editorial Changes + +* Reordered Execution Policy and Parallel Exceptions clauses. +* Various typographical corrections. + # Acknowledgements -Thanks to Alisdair Meredith for suggesting editorial changes. +Thanks to Alisdair Meredith, Matthias Kretz, and Marshall Clow for reviewing and suggesting editorial changes. diff --git a/algorithms.html b/algorithms.html index c5ded45..881bce8 100644 --- a/algorithms.html +++ b/algorithms.html @@ -19,7 +19,7 @@

Wavefront Application

This includes evaluations occurring in function invocations. -

+

An evaluation A is ordered before an evaluation B if A is deterministically @@ -56,7 +56,7 @@

Wavefront Application

Informally, A is a vertical antecedent of B if A is sequenced immediately before B or A is nested zero or more levels within a statement S that immediately precedes B. -

+

In the following, Xi and Xj refer to evaluations of the same expression @@ -78,7 +78,6 @@

Wavefront Application

Horizontally matched establishes a theoretical lock-step relationship between evaluations in different applications of an element access function. -

Let f be a function called for each argument list in a sequence of argument lists. @@ -97,7 +96,6 @@

Wavefront Application

The relationships between Ai and Bi and between Aj and Bj are sequenced before, not vertical antecedent. -

diff --git a/data_parallel_types.html b/data_parallel_types.html index 87fecda..ebacb9c 100644 --- a/data_parallel_types.html +++ b/data_parallel_types.html @@ -14,7 +14,7 @@

General

Throughout this Clause, the term data-parallel type refers to all supported specializations of the simd and simd_mask class templates. A data-parallel object is an object of data-parallel type. -

+

@@ -44,16 +44,17 @@

Header <experimental/simd> synopsis

namespace std::experimental { inline namespace parallelism_v2 { -namespace simd_abi { - - struct scalar {}; - template<int N> struct fixed_size {}; - template<class T> inline constexpr int max_fixed_size = implementation-defined; - template<class T> using compatible = implementation-defined; - template<class T> using native = implementation-defined; - - template<class T, size_t N> struct deduce { using type = see-below; }; - template<class T, size_t N> using deduce_t = typename deduce<T, N>::type; + namespace simd_abi { + + struct scalar {}; + template<int N> struct fixed_size {}; + template<class T> inline constexpr int max_fixed_size = implementation-defined; + template<class T> using compatible = implementation-defined; + template<class T> using native = implementation-defined; + + template<class T, size_t N> struct deduce { using type = see below; }; + template<class T, size_t N> using deduce_t = typename deduce<T, N>::type; + } struct element_aligned_tag {}; struct vector_aligned_tag {}; @@ -62,7 +63,7 @@

Header <experimental/simd> synopsis

inline constexpr vector_aligned_tag vector_aligned{}; template<size_t N> inline constexpr overaligned_tag<N> overaligned{}; - traits + template<class T> struct is_abi_tag; template<class T> inline constexpr bool is_abi_tag_v = is_abi_tag<T>::value; @@ -83,24 +84,24 @@

Header <experimental/simd> synopsis

template<class T, class U = typename T::value_type> inline constexpr size_t memory_alignment_v = memory_alignment<T,U>::value; - class template simd + template<class T, class Abi = simd_abi::compatible<T>> class simd; - template<class T> using native_simd<T, simd_abi::native<T>>; + template<class T> using native_simd = simd<T, simd_abi::native<T>>; template<class T, int N> using fixed_size_simd = simd<T, simd_abi::fixed_size<N>>; - class template simd_mask + template<class T, class Abi = simd_abi::compatible<T>> class simd_mask; - template<class T> using native_simd_mask<T, simd_abi::native<T>>; + template<class T> using native_simd_mask = simd_mask<T, simd_abi::native<T>>; template<class T, int N> using fixed_size_simd_mask = simd_mask<T, simd_abi::fixed_size<N>>; - casts - template<class T, class U, class Abi> see-below simd_cast(const simd<U, Abi>&); - template<class T, class U, class Abi> see-below static_simd_cast(const simd<U, Abi>&); + + template<class T, class U, class Abi> see below simd_cast(const simd<U, Abi>&); + template<class T, class U, class Abi> see below static_simd_cast(const simd<U, Abi>&); template<class T, class Abi> - fixed_size_simd<T, simd_size_v<T, Abi>> to_fixed_size(const simd<T, Abi>&) noexcept; + fixed_size_simd<T, simd_size_v<T, Abi>> to_fixed_size(const simd<T, Abi>&) noexcept; template<class T, class Abi> - fixed_size_simd_mask<T, simd_size_v<T, Abi>> to_fixed_size(const simd_mask<T, Abi>&) noexcept; + fixed_size_simd_mask<T, simd_size_v<T, Abi>> to_fixed_size(const simd_mask<T, Abi>&) noexcept; template<class T, int N> native_simd<T> to_native(const fixed_size_simd<T, N>&) noexcept; template<class T, int N> native_simd_mask<T> to_native(const fixed_size_simd_mask<T, N>&) noexcept; template<class T, int N> simd<T> to_compatible(const fixed_size_simd<T, N>&) noexcept; @@ -116,11 +117,11 @@

Header <experimental/simd> synopsis

array<V, simd_size_v<typename V::value_type, Abi> / V::size()> split(const simd_mask<typename V::value_type, Abi>&); template<class T, class... Abis> - simd<T, simd_abi::deduce_t>T, (simd_size_v<T, Abis> + ...)>> concat(const simd<T, Abis>&...); + simd<T, simd_abi::deduce_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd<T, Abis>&...); template<class T, class... Abis> - simd_mask<T, simd_abi::deduce_t>T, (simd_size_v<T, Abis> + ...)>> concat(const simd_mask<T, Abis>&...); + simd_mask<T, simd_abi::deduce_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd_mask<T, Abis>&...); - reductions + template<class T, class Abi> bool all_of(const simd_mask<T, Abi>&) noexcept; template<class T, class Abi> bool any_of(const simd_mask<T, Abi>&) noexcept; template<class T, class Abi> bool none_of(const simd_mask<T, Abi>&) noexcept; @@ -129,67 +130,69 @@

Header <experimental/simd> synopsis

template<class T, class Abi> int find_first_set(const simd_mask<T, Abi>&); template<class T, class Abi> int find_last_set(const simd_mask<T, Abi>&); - bool all_of(see-below) noexcept; - bool any_of(see-below) noexcept; - bool none_of(see-below) noexcept; - bool some_of(see-below) noexcept; - int popcount(see-below) noexcept; - int find_first_set(see-below) noexcept; - int find_last_set(see-below) noexcept; + bool all_of(see below) noexcept; + bool any_of(see below) noexcept; + bool none_of(see below) noexcept; + bool some_of(see below) noexcept; + int popcount(see below) noexcept; + int find_first_set(see below) noexcept; + int find_last_set(see below) noexcept; - where expressions + template<class M, class T> class const_where_expression; template<class M, class T> class where_expression; - masked assignment + template<class T> struct nodeduce { using type = T; }; // exposition only template<class T> using nodeduce_t = typename nodeduce<T>::type; // exposition only template<class T, class Abi> - where_expression<simd_mask<T, Abi>, simd<T, Abi>> where(const typename simd<T, Abi>::mask_type&, simd<T, Abi>&) noexcept; + where_expression<simd_mask<T, Abi>, simd<T, Abi>> where(const typename simd<T, Abi>::mask_type&, simd<T, Abi>&) noexcept; template<class T, class Abi> - const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> where(const typename simd<T, Abi>::mask_type&, const simd<T, Abi>&) noexcept; + const_where_expression<simd_mask<T, Abi>, simd<T, Abi>> where(const typename simd<T, Abi>::mask_type&, const simd<T, Abi>&) noexcept; template<class T, class Abi> - where_expression<simd_mask<T, Abi>, simd_mask<T, Abi>> where(const nodeduce_t<simd_mask<T, Abit>>&, simd_mask<T, Abi>&) noexcept; + where_expression<simd_mask<T, Abi>, simd_mask<T, Abi>> where(const nodeduce_t<simd_mask<T, Abit>>&, simd_mask<T, Abi>&) noexcept; template<class T, class Abi> - const_where_expression<simd_mask<T, Abi>, simd_mask<T, Abi>> where(const nodeduce_t<simd_mask<T, Abit>>&, const 
simd_mask<T, Abi>&) noexcept; + const_where_expression<simd_mask<T, Abi>, simd_mask<T, Abi>> where(const nodeduce_t<simd_mask<T, Abit>>&, const simd_mask<T, Abi>&) noexcept; + + template<class T> + where_expression<bool, T> where(see below k, T& d) noexcept; template<class T> - const_where_expression<bool, T> where(see-below k, const T& d) noexcept; + const_where_expression<bool, T> where(see below k, const T& d) noexcept; - reductions + template<class T, class Abi, class BinaryOperation = plus<>> - T reduce(const simd<T, Abi>&, BinaryOperation = {}); + T reduce(const simd<T, Abi>&, BinaryOperation = {}); template<class M, class V, class BinaryOperation> - typename V::value_type reduce(const const_where_expression<M, V>& x, - typename V::value_type identity_element, BinaryOperation binary_op); + typename V::value_type reduce(const const_where_expression<M, V>& x, + typename V::value_type identity_element, BinaryOperation binary_op); template<class M, class V> - typename V::value_type reduce(const const_where_expression<M, V> x, plus<> binary_op = {}); + typename V::value_type reduce(const const_where_expression<M, V>& x, plus<> binary_op = {}); template<class M, class V> - typename V::value_type reduce(const const_where_expression<M, V> x, multiplies<> binary_op = {}); + typename V::value_type reduce(const const_where_expression<M, V>& x, multiplies<> binary_op); template<class M, class V> - typename V::value_type reduce(const const_where_expression<M, V> x, bit_and<> binary_op = {}); + typename V::value_type reduce(const const_where_expression<M, V>& x, bit_and<> binary_op); template<class M, class V> - typename V::value_type reduce(const const_where_expression<M, V> x, bit_or<> binary_op = {}); + typename V::value_type reduce(const const_where_expression<M, V>& x, bit_or<> binary_op); template<class M, class V> - typename V::value_type reduce(const const_where_expression<M, V> x, bit_xor<> binary_op = {}); + typename V::value_type reduce(const 
const_where_expression<M, V>& x, bit_xor<> binary_op); template<class T, class Abi> T hmin(const simd<T, abi>&); template<class T, class Abi> typename V::value_type hmin(const const_where_expression<M, V>&); template<class T, class Abi> T hmax(const simd<T, abi>&); template<class T, class Abi> typename V::value_type hmax(const const_where_expression<M, V>&); - algorithms + template<class T, class Abi> simd<T, Abi> min(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; template<class T, class Abi> simd<T, Abi> max(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; - template<class T, class Abi> simd<T, Abi> minmax(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; + template<class T, class Abi> pair<simd<T, Abi>, simd<T, Abi>> minmax(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; template<class T, class Abi> simd<T, Abi> clamp(const simd<T, Abi>& v, const simd<T, Abi>& lo, const simd<T, Abi>& hi) noexcept; } -} } @@ -199,15 +202,11 @@

Header <experimental/simd> synopsis

The header <experimental/simd> defines class templates, tag types, trait types, and function templates for element-wise operations on data-parallel objects.

-
- - -

simd ABI tags

- -
 
-namespace std::experimental {
-inline namespace parallelism_v2 {
+    
+      

simd ABI tags

+ +
 namespace simd_abi {
   struct scalar {};
   template<int N> struct fixed_size {};
@@ -215,59 +214,71 @@ 

simd ABI tags

template<class T> using compatible = implementation-defined; template<class T> using native = implementation-defined; } -} -} -
-
+
+
-

- - An ABI tag is a type in the std::experimental::parallelism_v2::simd_abi namespace that indicates a choice of size and binary representation for objects of data-parallel type. The intent is for the size and binary representation to depend on the target architecture. The ABI tag, together with a given element type implies a number of elements. ABI tag types are used as the second template argument to simd and simd_mask. The ABI tag is orthogonal to selecting the machine instruction set. The selected machine instruction set limits the usable ABI tag types, though (see ). The ABI tags enable users to safely pass objects of data-parallel type between translation unit boundaries (e.g. function calls or I/O). - -

+

+ + An ABI tag is a type in the std::experimental::parallelism_v2::simd_abi namespace that indicates a choice of size and binary representation for objects of data-parallel type. The intent is for the size and binary representation to depend on the target architecture. The ABI tag, together with a given element type, implies a number of elements. ABI tag types are used as the second template argument to simd and simd_mask. + +

-

- - Use of the scalar tag type requires data-parallel types to store a single element (i.e., simd<T, simd_abi::scalar>::size() returns 1). scalar is not an alias for fixed_size<1>. - -

+

+ + The ABI tag is orthogonal to selecting the machine instruction set. The selected machine instruction set limits the usable ABI tag types, though (see ). The ABI tags enable users to safely pass objects of data-parallel type between translation unit boundaries (e.g. function calls or I/O). + +

-

- - The value of max_fixed_size<T> is at least 32. - -

+

+ + Use of the scalar tag type requires data-parallel types to store a single element (i.e., simd<T, simd_abi::scalar>::size() returns 1). scalar is not an alias for fixed_size<1>. + +

-

- - Use of the simd_abi::fixed_size<N> tag type requires data-parallel types to store N elements (i.e. simd<T, simd_abi::fixed_size<N>>::size() is N). simd<T, fixed_size<N>> and simd_mask<T, fixed_size<N>> with N > 0 and N <= max_fixed_size<T> is supported. Additionally, for every supported simd<T, Abi> (see ), where Abi is an ABI tag is not a specialization of simd_abi::fixed_size, N == simd<T, Abi>::size() is true. +

+ + The value of max_fixed_size<T> is at least 32. + +

- It is unspecified whether simd<T, fixed_size<T, fixed_size<N>> with N > max_fixed_size<T> is supported. The value of max_fixed_size<T> can depend on compiler flags and can change between different compiler versions. +

+ + Use of the simd_abi::fixed_size<N> tag type requires data-parallel types to store N elements (i.e. simd<T, simd_abi::fixed_size<N>>::size() is N). simd<T, fixed_size<N>> and simd_mask<T, fixed_size<N>> with N > 0 and N <= max_fixed_size<T> shall be supported. Additionally, for every supported simd<T, Abi> (see ), where Abi is an ABI tag that is not a specialization of simd_abi::fixed_size, N == simd<T, Abi>::size() shall be supported. + +

- An implementation may forego ABI compatibility between differently compiled translation units for simd and simd_mask specializations using the same simd_abi::fixed_size<N> tag. Otherwise, the efficiency of simd<T, Abi> is likely to be better than for simd<T, fixed_size<simd_size_v<T, Abi>>> (with Abi not a specialization of simd_abi::fixed_size). - -

+

+ + It is unspecified whether simd<T, fixed_size<N>> with N > max_fixed_size<T> is supported. The value of max_fixed_size<T> can depend on compiler flags and can change between different compiler versions. + +

-

- - An implementation may define additional extended ABI tag types in the std::experimental::parallelism_v2::simd_abi namespace, to support other forms of data-parallel computation. - -

+

+ + An implementation can forego ABI compatibility between differently compiled translation units for simd and simd_mask specializations using the same simd_abi::fixed_size<N> tag. Otherwise, the efficiency of simd<T, Abi> is likely to be better than for simd<T, fixed_size<simd_size_v<T, Abi>>> (with Abi not a specialization of simd_abi::fixed_size). + +

-

- - compatible<T> is an implementation-defined alias for an ABI tag. The intent is to use the ABI tag producing the most efficient data-parallel execution for the element type T that ensures ABI compatibility between translation units on the target architecture. +

+ + An implementation may define additional extended ABI tag types in the std::experimental::parallelism_v2::simd_abi namespace, to support other forms of data-parallel computation. + +

+ +

+ + compatible<T> is an implementation-defined alias for an ABI tag. The intent is to use the ABI tag producing the most efficient data-parallel execution for the element type T that ensures ABI compatibility between translation units on the target architecture. + +
+
-
-
- - Consider a target architecture supporting the extended ABI tags __simd128 and __simd256, where the __simd256 type requires an optional ISA extension on said architecture. Also, the target architecture does not support long double with either ABI tag. The implementation therefore defines + [ Example: Consider a target architecture supporting the extended ABI tags __simd128 and __simd256, where the __simd256 type requires an optional ISA extension on said architecture. Also, the target architecture does not support long double with either ABI tag. The implementation therefore defines - +

-

- - native<T> is an implementation-defined alias for an ABI tag. The intent is to use the ABI tag producing the most efficient data-parallel execution for the element type T that is supported on the currently targeted system. For target architectures without ISA extensions, the native<T> and compatible<T> aliases will likely be the same. For target architectures with ISA extensions, compiler flags may influence the native<T> alias while compatible<T> will be the same independent of such flags. + + — end example ] + + + -
-
+

+ + native<T> is an implementation-defined alias for an ABI tag. The intent is to use the ABI tag producing the most efficient data-parallel execution for the element type T that is supported on the currently targeted system. For target architectures without ISA extensions, the native<T> and compatible<T> aliases will likely be the same. For target architectures with ISA extensions, compiler flags may influence the native<T> alias while compatible<T> will be the same independent of such flags. + +
+
+ + [ Example: Consider a target architecture supporting the extended ABI tags __simd128 and __simd256, where hardware support for __simd256 only exists for floating-point types. The implementation therefore defines native<T> as an alias for + +

+ + — end example ] + + +

+ + +
template<class T, size_t N> struct deduce { using type = see below; };
+
- - Consider a target architecture supporting the extended ABI tags __simd128 and __simd256, where hardware support for __simd256 only exists for floating-point types. The implementation therefore defines native<T> as an alias for +

+ + The member type shall be present if and only if - +

+ + - -
+      

+ + Where present, the member typedef type shall name an ABI tag type that satisfies + -namespace std::experimental { -inline namespace parallelism_v2 { -namespace simd_abi { +

    +
  • + + simd_size<T, type> == N, and + +
  • - template<T, size_t N> struct deduce { using type = see-below; }; -} -} -} -
-
+
  • + + simd<T, type> is default constructible (see ). + +
  • + -

    - - The member type is present if and only if + + If N is 1, the member typedef type is simd_abi::scalar. Otherwise, if there are multiple ABI tag types that satisfy the constraints, the member typedef type is implementation-defined. It is expected that extended ABI tags can produce better optimizations and thus are preferred over simd_abi::fixed_size<N>. + + - -

  • - - T is a vectorizable type, and - -
  • +

    + + The behavior of a program that adds specializations for deduce is undefined. + +

    +
    -
  • - - simd_abi::fixed_size<N> is supported (see ). - -
  • - - -

    + +

    simd type traits

    -

    - - Where present, the member typedef type shall name an ABI tag type that satisfies - + + template<class T> struct is_abi_tag { see below }; + - -

  • - - simd_size<T, type> == N, and - -
  • +

    + + The type is_abi_tag<T> is a UnaryTypeTrait with a BaseCharacteristic of true_type if T is a standard or extended ABI tag, and false_type otherwise. + +

    -
  • - - simd<T, type> is default constructible (see ). - -
  • - +

    + + The behavior of a program that adds specializations for is_abi_tag is undefined. + +

    -
    + + template<class T> struct is_simd { see below }; + - - If N is 1, the member typedef type is simd_abi::scalar. Otherwise, if there are multiple ABI tag types that satisfy the constraints, the member typedef type is implementation-defined. It is expected that extended ABI tags can produce better optimizations and thus are preferred over simd_abi::fixed_size<N>. - -

    +

    + + The type is_simd<T> is a UnaryTypeTrait with a BaseCharacteristic of true_type if T is a specialization of the simd class template, and false_type otherwise. + +

    -

    - - The behavior of a program that adds specializations for deduce is undefined. - -

    -
    +

    + + The behavior of a program that adds specializations for is_simd is undefined. + +

    - -

    simd type traits

    + template<class T> struct is_simd_mask { see below }; - - template<class> struct is_abi_tag { see-below }; - +

    + + The type is_simd_mask<T> is a UnaryTypeTrait with a BaseCharacteristic of true_type if T is a specialization of the simd_mask class template, and false_type otherwise. + +

    -

    - - The type is_abi_tag<T> is a UnaryTypeTrait with a BaseCharacteristic of true_type if T is a standard or extended ABI tag, and false_type otherwise. - -

    +

    + + The behavior of a program that adds specializations for is_simd_mask is undefined. + +

    -

    - - The behavior of a program that adds specializations for is_abi_tag is undefined. - -

    + + template<class T> struct is_simd_flag_type { see below }; + - - template<class> struct is_simd { see-below }; - +

    + + The type is_simd_flag_type<T> is a UnaryTypeTrait with a BaseCharacteristic of true_type if T is one of -

    - - The type is_simd<T> is a UnaryTypeTrait with a BaseCharacteristic of true_type if T is a specialization of the simd class template, and false_type otherwise. - -

    + -

    - - The behavior of a program that adds specializations for is_simd_mask is undefined. - -

    - - - template<class T> struct is_simd_flag_type { see-below }; - - -

    - - The type is_simd_flag_type<class T> is a UnaryTypeTrait with a BaseCharacteristic of true_type if T is one of - - -

  • - - element_aligned_tag, or - -
  • - -
  • - - vector_aligned_tag, or - -
  • - -
  • - - overaligned_tag<N> with N > 0 and N an integral power of two, - -
  • - - -
    + + and false_type otherwise. + + + +

    - and false_type otherwise. + The behavior of a program that adds specializations for is_simd_flag_type is undefined. - -

    - -

    - - The behavior of a program that adds specializations for is_simd_flag_type is undefined. - -

    - - - template<class T, class Abi = simd_abi::compatible<T>> struct simd_size { see-below }; - - -

    - - simd_size<T, Abi> has a member value if and only if - - -

  • - - T is a vectorizable type, and - -
  • - -
  • - - is_abi_tag_v<Abi> is true. - -
  • - - -
    +

    - - The rules are different from . - + template<class T, class Abi = simd_abi::compatible<T>> struct simd_size { see below }; - -

    - -

    - - If value is present, the type simd_size<T, Abi> is a BinaryTypeTrait with a BaseCharacteristic of integral_constant<size_t, N> with N equal to the number of elements in a simd<T, Abi> object. If simd<T, Abi> is not supported for the currently targeted system, simd_size<T, Abi>::value produces the value simd<T, Abi>::size() would return if it were supported. - -

    - -

    - - The behavior of a program that adds specializations for simd_size is undefined. - -

    - - template<class T, class U = typename T::value_type> struct memory_alignment { see-below }; - +

    + + simd_size<T, Abi> shall have a member value if and only if -

    - - memory_alignment<T, U> has a member value if and only if - +

    -
  • - is_simd_v<T> is true and U is a vectorizable type. + + The rules are different from those in (). + -
  • - -

    - -

    - - If value is present, the type memory_alignment<T, U> is a BinaryTypeTrait with a BaseCharacteristic of integral_constant<size_t, N> for some implementation-defined N (see and ). value identifies the alignment restrictions on pointers used for (converting) loads and stores for the give type T on arrays of type U. - -

    - -

    - - The behavior of a program that adds specializations for memory_alignment is undefined. - -

    -
    - - -

    Class templates const_where_expression and where_expression

    - -
    -namespace std::experimental {
    -inline namespace parallelism_v2 {
    -  template<class M, class T> class const_where_expression {
    -    const M mask; // exposition only
    -    T& data; // exposision only
    -
    -  public:
    -    const_where_expression(const const_where_expression&) = delete;
    -    const_where_expression& operator=(const const_where_expression&) = delete;
    -
    -    T operator-() const &&;
    -    T operator+() const &&;
    -    T operator~() const &&;
    -
    -    template<class U, class Flags> void copy_to(U* mem, Flags f) const &&;
    -  };
    -
    -  template<class M, class T>
    -  class where_expression : public const_where_expression<M, T> {
    -  public:
    -    template<class U> void operator=(U&& x) &&;
    -    template<class U> void operator+=(U&& x) &&;
    -    template<class U> void operator-=(U&& x) &&;
    -    template<class U> void operator*=(U&& x) &&;
    -    template<class U> void operator/=(U&& x) &&;
    -    template<class U> void operator%=(U&& x) &&;
    -    template<class U> void operator&=(U&& x) &&;
    -    template<class U> void operator|=(U&& x) &&;
    -    template<class U> void operator^=(U&& x) &&;
    -    template<class U> void operator<<=(U&& x) &&;
    -    template<class U> void operator>>=(U&& x) &&;
    -    void operator++() &&
    -    void operator++(int) &&
    -    void operator--() &&
    -    void operator--(int) &&
    -
    -    template<class U, class Flags> void copy_from(const U* mem, Flags) &&;
    -  };
    -}
    -}
    -    
    -
    - -

    - - The class templates const_where_expression and where_expression abstract the notion of selecting elements of a given object of arithmetic or data-parallel type. - -

    - -

    - - The first templates argument M shall be cv-unqualified bool or a cv-unqualified simd_mask specialization. - -

    - -

    - - If M is bool, T shall be a cv-unqualified arithmetic type. Otherwise, T shall either be M or typename M::simd_type. - -

    - -

    - - In this subclause, data[0] is used interchangably for data, mask[0] is used interchangably for mask, and M::size() is used interchangably for 1. - -

    - -

    - - The selected indices signify the integers i ∊ {j ∊ ℕ0j < M::size()mask[j] }. The selected elements signify the elements data[i] for all selected indices i. - -

    - -

    - - In this subclause, the value_type is an alias for T if M is bool, or an alias for typename T::value_type if is_simd_mask_v<M> is true. - -

    - -

    - - The where functions initialize mask with the first argument to where and data with the second argument to where. - -

    - - - -T operator-() const &&; -T operator+() const &&; -T operator~() const &&; - + +

    - - - A copy of data with the indicated unary operator applied to all selected elements. - - + If value is present, the type simd_size<T, Abi> is a BinaryTypeTrait with a BaseCharacteristic of integral_constant<size_t, N> with N equal to the number of elements in a simd<T, Abi> object. If simd<T, Abi> is not supported for the currently targeted system, simd_size<T, Abi>::value produces the value simd<T, Abi>::size() would return if it were supported. +

    +

    - - - Nothing. + The behavior of a program that adds specializations for simd_size is undefined. - - - +

    - - - template<class U, class Flags> void copy_to(U* mem, Flags) const &&; + template<class T, class U = typename T::value_type> struct memory_alignment { see below }; - - +

    - If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<T, U>. If the template parameter flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). If M is not bool, the largest i[0, M::size()) where mask[i] is true is less than the number of values pointed to by mem. + memory_alignment<T, U> shall have a member value if and only if - - +

      +
    • + + is_simd_mask_v<T> is true and U is bool, or + +
    • + +
    • + + is_simd_v<T> is true and U is a vectorizable type. + +
    • +
    + + +

    - Copies the selected elements as if mem[i] = static_cast<U>(data[i]) for all selected indices i. + If value is present, the type memory_alignment<T, U> is a BinaryTypeTrait with a BaseCharacteristic of integral_constant<size_t, N> for some implementation-defined N (see and ). value identifies the alignment restrictions on pointers used for (converting) loads and stores for the given type T on arrays of type U. - +

    - +

    - Nothing. + The behavior of a program that adds specializations for memory_alignment is undefined. - +

    +
    - + +

    Class templates const_where_expression and where_expression

    - This function shall not participate in overload resolution unless - - -
  • - is_simd_flag_type_v<Flags> is true, and -
  • - -
  • - either +
    +template<class M, class T> class const_where_expression {
    +  const M mask; // exposition only
    +  T& data; // exposition only
     
    -          
      -
    • - U is bool and value_type is bool, or -
    • +public: + const_where_expression(const const_where_expression&) = delete; + const_where_expression& operator=(const const_where_expression&) = delete; -
    • - U is a vectorizable type and value_type is not bool. -
    • -
    -
  • -
    -
    -
    - + T operator-() const &&; + T operator+() const &&; + T operator~() const &&; - - -template<class U> void operator=(U&& x) &&; - - + template<class U, class Flags> void copy_to(U* mem, Flags f) const &&; +}; - - - Replaces data[i] with static_cast<T>(std::forward<U>(x))[i] for all selected indices i. +template<class M, class T> +class where_expression : public const_where_expression<M, T> { +public: + template<class U> void operator=(U&& x) &&; + template<class U> void operator+=(U&& x) &&; + template<class U> void operator-=(U&& x) &&; + template<class U> void operator*=(U&& x) &&; + template<class U> void operator/=(U&& x) &&; + template<class U> void operator%=(U&& x) &&; + template<class U> void operator&=(U&& x) &&; + template<class U> void operator|=(U&& x) &&; + template<class U> void operator^=(U&& x) &&; + template<class U> void operator<<=(U&& x) &&; + template<class U> void operator>>=(U&& x) &&; + + void operator++() && + void operator++(int) && + void operator--() && + void operator--(int) && + + template<class U, class Flags> void copy_from(const U* mem, Flags) &&; +}; + - - - + +

    - This operator shall not participate in overload resolution unless U is convertible to T. + The class templates const_where_expression and where_expression abstract the notion of selecting elements of a given object of arithmetic or data-parallel type. - - - - - -template<class U> void operator+=(U&& x) &&; -template<class U> void operator-=(U&& x) &&; -template<class U> void operator*=(U&& x) &&; -template<class U> void operator/=(U&& x) &&; -template<class U> void operator%=(U&& x) &&; -template<class U> void operator&=(U&& x) &&; -template<class U> void operator|=(U&& x) &&; -template<class U> void operator^=(U&& x) &&; -template<class U> void operator<<=(U&& x) &&; -template<class U> void operator>>=(U&& x) &&; - - - +

    + +

    - Replaces data[i] with static_cast<T>(data @ std::forward<U>(x))[i] (where @ denotes the indicated operator) for all selected indices i. + The first template argument M shall be cv-unqualified bool or a cv-unqualified simd_mask specialization. - - - +

    + +

    - Each of these operators shall not participate in overload resolution unless the return type of data @ std::forward<U>(x) is convertible to T. It is unspecified whether the binary operator, implied by the compound assignment operator, is executed on all elements or only on the selected elements. + If M is bool, T shall be a cv-unqualified arithmetic type. Otherwise, T shall either be M or typename M::simd_type. - - - - - -void operator++() &&; -void operator++(int) &&; -void operator--() &&; -void operator--(int) &&; - - - - +

    + +

    - Applies the indicated operator to the selected elements. + In this subclause, if M is bool, data[0] is used interchangeably for data, mask[0] is used interchangeably for mask, and M::size() is used interchangeably for 1. - - - +

    + +

    - Each of these operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type T. + The selected indices signify the integers i ∊ {j ∊ ℕ₀ | j < M::size() ∧ mask[j]}. The selected elements signify the elements data[i] for all selected indices i. - - - - - - -template<class U, class Flags> void copy_from(const U* mem, Flags) &&; - - - +

    + +

    - If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<T, U>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignon(U). If is_simd_flag_type_v<U> is true, for all selected indices i, i shall be less than the number of values pointed to by mem. + In this subclause, the type value_type is an alias for T if M is bool, or an alias for typename T::value_type if is_simd_mask_v<M> is true. - - - + + +

    - Replaces the selected elements as if data[i] = static_cast<value_type>(mem[i]) for all selected indices i. + The where functions initialize mask with the first argument to where and data with the second argument to where. - - - - - This function shall not participate in overload resolution unless - - -

  • - is_simd_flag_type_v<Flags> is true, and -
  • - -
  • - either - -
      -
    • - U is bool and value_type is bool, or -
    • - -
    • - U is a vectorizable type and value_type is not bool. -
    • -
    -
  • - - -
    -
    +

    + + + + T operator-() const &&; + T operator+() const &&; + T operator~() const &&; + + + + + + A copy of data with the indicated unary operator applied to all selected elements. + + + + + + Nothing. + + + + + + +template<class U, class Flags> void copy_to(U* mem, Flags) const &&; + + + + + + If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<T, U>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). If M is not bool, the largest i[0, M::size()) where mask[i] is true is less than the number of values pointed to by mem. + + + + + + Copies the selected elements as if mem[i] = static_cast<U>(data[i]) for all selected indices i. + + + + + + Nothing. + + + + + + This function shall not participate in overload resolution unless + +
      +
    • + is_simd_flag_type_v<Flags> is true, and +
    • + +
    • + either + +
        +
      • + U is bool and value_type is bool, or +
      • + +
      • + U is a vectorizable type and value_type is not bool. +
      • +
      +
    • +
    +
    +
    +
    + + + +template<class U> void operator=(U&& x) &&; + + + + + + Replaces data[i] with static_cast<T>(std::forward<U>(x))[i] for all selected indices i. + + + + + + This operator shall not participate in overload resolution unless U is convertible to T. + + + + + + +template<class U> void operator+=(U&& x) &&; +template<class U> void operator-=(U&& x) &&; +template<class U> void operator*=(U&& x) &&; +template<class U> void operator/=(U&& x) &&; +template<class U> void operator%=(U&& x) &&; +template<class U> void operator&=(U&& x) &&; +template<class U> void operator|=(U&& x) &&; +template<class U> void operator^=(U&& x) &&; +template<class U> void operator<<=(U&& x) &&; +template<class U> void operator>>=(U&& x) &&; + + + + + Replaces data[i] with static_cast<T>(data @ std::forward<U>(x))[i] (where @ denotes the indicated operator) for all selected indices i. + + + + + + Each of these operators shall not participate in overload resolution unless the return type of data @ std::forward<U>(x) is convertible to T. It is unspecified whether the binary operator, implied by the compound assignment operator, is executed on all elements or only on the selected elements. + + + + + + +void operator++() &&; +void operator++(int) &&; +void operator--() &&; +void operator--(int) &&; + + + + + + Applies the indicated operator to the selected elements. + + + + + + Each of these operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type T. + + + + + + + +template<class U, class Flags> void copy_from(const U* mem, Flags) &&; + + + + + If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<T, U>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). 
If is_simd_flag_type_v<U> is true, for all selected indices i, i shall be less than the number of values pointed to by mem. + + + + + + Replaces the selected elements as if data[i] = static_cast<value_type>(mem[i]) for all selected indices i. + + + + + + This function shall not participate in overload resolution unless + +
      +
    • + is_simd_flag_type_v<Flags> is true, and +
    • + +
    • + either + +
        +
      • + U is bool and value_type is bool, or +
      • + +
      • + U is a vectorizable type and value_type is not bool. +
      • +
      +
    • +
    +
    +
    +
    + @@ -833,89 +827,83 @@

    Class template simd overview

    -namespace std::experimental {
    -inline namespace parallelism_v2 {
    -  template<class T, class Abi> class simd {
    -  public:
    -    using value_type = T;
    -    using reference = see-below;
    -    using mask_type = simd_mask<T, Abi>
    -    using abi_type = Abi;
    -
    -    static constexpr size_t size() noexcept;
    -
    -    simd() = default;
    -
    -    // implicit conversion constructor
    -    template<class U> simd(const simd<U, simd_abi::fixed_size<size()>>&);
    -
    -    // implicit broadcast constructor (see below for constraints)
    -    template<class U> simd(U&& value);
    -
    -    // generator constructor (see below for constraints)
    -    template<class G> explicit simd(G&& gen);
    -
    -    // load constructor
    -    template<class U, class Flags> simd(const U* mem, Flags f);
    -
    -     loads
    -    template<class U, class Flags> copy_from(const U* mem, Flags f);
    -
    -     stores
    -    template<class U, class Flags> copy_to(U* mem, Flags f);
    -
    -     scalar access
    -    reference operator[](size_t);
    -    value_type operator[](size_t) const;
    -
    -     unary operators
    -    simd& operator++();
    -    simd operator++(int);
    -    simd& operator--();
    -    simd operator--(int);
    -    mask_type operator!() const;
    -    simd operator~() const; // see below
    -    simd operator+() const;
    -    simd operator-() const;
    -
    -     binary operators
    -    friend simd operator+(const simd&, const simd&);
    -    friend simd operator-(const simd&, const simd&);
    -    friend simd operator*(const simd&, const simd&);
    -    friend simd operator/(const simd&, const simd&);
    -    friend simd operator%(const simd&, const simd&);
    -    friend simd operator&(const simd&, const simd&);
    -    friend simd operator|(const simd&, const simd&);
    -    friend simd operator^(const simd&, const simd&);
    -    friend simd operator<<(const simd&, const simd&);
    -    friend simd operator>>(const simd&, const simd&);
    -    friend simd operator<<(const simd&, int);
    -    friend simd operator>>(const simd&, int);
    -
    -     compound assignment
    -    friend simd& operator+=(simd&, const simd&);
    -    friend simd& operator-=(simd&, const simd&);
    -    friend simd& operator*=(simd&, const simd&);
    -    friend simd& operator/=(simd&, const simd&);
    -    friend simd& operator%=(simd&, const simd&);
    -    friend simd& operator&=(simd&, const simd&);
    -    friend simd& operator|=(simd&, const simd&);
    -    friend simd& operator^=(simd&, const simd&);
    -    friend simd& operator<<=(simd&, const simd&);
    -    friend simd& operator>>=(simd&, const simd&);
    -    friend simd& operator<<=(simd&, int);
    -    friend simd& operator>>=(simd&, int);
    -
    -     compares
    -    friend mask_type operator==(const simd&, const simd&);
    -    friend mask_type operator!=(const simd&, const simd&);
    -    friend mask_type operator>=(const simd&, const simd&);
    -    friend mask_type operator<=(const simd&, const simd&);
    -    friend mask_type operator>(const simd&, const simd&);
    -    friend mask_type operator<(const simd&, const simd&);
    -  };
    -}
    -}
    +template<class T, class Abi> class simd {
    +public:
    +  using value_type = T;
    +  using reference = see below;
    +  using mask_type = simd_mask<T, Abi>;
    +  using abi_type = Abi;
    +
    +  static constexpr size_t size() noexcept;
    +
    +  simd() = default;
    +
    +  // implicit conversion constructor
    +  template<class U> simd(const simd<U, simd_abi::fixed_size<size()>>&);
    +
    +  // implicit broadcast constructor (see below for constraints)
    +  template<class U> simd(U&& value);
    +
    +  // generator constructor (see below for constraints)
    +  template<class G> explicit simd(G&& gen);
    +
    +  // load constructor
    +  template<class U, class Flags> simd(const U* mem, Flags f);
    +
    +  
    +  template<class U, class Flags> void copy_from(const U* mem, Flags f);
    +  template<class U, class Flags> void copy_to(U* mem, Flags f);
    +
    +  
    +  reference operator[](size_t);
    +  value_type operator[](size_t) const;
    +
    +  
    +  simd& operator++();
    +  simd operator++(int);
    +  simd& operator--();
    +  simd operator--(int);
    +  mask_type operator!() const;
    +  simd operator~() const;
    +  simd operator+() const;
    +  simd operator-() const;
    +
    +  
    +  friend simd operator+(const simd&, const simd&);
    +  friend simd operator-(const simd&, const simd&);
    +  friend simd operator*(const simd&, const simd&);
    +  friend simd operator/(const simd&, const simd&);
    +  friend simd operator%(const simd&, const simd&);
    +  friend simd operator&(const simd&, const simd&);
    +  friend simd operator|(const simd&, const simd&);
    +  friend simd operator^(const simd&, const simd&);
    +  friend simd operator<<(const simd&, const simd&);
    +  friend simd operator>>(const simd&, const simd&);
    +  friend simd operator<<(const simd&, int);
    +  friend simd operator>>(const simd&, int);
    +
    +  
    +  friend simd& operator+=(simd&, const simd&);
    +  friend simd& operator-=(simd&, const simd&);
    +  friend simd& operator*=(simd&, const simd&);
    +  friend simd& operator/=(simd&, const simd&);
    +  friend simd& operator%=(simd&, const simd&);
    +  friend simd& operator&=(simd&, const simd&);
    +  friend simd& operator|=(simd&, const simd&);
    +  friend simd& operator^=(simd&, const simd&);
    +  friend simd& operator<<=(simd&, const simd&);
    +  friend simd& operator>>=(simd&, const simd&);
    +  friend simd& operator<<=(simd&, int);
    +  friend simd& operator>>=(simd&, int);
    +
    +  
    +  friend mask_type operator==(const simd&, const simd&);
    +  friend mask_type operator!=(const simd&, const simd&);
    +  friend mask_type operator>=(const simd&, const simd&);
    +  friend mask_type operator<=(const simd&, const simd&);
    +  friend mask_type operator>(const simd&, const simd&);
    +  friend mask_type operator<(const simd&, const simd&);
    +};
           
    @@ -929,19 +917,19 @@

    Class template simd overview

    Every specialization of simd shall be a complete type. The specialization simd<T, Abi> is supported if T is a vectorizable type and - -
  • - - Abi is simd_abi::scalar, or - -
  • +
      +
    • + + Abi is simd_abi::scalar, or + +
    • -
    • - - Abi is simd_abi::fixed_size<N>, with N is constrained as defined in . - -
    • - +
    • + + Abi is simd_abi::fixed_size<N>, with N is constrained as defined in . + +
    • +
    If Abi is an extended ABI tag, it is implementation-defined whether simd<T, Abi> is supported. The intent is for implementations to decide on the basis of the currently targeted system. @@ -955,7 +943,7 @@

    Class template simd overview

    Consider an implementation that defines the extended ABI tags __simd_x and __gpu_y. When the compiler is invoked to translate to a machine that has support for the __simd_x ABI tag for all arithmetic types other than long double and no support for the __gpu_y ABI tag, then: - +
    • simd<T, simd_abi::__gpu_y> is not supported for any T and has a deleted constructor.
    • @@ -971,11 +959,11 @@

      Class template simd overview

    • simd<long double, simd_abi::scalar> is supported.
    • - +
    -

    +

    @@ -1009,7 +997,7 @@

    Class template simd overview


    [ Example: - Consider an implementation that supports the type __vec4f and the function __vec4f, _vec4f_addsub(__vec4f, __vec4f) for the currently targeted system. + Consider an implementation that supports the type __vec4f and the function __vec4f _vec4f_addsub(__vec4f, __vec4f) for the currently targeted system. A user may require the use of _vec4f_addsub for maximum performance and thus writes:
    
    @@ -1023,7 +1011,7 @@ 

    Class template simd overview

    — end example ]
    -

    + @@ -1042,7 +1030,7 @@

    Element references

             
    -class reference // exposition only
    +class reference // exposition only
     {
     public:
       reference() = delete;
    @@ -1071,10 +1059,9 @@ 

    Element references

    friend void swap(reference&& a, reference&& b) noexcept; friend void swap(value_type&& a, reference&& b) noexcept; friend void swap(reference&& a, value_type&& b) noexcept; - };
    -

    + operator value_type() const noexcept; @@ -1136,7 +1123,7 @@

    Element references

    - This function shall not participate in overload resolution unless declval<value_type &&>() @= std::forward<U>(x) (where @= denotes the indicated compound assignment operator) is well-formed. + This function shall not participate in overload resolution unless declval<value_type &>() @= std::forward<U>(x) (where @= denotes the indicated compound assignment operator) is well-formed.
    @@ -1233,7 +1220,7 @@

    simd constructors



    - +
    • From is a vectorizable type and every possibly value of From can be represented with type value_type, or
    • @@ -1249,7 +1236,7 @@

      simd constructors

      From is unsigned int and value_type is an unsigned integral type. - +
    @@ -1268,7 +1255,7 @@

    simd constructors

    This constructor shall not participate in overload resolution unless - +
    • abi_type is simd_abi::fixed_size<size()>, and @@ -1286,7 +1273,7 @@

      simd constructors

      if both U and value_type are integral, the integer conversion rank [conv.rank] of value_type is greater than the integer conversion rank of U.
    • - +
    @@ -1317,7 +1304,7 @@

    simd constructors

    - If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligend by memory_alignment_v<simd, U>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignon(U). [mem, mem + size()) is a valid range. + If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd, U>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range. @@ -1331,7 +1318,7 @@

    simd constructors

    This constructor shall not participate in overload resolution unless - +
    • is_simd_flag_type_v<Flags> is true, and @@ -1343,7 +1330,7 @@

      simd constructors

      U is a vectorizable type.
    • - +
    @@ -1357,7 +1344,7 @@

    simd copy functions

    - If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd, U>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligend by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignonf(U). [mem, mem + size()) is a valid range. + If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd, U>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range. @@ -1371,7 +1358,7 @@

    simd copy functions

    This function shall not participate in overload resolution unless - +
    • is_simd_flag_type_v<Flags> is true, and
    • @@ -1379,7 +1366,7 @@

      simd copy functions

    • U is a vectorizable type.
    • - +
    @@ -1403,7 +1390,7 @@

    simd copy functions

    This function shall not participate in overload resolution unless - +
    • is_simd_flag_type_v<Flags> is true, and
    • @@ -1411,7 +1398,7 @@

      simd copy functions

    • U is a vectorizable type.
    • - +
    @@ -1431,7 +1418,7 @@

    simd subscript operators

    - A reference (see [parallel.simd.reference]) referring to the i-th element. + A reference (see ) referring to the i-th element. @@ -1509,7 +1496,7 @@

    simd unary operators

    A copy of *this before incrementing. - + @@ -1553,7 +1540,7 @@

    simd unary operators

    A copy of *this before decrementing. - + @@ -1632,13 +1619,15 @@

    simd unary operators

    + - -

    simd non-member operations

    + +

    simd non-member operations

    - - - + +

    simd binary operators

    + + friend simd operator+(const simd& lhs, const simd& rhs); friend simd operator-(const simd& lhs, const simd& rhs); friend simd operator*(const simd& lhs, const simd& rhs); @@ -1651,56 +1640,56 @@

    simd non-member operations

    friend simd operator>>(const simd& lhs, const simd& rhs);
    - - - A simd object initialized with the results of the element-wise application of the indicated operator. - - + + + A simd object initialized with the results of the element-wise application of the indicated operator. + + - - - Nothing. - - + + + Nothing. + + - - - Each of these operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type value_type. - - -
    + + + Each of these operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type value_type. + + +
    - - + + friend simd operator<<(const simd& v, int n); friend simd operator>>(const simd& v, int n); - - - A simd object where the i-th element is initialized to the result of applying the indicated operator to v[i] and n for all i[0, size()). - - + + + A simd object where the i-th element is initialized to the result of applying the indicated operator to v[i] and n for all i[0, size()). + + - - - Nothing. - - + + + Nothing. + + - - - These operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type value_type. - - - -
    + + + These operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type value_type. + + + +
    - -

    simd compound assignment

    + +

    simd compound assignment

    - - + + friend simd& operator+=(simd& lhs, const simd& rhs); friend simd& operator-=(simd& lhs, const simd& rhs); friend simd& operator*=(simd& lhs, const simd& rhs); @@ -1715,37 +1704,37 @@

    simd compound assignment

    friend simd& operator>>=(simd& lhs, int n);
    - - - These operators perform the indicated binary element-wise operation. - - + + + These operators perform the indicated binary element-wise operation. + + - - - lhs. - - + + + lhs. + + - - - Nothing. - - + + + Nothing. + + - - - These operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type value_type. - - -
    -
    + + + These operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type value_type. + + + +
    - -

    simd compare operators

    + +

    simd compare operators

    - - + + friend mask_type operator==(const simd&, const simd&); friend mask_type operator!=(const simd&, const simd&); friend mask_type operator>=(const simd&, const simd&); @@ -1754,1136 +1743,1135 @@

    simd compare operators

    friend mask_type operator<(const simd&, const simd&);
    - - - A simd_mask object initialized with the results of the element-wise application of the indicated operator. - - + + + A simd_mask object initialized with the results of the element-wise application of the indicated operator. + + - - - Nothing. - - -
    -
    + + + Nothing. + + + +
    + + +

    simd reductions

    - - - +

    + + In this subclause, BinaryOperation shall be a binary element-wise operation. + +

    + + + template<class T, class Abi, class BinaryOperation = plus<>> T reduce(const simd<T, Abi>& x, BinaryOperation binary_op = {}); - - - binary_op shall be callable with two arguments of type T returning T, or callable with two arguments of type simd<T, A1> returning simd<T, A1> for every A1 that is an ABI tag type. - - - - - - GENERALIZED_SUM(binary_op, x.data[i], ...) for all i[0, size()). - - + + + binary_op shall be callable with two arguments of type T returning T, or callable with two arguments of type simd<T, A1> returning simd<T, A1> for every A1 that is an ABI tag type. + + - - - Any exception thrown from binary_op. - - + + + GENERALIZED_SUM(binary_op, x.data[i], ...) for all i[0, size()). + + -

    - - - This overload of reduce does not require an initial value because x is guaranteed to be non-empty. - - -

    -
    + + + Any exception thrown from binary_op. + + +
    - - + + template<class M, class V, class BinaryOperation> typename V::value_type reduce(const const_where_expression<M, V>& x, typename V::value_type identity_element, - BinaryOperation binary_op); + BinaryOperation binary_op = {}); - - - binary_op shall be callable with two arguments of type T returning T, or callable with two arguments of type simd<T, A1> returning simd<T, A1> for every A1 that is an ABI tag type. The results of binary_op(identity_element, x) and binary_op(x, identity_element) shall be equal to x for all finite values x representable by V::value_type. - - + + + binary_op shall be callable with two arguments of type T returning T, or callable with two arguments of type simd<T, A1> returning simd<T, A1> for every A1 that is an ABI tag type. The results of binary_op(identity_element, x) and binary_op(x, identity_element) shall be equal to x for all finite values x representable by V::value_type. + + - - - If none_of(x.mask), returns identity_element. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. - - + + + If none_of(x.mask), returns identity_element. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. + + - - - Any exception thrown from binary_op. - - - + + + Any exception thrown from binary_op. + + + - - + + template<class M, class V> -typename V::value_type reduce(const const_where_expression<M, V>& x, plus<> binary_op = {}); +typename V::value_type reduce(const const_where_expression<M, V>& x, plus<> binary_op); - - - If none_of(x.mask), returns 0. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. - - + + + If none_of(x.mask), returns 0. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. + + - - - Nothing. - - - + + + Nothing. 
+ + + - - + + template<class M, class V> -typename V::value_type reduce(const const_where_expression<M, V>& x, multiplies<> binary_op = {}); +typename V::value_type reduce(const const_where_expression<M, V>& x, multiplies<> binary_op); - - - If none_of(x.mask), returns 1. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. - - + + + If none_of(x.mask), returns 1. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. + + - - - Nothing. - - - + + + Nothing. + + + - - + + template<class M, class V> -typename V::value_type reduce(const const_where_expression<M, V>& x, bit_and<> binary_op = {}); +typename V::value_type reduce(const const_where_expression<M, V>& x, bit_and<> binary_op); - - - is_integral_v<V::value_type> is true. - - + + + is_integral_v<V::value_type> is true. + + - - - If none_of(x.mask), returns ~V::value_type(). Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. - - + + + If none_of(x.mask), returns ~V::value_type(). Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. + + - - - Nothing. - - - + + + Nothing. + + + - - + + template<class M, class V> -typename V::value_type reduce(const const_where_expression<M, V>& x, bit_or<> binary_op = {}); +typename V::value_type reduce(const const_where_expression<M, V>& x, bit_or<> binary_op); template<class M, class V> -typename V::value_type reduce(const const_where_expression<M, V>& x, bit_xor<> binary_op = {}); +typename V::value_type reduce(const const_where_expression<M, V>& x, bit_xor<> binary_op); - - - is_integral_v<V::value_type> is true. - - + + + is_integral_v<V::value_type> is true. + + - - - If none_of(x.mask), returns 0. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. - - + + + If none_of(x.mask), returns 0. 
Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...) for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. + + - - - Nothing. - - - + + + Nothing. + + + - - template<class T, class Abi> T hmin(const simd<T, Abi>& x); + + template<class T, class Abi> T hmin(const simd<T, Abi>& x); - - - The value of an element x[j] for which x[j] <= x[i] for all i[0, size()). - - + + + The value of an element x[j] for which x[j] <= x[i] for all i[0, size()). + + - - - Nothing. - - - + + + Nothing. + + + - - template<class T, class V> typename V::value_type hmin(const const_where_expression<M, V>& x); + + template<class T, class V> typename V::value_type hmin(const const_where_expression<M, V>& x); - - - If none_of(x.mask), the return value is numeric_limits<V::value_type>::max(). Otherwise, returns the value of an element x.data[j] for which x.mask[j] == true and x.data[j] <= x.data[i] for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. - - + + + If none_of(x.mask), the return value is numeric_limits<V::value_type>::max(). Otherwise, returns the value of an element x.data[j] for which x.mask[j] == true and x.data[j] <= x.data[i] for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. + + - - - Nothing. - - - + + + Nothing. + + + - - template<class T, class Abi> T hmax(const simd<T, Abi>& x); + + template<class T, class Abi> T hmax(const simd<T, Abi>& x); - - - The value of an element x[j] for which x[j] >= x[i] for all i[0, size()). - - + + + The value of an element x[j] for which x[j] >= x[i] for all i[0, size()). + + - - - Nothing. - - - + + + Nothing. + + + - - template<class T, class V> typename V::value_type hmax(const const_where_expression<M, V>& x); + + template<class T, class V> typename V::value_type hmax(const const_where_expression<M, V>& x); - - - If none_of(x.mask), the return value is numeric_limits<V::value_type>::lowest(). Otherwise, returns the value of an element x.data[j] for which x.mask[j] == true and x.data[j] >= x.data[i] for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. 
- - + + + If none_of(x.mask), the return value is numeric_limits<V::value_type>::lowest(). Otherwise, returns the value of an element x.data[j] for which x.mask[j] == true and x.data[j] >= x.data[i] for all i ∊ {j ∊ ℕ0j < M::size()mask[j] }. + + - - - Nothing. - - - -
    + + + Nothing. + + + +
    - -

    simd casts

    + +

    simd casts

    - - template<class T, class U, class Abi> see-below simd_cast(const simd<U, Abi>& x) + + template<class T, class U, class Abi> see below simd_cast(const simd<U, Abi>& x) -

    - - Let To identify T::value_type if is_simd_v<T> is true, or T otherwise. - -

    +

    + + Let To identify T::value_type if is_simd_v<T> is true, or T otherwise. + +

    - - - A simd object with the i-th element initialized to static_cast<To>(x[i]) for all i[0, size()). - - + + + A simd object with the i-th element initialized to static_cast<To>(x[i]) for all i[0, size()). + + - - - Nothing. - - + + + Nothing. + + - - - The function shall not participate in overload resolution unless + + + The function shall not participate in overload resolution unless - -
  • - - every possible value of type U can be represented with type To, and - -
  • +
      +
    • + + every possible value of type U can be represented with type To, and + +
    • -
    • - - either - -
        -
      • - - is_simd_v<T> is false, or - -
      • - -
      • - - T::size() == simd<U, Abi>::size() is true. - -
      • -
      -
      -
    • - - - +
    • + + either + +
        +
      • + + is_simd_v<T> is false, or + +
      • + +
      • + + T::size() == simd<U, Abi>::size() is true. + +
      • +
      +
      +
    • +
    +
    +
    -

    - - The return type is - +

    + + The return type is + - -

  • - - T if is_simd_v<T> is true, otherwise - -
  • +
      +
    • + + T if is_simd_v<T> is true, otherwise + +
    • -
    • - - simd<T, Abi> is U is T, otherwise - -
    • +
    • + + simd<T, Abi> if U is T, otherwise + +
    • -
    • - - simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>> - -
    • - -
      - +
    • + + simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>> + +
    • +
    +
    +
    - - template<class T, class U, class Abi> see-below static_simd_cast(const simd<U, Abi>& x); + + template<class T, class U, class Abi> see below static_simd_cast(const simd<U, Abi>& x); -

    - - Let To identify T::value_type if is_simd_v<T> is true or T otherwise. - -

    +

    + + Let To identify T::value_type if is_simd_v<T> is true or T otherwise. + +

    - - - A simd object with the i-th element initialized to static_cast<To>(x[i]) for all i[0, size()). - - + + + A simd object with the i-th element initialized to static_cast<To>(x[i]) for all i[0, size()). + + - - - Nothing. - - + + + Nothing. + + - - - The function shall not participate in overload resolution unless either + + + The function shall not participate in overload resolution unless either - -
  • - - is_simd_v<T> is false, or - -
  • +
      +
    • + + is_simd_v<T> is false, or + +
    • -
    • - - T::size() == simd<U, Abi>::size() is true. - -
    • - - - +
    • + + T::size() == simd<U, Abi>::size() is true. + +
    • +
    +
    +
    -

    - The return type is +

    + The return type is - -

  • - - T if is_simd_v<T> is true, otherwise - -
  • +
      +
    • + + T if is_simd_v<T> is true, otherwise + +
    • -
    • - - simd<T, Abi> if either U is T or U and T are integral types that only differ in signedness, otherwise - -
    • +
    • + + simd<T, Abi> if either U is T or U and T are integral types that only differ in signedness, otherwise + +
    • -
    • - - simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>>. - -
    • - -
      - +
    • + + simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>>. + +
    • +
    +
    +
    - - + + template<class T, class Abi> fixed_size_simd<T, simd_size_v<T, Abi>> to_fixed_size(const simd<T, Abi>& x) noexcept; template<class T, class Abi> fixed_size_simd_mask<T, simd_size_v<T, Abi>> to_fixed_size(const simd_mask<T, Abi>& x) noexcept; - - - A data-parallel object with the i-th element initialized to x[i] for all i[0, size()). - - - + + + A data-parallel object with the i-th element initialized to x[i] for all i[0, size()). + + + - - + + template<class T, int N> native_simd<T> to_native(const fixed_size_simd<T, N>& x) noexcept; template<class T, int N> native_simd_mask<T> to_native(const fixed_size_simd_mask<T, N>& x) noexcept; - - - A data-parallel object with the i-th element initialized to x[i] for all i[0, size()). - - + + + A data-parallel object with the i-th element initialized to x[i] for all i[0, size()). + + - - - These functions shall not participate in overload resolution unless simd_size_v<T, simd_abi::native<T>> == N is true. - - - + + + These functions shall not participate in overload resolution unless simd_size_v<T, simd_abi::native<T>> == N is true. + + + - - + + template<class T, int N> simd<T> to_compatible(const fixed_size_simd<T, N>& x) noexcept; template<class T, int N> simd_mask<T> to_compatible(const fixed_size_simd_mask<T, N>& x) noexcept; - - - A data-parallel object with the i-th element initialized to x[i] for all i[0, size()). - - + + + A data-parallel object with the i-th element initialized to x[i] for all i[0, size()). + + - - - These functions shall not participate in overload resolution unless simd_size_v<T, simd_abi::compatible<T>> == N is true. - - - + + + These functions shall not participate in overload resolution unless simd_size_v<T, simd_abi::compatible<T>> == N is true. + + + - - + + template<size_t... Sizes, class T, class Abi> tuple<simd<T, simd_abi::deduce_t<T, Sizes>>...> split(const simd<T, Abi>& x); template<size_t... 
Sizes, class T, class Abi> tuple<simd_mask<T, simd_abi::deduce_t<T, Sizes>>...> split(const simd_mask<T, Abi>& x); - - - A tuple of data-parallel objects with the i-th simd/simd_mask element of the j-th tuple element initialized to the value of the element x with index i + sum of the first j values in the Sizes pack. - - + + + A tuple of data-parallel objects with the i-th simd/simd_mask element of the j-th tuple element initialized to the value of the element x with index i + sum of the first j values in the Sizes pack. + + - - - These functions shall not participate in overload resolution unless the sum of all values in the Sizes pack is equal to simd_size_v<T, Abi>. - - - + + + These functions shall not participate in overload resolution unless the sum of all values in the Sizes pack is equal to simd_size_v<T, Abi>. + + + - - + + template<class V, class Abi> array<V, simd_size_v<typename V::value_type, Abi> / V::size()> split(const simd<typename V::value_type, Abi>& x); template<class V, class Abi> array<V, simd_size_v<typename V::value_type, Abi> / V::size()> split(const simd_mask<typename V::value_type, Abi>& x); - - - An array of data-parallel objects with the i-th simd/simd_mask element of the j-th array element initialized to the value of the element in x with index i + j * V::size(). - - + + + An array of data-parallel objects with the i-th simd/simd_mask element of the j-th array element initialized to the value of the element in x with index i + j * V::size(). + + - - - These functions shall not participate in overload resolution unless + + + These functions shall not participate in overload resolution unless - -
  • - - simd_size_v<typename V::value_type, Abi> is an integral multiple of V::size(), and - -
  • +
      +
    • + + simd_size_v<typename V::value_type, Abi> is an integral multiple of V::size(), and + +
    • -
    • - - for the overload with a simd parameter is_simd_v<V> is true, for the overload with a simd_mask parameter is_simd_mask_v<V> is true. - -
    • - - - - +
    • + + for the overload with a simd parameter is_simd_v<V> is true, for the overload with a simd_mask parameter is_simd_mask_v<V> is true. + +
    • +
    +
    +
    +
    - - + + template<class T, class... Abis> simd<T, simd_abi::deduce_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd<T, Abis>&... xs); template<class T, class... Abis> simd_mask<T, simd_abi::deduce_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd_mask<T, Abis>&... xs); - - - A data-parallel object initialized with the concatenated values in the xs pack of data-parallel objects: The i-th simd/simd_mask element of the j-th parameter in the xs pack is copied to the return value's element with index i + the sum of the width of the first j parameters in the xs pack. - - - -
    - - -

    simd algorithms

    - - - template<class T, class Abi> simd<T, Abi> min(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; - - - - The result of the element-wise application of std::min(a[i], b[i]) for all i[0, size()). - - - - - - template<class T, class Abi> simd<T, Abi> max(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; - - - - The result of the element-wise application of std::max(a[i], b[i]) for all i[0, size()). - - - - - - -template<class T, class Abi> -pair<simd<T, Abi>, simd<T, Abi>> minmax(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; - - - - A pair initialized with - - -
  • - - the result of element-wise application of std::min(a[i], b[i]) for all i[0, size()) in the first member, and - - -
  • - -
  • - - the result of element-wise application of std::max(a[i], b[i]) for all i[0, size()) in the second member, and - - -
  • -
    -
    -
    -
    - - - -template<class T, class Abi> simd<T, Abi> -clamp(const simd<T, Abi>& v, const simd<T, Abi>& lo, const simd<T, Abi>& hi); - - - - No element in lo shall be greater than the corresponding element in hi. - - - - - - The result of element-wise application of std::clamp(v[i], lo[i], hi[i]) for all i[0, size()). - - - - -
    + + + A data-parallel object initialized with the concatenated values in the xs pack of data-parallel objects: The i-th simd/simd_mask element of the j-th parameter in the xs pack is copied to the return value's element with index i + the sum of the width of the first j parameters in the xs pack. + + +
    - -

    simd math library

    - -

    - - For each set of overloaded functions within <cmath>, there shall be additional overloads sufficient to ensure that if any argument corresponding to a double parameter has type simd<T, Abi>, where is_floating_point_v<T> is true, then: - - -

  • - - All arguments corresponding to double parameters shall be convertible to simd<T, Abi>. - -
  • - -
  • - - All arguments corresponding to double* parameters shall be of type simd<T, Abi>*. - -
  • - -
  • - - All arguments corresponding to parameters of integral type U shall be convertible to fixed_size_simd<U, simd_size_v<T, Abi>>. - -
  • - -
  • - - All arguments corresponding to U*, where U is integral, shall be of type fixed_size_simd<U, simd_size_v<T, Abi>>*. - -
  • - -
  • - - If the corresponding return type is double, the return type of the additional overloads is simd<T, Abi>. Otherwise, if the corresponding return type is bool, the return type of the additional overload is simd_mask<T, Abi>. Otherwise, the return type is fixed_size_simd<R, simd_size_v<T, Abi>>, with R denoting the corresponding return type. - -
  • - + +

    simd algorithms

    -
    + + template<class T, class Abi> simd<T, Abi> min(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; + - It is unspecified whether a call to these overloads with arguments that are all convertible to simd<T, Abi> but are not of type simd<T, Abi> is well-formed. + The result of the element-wise application of std::min(a[i], b[i]) for all i[0, size()). - -

    - -

    - - Each function overload produced by the above rules applies the indicated <cmath> function element-wise. The results per element are not required to be bitwise equal to the application of the function which is overloaded for the element type. - -

    - -

    - - The behavior is undefined if a domain, pole, or range error occurs when the input argument(s) are applied to the indicated <cmath> function. - -

    +
    +
    -

    - - If abs is called with an argument of type simd<X, Abi> for which is_unsigned_v<X> is true, the program is ill-formed. - -

    -
    + + template<class T, class Abi> simd<T, Abi> max(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; - -

    Class template simd_mask

    + + + The result of the element-wise application of std::max(a[i], b[i]) for all i[0, size()). + + +
    - -

    Class template simd_mask overview

    + + +template<class T, class Abi> +pair<simd<T, Abi>, simd<T, Abi>> minmax(const simd<T, Abi>& a, const simd<T, Abi>& b) noexcept; -
    +        
             
    -namespace std::experimental {
    -inline namespace parallelism_v2 {
    -  template<class T, class Abi> class simd_mask {
    -  public:
    -    using value_type = bool;
    -    using reference = see-below;
    -    using simd_type = simd<T, Abi>;
    -    using abi_type = Abi;
    +          A pair initialized with
    +
    +          
      +
    • + + the result of element-wise application of std::min(a[i], b[i]) for all i[0, size()) in the first member, and - static constexpr size_t size() noexcept; + +
    • - simd_mask() = default; +
    • + + the result of element-wise application of std::max(a[i], b[i]) for all i[0, size()) in the second member. - // broadcast constructor - explicit simd_mask(value_type) noexcept; + +
    • +
    +
    +
    + - // implicit type conversion constructor - template<class U> simd_mask(const simd_mask<U, simd_abi::fixed_size<size()>>&) noexcept; + + +template<class T, class Abi> simd<T, Abi> +clamp(const simd<T, Abi>& v, const simd<T, Abi>& lo, const simd<T, Abi>& hi); - // load constructor - template<class Flags> simd_mask(const value_Type* mem, Flags); + + + No element in lo shall be greater than the corresponding element in hi. + + - loads - template<class Flags> void copy_from(const value_type* mem, Flags); - template<class Flags> void copy_to(value_type* mem, Flags); + + + The result of element-wise application of std::clamp(v[i], lo[i], hi[i]) for all i[0, size()). - scalar access - reference operator[](size_t); - value_type operator[](size_t) const; + + + + - unary operators - simd_mask operator!() const noexcept; - - simd_mask binary operators - friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept; - friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept; - friend simd_mask operator&(const simd_mask&, const simd_mask&) noexcept; - friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept; - friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept; - - simd_mask compound assignment - friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept; - friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept; - friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept; - - simd_mask compares - friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept; - friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept; - }; -} -}
    + +

    simd math library

    - The class template simd_mask is a data-parallel type with the element type bool. The width of a given simd_mask specialization is a constant expression, determined by the template parameters. Specifically, simd_mask<T, Abi>::size() == simd<T, Abi>::size(). - -

    + For each set of overloaded functions within <cmath>, there shall be additional overloads sufficient to ensure that if any argument corresponding to a double parameter has type simd<T, Abi>, where is_floating_point_v<T> is true, then: -

    - - Every specialization of simd_mask shall be a complete type. The specialization simd_mask<T, Abi> is supported if T is a vectorizable type and +

      +
    • + + All arguments corresponding to double parameters shall be convertible to simd<T, Abi>. + +
    • + +
    • + + All arguments corresponding to double* parameters shall be of type simd<T, Abi>*. + +
    • + +
    • + + All arguments corresponding to parameters of integral type U shall be convertible to fixed_size_simd<U, simd_size_v<T, Abi>>. + +
    • -
    • - Abi is simd_abi::scalar, or + All arguments corresponding to U*, where U is integral, shall be of type fixed_size_simd<U, simd_size_v<T, Abi>>*.
    • - Abi is simd_abi::fixed_size<N>, with N constrained as defined in [parallel.simd.abi]. + If the corresponding return type is double, the return type of the additional overloads is simd<T, Abi>. Otherwise, if the corresponding return type is bool, the return type of the additional overload is simd_mask<T, Abi>. Otherwise, the return type is fixed_size_simd<R, simd_size_v<T, Abi>>, with R denoting the corresponding return type.
    • -
      +
    - If Abi is an extended ABI tag, it is implementation-defined whether simd_mask<T, Abi> is supported. The intent is for implementations to decide on the basis of the currently targeted system. - If simd_mask<T, Abi> is not supported, the specialization shall have a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment. + It is unspecified whether a call to these overloads with arguments that are all convertible to simd<T, Abi> but are not of type simd<T, Abi> is well-formed. -
    +

    - Default initialization performs no intialization of the elements; value-initialization initializes each element with false. Thus, default initialization leaves the elements in an indeterminate state. + Each function overload produced by the above rules applies the indicated <cmath> function element-wise. The results per element are not required to be bitwise equal to the application of the function which is overloaded for the element type.

    - - static constexpr size_t size() noexcept; - - - - The width of simd<T, Abi>. - - - -

    - Implementations should enable explicit conversion from and to implementation-defined types. This adds one or more of the following declarations to class simd_mask: - -

    -          
    -explicit operator implementation-defined() const;
    -explicit simd_mask(const implementation-defined& init) const;
    -
    + The behavior is undefined if a domain, pole, or range error occurs when the input argument(s) are applied to the indicated <cmath> function. + +

    - The member type reference has the same interface as simd<T, Abi>::reference, except its value_type is bool. ([parallel.simd.reference]) + If abs is called with an argument of type simd<X, Abi> for which is_unsigned_v<X> is true, the program is ill-formed.

    - - -

    simd_mask constructors

    - - - explicit simd_mask(value_type x) noexcept - - - - Constructs an object with each element initialized to x. - - - - - - template<class U> simd_mask(const simd_mask<U, simd_abi::fixed_size<size()>>& x) noexcept; - - - - Constructs an object of type simd_mask where the i-th element equals x[i] for all i[0, size()). - - - - - - This constructor shall not participate in overload resolution unless abi_type is simd_abi::fixed_size<size()>. - - - - - - template<class Flags> simd_mask(const value_type* mem, Flags); - - - - If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd_mask>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range. - - +
    +
    - - - Constructs an object where the i-th element is initialized to mem[i] for all i[0, size()). - - + +

    Class template simd_mask

    - - - This constructor shall not participate in overload resolution unless is_simd_flag_type_v<Flags> is true. - - - -
    + +

    Class template simd_mask overview

    - -

    simd_mask copy functions

    +
    +      
    +template<class T, class Abi> class simd_mask {
    +public:
    +  using value_type = bool;
    +  using reference = see below;
    +  using simd_type = simd<T, Abi>;
    +  using abi_type = Abi;
     
    -        
    -          template<class Flags> void copy_from(const value_type* mem, Flags);
    +  static constexpr size_t size() noexcept;
     
    -          
    -          
    -            If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd_mask>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range.
    -          
    -          
    +  simd_mask() = default;
     
    -          
    -          
    -            Replaces the elements of the simd_mask object such that the i-th element is replaced with mem[i] for all i[0, size()).
    +  // broadcast constructor
    +  explicit simd_mask(value_type) noexcept;
     
    -          
    -          
    +  // implicit type conversion constructor
    +  template<class U> simd_mask(const simd_mask<U, simd_abi::fixed_size<size()>>&) noexcept;
     
    -          
    -          
    -            This function shall not participate in overload resolution unless is_simd_flag_type_v<Flags> is true.
    -          
    -          
    -        
    +  // load constructor
    +  template<class Flags> simd_mask(const value_type* mem, Flags);
     
    -        
    -          template<class Flags> void copy_to(value_type* mem, Flags);
    +  
    +  template<class Flags> void copy_from(const value_type* mem, Flags);
    +  template<class Flags> void copy_to(value_type* mem, Flags);
     
    -          
    -          
    -            If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd_mask>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range.
    -          
    -          
    +  
    +  reference operator[](size_t);
    +  value_type operator[](size_t) const;
     
    -          
    -          
    -            Copies all simd_mask elements as if mem[i] = operator[](i) for all i[0, size()).
    -          
    -          
    +  
    +  simd_mask operator!() const noexcept;
    +  
    +  
    +  friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept;
    +  friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept;
    +  friend simd_mask operator&(const simd_mask&, const simd_mask&) noexcept;
    +  friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept;
    +  friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept;
     
    -          
    -          
    -            This function shall not participate in overload resolution unless is_simd_flag_type_v<Flags> is true.
    -          
    -          
    -        
    -      
    +  
    +  friend simd_mask& operator&=(simd_mask&, const simd_mask&) noexcept;
    +  friend simd_mask& operator|=(simd_mask&, const simd_mask&) noexcept;
    +  friend simd_mask& operator^=(simd_mask&, const simd_mask&) noexcept;
     
    -      
    -        

    simd_mask subscript operators

    + + friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept; +};
    - - reference operator[](size_t i); +

    + + The class template simd_mask is a data-parallel type with the element type bool. The width of a given simd_mask specialization is a constant expression, determined by the template parameters. Specifically, simd_mask<T, Abi>::size() == simd<T, Abi>::size(). + +

    - - - i < size(). - - +

    + + Every specialization of simd_mask shall be a complete type. The specialization simd_mask<T, Abi> is supported if T is a vectorizable type and - +

      +
    • - A reference (see [parallel.simd.reference]) referring to the i-th element. + Abi is simd_abi::scalar, or - +
    • - +
    • - Nothing. + Abi is simd_abi::fixed_size<N>, with N constrained as defined in (). - - +
    • +
    - - value_type operator[](size_t i) const; + + If Abi is an extended ABI tag, it is implementation-defined whether simd_mask<T, Abi> is supported. The intent is for implementations to decide on the basis of the currently targeted system. + If simd_mask<T, Abi> is not supported, the specialization shall have a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment. + + +
    - - - i < size(). - - +

    + + Default initialization performs no initialization of the elements; value-initialization initializes each element with false. Thus, default initialization leaves the elements in an indeterminate state. + +

    - - - The value of the i-th element. - - + + static constexpr size_t size() noexcept; - - - Nothing. - - - -
    + + + The width of simd<T, Abi>. + + + - -

    simd_mask unary operators

    +

    + + Implementations should enable explicit conversion from and to implementation-defined types. This adds one or more of the following declarations to class simd_mask: - - simd_mask operator!() const noexcept; +

    +        
    +explicit operator implementation-defined() const;
    +explicit simd_mask(const implementation-defined& init);
    +
    - - - The result of the element-wise appliation of operator!. - - - -
    +

    + + The member type reference has the same interface as simd<T, Abi>::reference, except its value_type is bool. () + +

    + + +

    simd_mask constructors

    - -

    simd_mask non-member operations

    - - -

    simd_mask binary operators

    - - - -friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept; -friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept; -friend simd_mask operator&(const simd_mask&, const simd_mask&) noexcept; -friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept; -friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept; - - - - - A simd_mask object initialized with the results of the element-wise appliation of the indicated operator. - - - -
    - + + explicit simd_mask(value_type x) noexcept - -

    simd_mask compound assignment

    + + + Constructs an object with each element initialized to x. + + +
    - -friend simd_mask& operator&=(simd_mask& lhs, const simd_mask& rhs) noexcept; -friend simd_mask& operator|=(simd_mask& lhs, const simd_mask& rhs) noexcept; -friend simd_mask& operator^=(simd_mask& lhs, const simd_mask& rhs) noexcept; - + template<class U> simd_mask(const simd_mask<U, simd_abi::fixed_size<size()>>& x) noexcept; - These operators perform the indicated binary element-wise operation. + Constructs an object of type simd_mask where the i-th element equals x[i] for all i[0, size()). - + - lhs. + This constructor shall not participate in overload resolution unless abi_type is simd_abi::fixed_size<size()>. - + -
    - - + - -friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept; -friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept; - + template<class Flags> simd_mask(const value_type* mem, Flags); - + - An object initialized with the results of the element-wise application of the indicated operator. + If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd_mask>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range. - + + + + + Constructs an object where the i-th element is initialized to mem[i] for all i[0, size()). + + + + + + This constructor shall not participate in overload resolution unless is_simd_flag_type_v<Flags> is true. + + - + +

    simd_mask copy functions

    + - template<class T, class Abi> bool all_of(const simd_mask<T, abi>& k) noexcept; + template<class Flags> void copy_from(const value_type* mem, Flags); - + - true if all boolean elements in k are true, false otherwise. + If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd_mask>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range. - - + - - template<class T, class Abi> bool any_of(const simd_mask<T, abi>& k) noexcept; + + + Replaces the elements of the simd_mask object such that the i-th element is replaced with mem[i] for all i[0, size()). + + + - + - true if at least one boolean element in k is true, false otherwise. + This function shall not participate in overload resolution unless is_simd_flag_type_v<Flags> is true. - + - template<class T, class Abi> bool none_of(const simd_mask<T, abi>& k) noexcept; + template<class Flags> void copy_to(value_type* mem, Flags); - + - true if none of the one boolean elements in k is true, false otherwise. + If the template parameter Flags is vector_aligned_tag, mem shall point to storage aligned by memory_alignment_v<simd_mask>. If the template parameter Flags is overaligned_tag<N>, mem shall point to storage aligned by N. If the template parameter Flags is element_aligned_tag, mem shall point to storage aligned by alignof(U). [mem, mem + size()) is a valid range. - - - - - template<class T, class Abi> bool some_of(const simd_mask<T, abi>& k) noexcept; + - + - true if at least one of the one boolean elements in k is true and at least one of the boolean elements in k is false, false otherwise. + Copies all simd_mask elements as if mem[i] = operator[](i) for all i[0, size()). 
- - - - - template<class T, class Abi> int popcount(const simd_mask<T, Abi>& k) noexcept; + - + - The number of boolean elements in k that are true. + This function shall not participate in overload resolution unless is_simd_flag_type_v<Flags> is true. - + +
    + + +

    simd_mask subscript operators

    - template<class T, class Abi> int find_first_set(const simd_mask<T, Abi>& k); + reference operator[](size_t i); - any_of(k) returns true. + i < size(). - The lowest element index i where k[i] is true. + A reference (see ) referring to the i-th element. + + + + Nothing. + + - template<class T, class Abi> int find_last_set(const simd_mask<T, Abi>& k); + value_type operator[](size_t i) const; - any_of(k) returns true. + i < size(). - The greatest element index i where k[i] is true. + The value of the i-th element. + + + + Nothing. + + +
    + + +

    simd_mask unary operators

    - -bool all_of(see-below) noexcept; -bool any_of(see-below) noexcept; -bool none_of(see-below) noexcept; -bool some_of(see-below) noexcept; -int popcount(see-below) noexcept; - + simd_mask operator!() const noexcept; - all_of and any_of return their arguments; none_of returns the negation of its argument; some_of returns false; popcount returns the integral representation of its argument; find_first_set and find_last_set return 0. + The result of the element-wise application of operator!. - - - - The functions shall not participate in overload resolution unless the argument is of type bool. - -
    +
    + + +

    simd_mask non-member operations

    + + +

    simd_mask binary operators

    -int find_first_set(see-below) noexcept; -int find_last_set(see-below) noexcept; +friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept; +friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept; +friend simd_mask operator& (const simd_mask&, const simd_mask&) noexcept; +friend simd_mask operator| (const simd_mask&, const simd_mask&) noexcept; +friend simd_mask operator^ (const simd_mask&, const simd_mask&) noexcept; - - - The value of the argument is true. - - - - 0. + A simd_mask object initialized with the results of the element-wise application of the indicated operator. - - - - The functions shall not participate in overload resolution unless the argument is of type bool. - -
    + - -

    Where functions

    + +

    simd_mask compound assignment

    - - + + +friend simd_mask& operator&=(simd_mask& lhs, const simd_mask& rhs) noexcept; +friend simd_mask& operator|=(simd_mask& lhs, const simd_mask& rhs) noexcept; +friend simd_mask& operator^=(simd_mask& lhs, const simd_mask& rhs) noexcept; + + + + + These operators perform the indicated binary element-wise operation. + + + + + + lhs. + + + +
    + + +

    simd_mask comparisons

    + + + +friend simd_mask operator==(const simd_mask&, const simd_mask&) noexcept; +friend simd_mask operator!=(const simd_mask&, const simd_mask&) noexcept; + + + + + An object initialized with the results of the element-wise application of the indicated operator. + + + +
    + + +

    simd_mask reductions

    + + + template<class T, class Abi> bool all_of(const simd_mask<T, Abi>& k) noexcept; + + + + true if all boolean elements in k are true, false otherwise. + + + + + + template<class T, class Abi> bool any_of(const simd_mask<T, Abi>& k) noexcept; + + + + true if at least one boolean element in k is true, false otherwise. + + + + + + template<class T, class Abi> bool none_of(const simd_mask<T, Abi>& k) noexcept; + + + + true if none of the boolean elements in k is true, false otherwise. + + + + + + template<class T, class Abi> bool some_of(const simd_mask<T, Abi>& k) noexcept; + + + + true if at least one of the boolean elements in k is true and at least one of the boolean elements in k is false, false otherwise. + + + + + + template<class T, class Abi> int popcount(const simd_mask<T, Abi>& k) noexcept; + + + + The number of boolean elements in k that are true. + + + + + + template<class T, class Abi> int find_first_set(const simd_mask<T, Abi>& k); + + + + any_of(k) returns true. + + + + + + The lowest element index i where k[i] is true. + + + + + + template<class T, class Abi> int find_last_set(const simd_mask<T, Abi>& k); + + + + any_of(k) returns true. + + + + + + The greatest element index i where k[i] is true. + + + + + + +bool all_of(see below) noexcept; +bool any_of(see below) noexcept; +bool none_of(see below) noexcept; +bool some_of(see below) noexcept; +int popcount(see below) noexcept; + + + + + all_of and any_of return their arguments; none_of returns the negation of its argument; some_of returns false; popcount returns the integral representation of its argument. + + + + + + The functions shall not participate in overload resolution unless the argument is of type bool. + + + + + + +int find_first_set(see below) noexcept; +int find_last_set(see below) noexcept; + + + + + The value of the argument is true. + + + + + + 0. + + + + + + The functions shall not participate in overload resolution unless the argument is of type bool. + + + +
    + + +

    Where functions

    + + + template<class T, class Abi> where_expression<simd_mask<T, Abi>, simd<T, Abi>> where(const typename simd<T, Abi>::mask_type& k, simd<T, Abi>& v) noexcept; @@ -2898,47 +2886,46 @@

    Where functions

    const simd_mask<T, Abi>& v) noexcept;
    - - - An object [parallel.simd.wherexpr] with mask and data initialized with k and v respectively. - - -
    + + + An object () with mask and data initialized with k and v respectively. + + + - - -template<class T> where_expression<bool T> where(see-below k, T& v) noexcept; + + +template<class T> where_expression<bool, T> where(see below k, T& v) noexcept; template<class T> -const_where_expression<bool, T> where(see-below k, const T& v) noexcept; +const_where_expression<bool, T> where(see below k, const T& v) noexcept; - - - The functions shall not participate in overload resolution unles + + + The functions shall not participate in overload resolution unless - -
  • - - T is neither a simd nor a simd_mask specialization, and - -
  • +
      +
    • + + T is neither a simd nor a simd_mask specialization, and + +
    • -
    • - - the first argument is of type bool. - -
    • - - - +
    • + + the first argument is of type bool. + +
    • +
    +
    +
    - - - And object [parallel.simd.whereexpr] with mask and data initialized with k and v respectively. - - -
    -
    + + + An object () with mask and data initialized with k and v respectively. + + +
    diff --git a/main.html b/main.html index d1fbaf0..6dbee67 100644 --- a/main.html +++ b/main.html @@ -12,8 +12,8 @@ - + diff --git a/task_block.html b/task_block.html index fbbb0db..00631a4 100644 --- a/task_block.html +++ b/task_block.html @@ -240,7 +240,7 @@

    Function template define_task_block

    The define_task_block function may return on a thread other than the one on which it was called unless there are no task blocks active on entry to define_task_block (see ), in which case the function returns on the original thread. When define_task_block returns on a different thread, - it synchronizes with operations following the call. The return from define_task_block is ordered + it synchronizes with operations following the call. The return from define_task_block is ordered similarly to an ordinary function call in a single thread. The define_task_block_restore_thread function always returns on the same thread as the one on which it was called.