diff --git a/README.md b/README.md index f489398..395d319 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,12 @@ NYYYY is document NXXXX reformatted as a PDTS draft document. * Apply the proposed wording in [P0214R9](https://wg21.link/P0214R9) (Data-Parallel Vector Types & Operations). +## Editorial Changes + +* Reordered Execution Policy and Parallel Exceptions clauses. +* Various typographical corrections. + # Acknowledgements -Thanks to Alisdair Meredith for suggesting editorial changes. +Thanks to Alisdair Meredith, Matthias Kretz, and Marshall Clow for reviewing and suggesting editorial changes. diff --git a/algorithms.html b/algorithms.html index c5ded45..881bce8 100644 --- a/algorithms.html +++ b/algorithms.html @@ -19,7 +19,7 @@
An evaluation A is ordered before an evaluation B if A is deterministically @@ -56,7 +56,7 @@
In the following, Xi and Xj refer to evaluations of the same expression @@ -78,7 +78,6 @@
Let f be a function called for each argument list in a sequence of argument lists. @@ -97,7 +96,6 @@
simd
and simd_mask
class templates. A data-parallel object is an object of data-parallel type.
- +
@@ -44,16 +44,17 @@ Header
namespace std::experimental {
inline namespace parallelism_v2 {
-namespace simd_abi {
-
- struct scalar {};
- template<int N> struct fixed_size {};
- template<class T> inline constexpr int max_fixed_size = implementation-defined;
- template<class T> using compatible = implementation-defined;
- template<class T> using native = implementation-defined;
-
- template<class T, size_t N> struct deduce { using type = see-below; };
- template<class T, size_t N> using deduce_t = typename deduce<T, N>::type;
+ namespace simd_abi {
+
+ struct scalar {};
+ template<int N> struct fixed_size {};
+ template<class T> inline constexpr int max_fixed_size = implementation-defined;
+ template<class T> using compatible = implementation-defined;
+ template<class T> using native = implementation-defined;
+
+ template<class T, size_t N> struct deduce { using type = see below; };
+ template<class T, size_t N> using deduce_t = typename deduce<T, N>::type;
+ }
struct element_aligned_tag {};
struct vector_aligned_tag {};
@@ -62,7 +63,7 @@ <experimental/simd>
synopsisHeader
inline constexpr vector_aligned_tag vector_aligned{};
template<size_t N> inline constexpr overaligned_tag<N> overaligned{};
- <experimental/simd>
synopsisHeader
template<class T, class U = typename T::value_type>
inline constexpr size_t memory_alignment_v = memory_alignment<T,U>::value;
- <experimental/simd>
synopsisHeader
array<V, simd_size_v<typename V::value_type, Abi> / V::size()> split(const simd_mask<typename V::value_type, Abi>&);
template<class T, class... Abis>
- simd<T, simd_abi::deduce_t>T, (simd_size_v<T, Abis> + ...)>> concat(const simd<T, Abis>&...);
+ simd<T, simd_abi::deduce_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd<T, Abis>&...);
template<class T, class... Abis>
- simd_mask<T, simd_abi::deduce_t>T, (simd_size_v<T, Abis> + ...)>> concat(const simd_mask<T, Abis>&...);
+ simd_mask<T, simd_abi::deduce_t<T, (simd_size_v<T, Abis> + ...)>> concat(const simd_mask<T, Abis>&...);
- <experimental/simd>
synopsisHeader
template<class T, class Abi> int find_first_set(const simd_mask<T, Abi>&);
template<class T, class Abi> int find_last_set(const simd_mask<T, Abi>&);
- bool all_of(see-below) noexcept;
- bool any_of(see-below) noexcept;
- bool none_of(see-below) noexcept;
- bool some_of(see-below) noexcept;
- int popcount(see-below) noexcept;
- int find_first_set(see-below) noexcept;
- int find_last_set(see-below) noexcept;
+ bool all_of(see below) noexcept;
+ bool any_of(see below) noexcept;
+ bool none_of(see below) noexcept;
+ bool some_of(see below) noexcept;
+ int popcount(see below) noexcept;
+ int find_first_set(see below) noexcept;
+ int find_last_set(see below) noexcept;
- <experimental/simd>
synopsis
<experimental/simd>
synopsis<experimental/simd>
defines class templates, tag types, trait types, and function templates for element-wise operations on data-parallel objects.
- simd
ABI tags-namespace std::experimental { -inline namespace parallelism_v2 { ++ -+ + +
simd
ABI tagsnamespace simd_abi { struct scalar {}; template<int N> struct fixed_size {}; @@ -215,59 +214,71 @@- +template<class T> using compatible = implementation-defined; template<class T> using native = implementation-defined; } -} -} -
simd
ABI tags
-
- An ABI tag is a type in the std::experimental::parallelism_v2::simd_abi
namespace that indicates a choice of size and binary representation for objects of data-parallel type. simd
and simd_mask
.
+
+ An ABI tag is a type in the std::experimental::parallelism_v2::simd_abi
namespace that indicates a choice of size and binary representation for objects of data-parallel type. simd
and simd_mask
.
+
+
-
- Use of the scalar
tag type requires data-parallel types to store a single element (i.e., simd<T, simd_abi::scalar>::size()
returns 1). scalar
is not an alias for fixed_size<1>
.
+
+
-
- The value of max_fixed_size<T>
is at least 32.
-
-
+
+ Use of the scalar
tag type requires data-parallel types to store a single element (i.e., simd<T, simd_abi::scalar>::size()
returns 1). scalar
is not an alias for fixed_size<1>
.
-
- Use of the
+
+ The value of
+
+ Use of the simd_abi::fixed_size<N>
tag type requires data-parallel types to store N
elements (i.e. simd<T, simd_abi::fixed_size<N>>::size()
is N
). simd<T, fixed_size<N>>
and simd_mask<T, fixed_size<N>>
with N > 0
and N <= max_fixed_size<T>
is supported. Additionally, for every supported simd<T, Abi>
(see Abi
is an ABI tag is not a specialization of simd_abi::fixed_size
, N == simd<T, Abi>::size()
is true.
+ max_fixed_size<T>
is at least 32.
+
+ simd<T, fixed_size<N>>
with N > max_fixed_size<T>
is supported. The value of max_fixed_size<T>
can depend on compiler flags and can change between different compiler versions.simd_abi::fixed_size<N>
tag type requires data-parallel types to store N
elements (i.e. simd<T, simd_abi::fixed_size<N>>::size()
is N
). simd<T, fixed_size<N>>
and simd_mask<T, fixed_size<N>>
with N > 0
and N <= max_fixed_size<T>
shall be supported. Additionally, for every supported simd<T, Abi>
(see Abi
is an ABI tag that is not a specialization of simd_abi::fixed_size
, N == simd<T, Abi>::size()
shall be supported.
+
+ simd
and simd_mask
specializations using the same simd_abi::fixed_size<N>
tag. Otherwise, the efficiency of simd<T, Abi>
is likely to be better than for simd<T, fixed_size<simd_size_v<T, Abi>>>
(with Abi
not a specialization of simd_abi::fixed_size
).
+
+ simd<T, fixed_size<N>>
with N > max_fixed_size<T>
is supported. The value of max_fixed_size<T>
can depend on compiler flags and can change between different compiler versions.
-
- An implementation may define additional extended ABI tag types in the std::experimental::parallelism_v2::simd_abi
namespace, to support other forms of data-parallel computation.
-
-
+
+ simd
and simd_mask
specializations using the same simd_abi::fixed_size<N>
tag. Otherwise, the efficiency of simd<T, Abi>
is likely to be better than for simd<T, fixed_size<simd_size_v<T, Abi>>>
(with Abi
not a specialization of simd_abi::fixed_size
).
-
-
+
+ An implementation may define additional extended ABI tag types in the
+
+ compatible<T>
is an implementation-defined alias for an ABI tag. T
that ensures ABI compatibility between translation units on the target architecture.std::experimental::parallelism_v2::simd_abi
namespace, to support other forms of data-parallel computation.
+
+ compatible<T>
is an implementation-defined alias for an ABI tag. T
that ensures ABI compatibility between translation units on the target architecture.
+
-
-
- __simd128
and __simd256
, where the __simd256
type requires an optional ISA extension on said architecture. Also, the target architecture does not support long double
with either ABI tag. The implementation therefore defines
+ [ Example: Consider a target architecture supporting the extended ABI tags __simd128
and __simd256
, where the __simd256
type requires an optional ISA extension on said architecture. Also, the target architecture does not support long double
with either ABI tag. The implementation therefore defines
-
compatible<T>
as an alias for __simd128
for all vectorizable T
, except long double
, and
+ compatible<T>
is an alias for __simd128
for all vectorizable T
, except long double
, and
simd
ABI tagscompatible<long double>
as an alias for scalar
.
-
- native<T>
is an implementation-defined alias for an ABI tag. T
that is supported on the currently targeted system. For target architectures without ISA extensions, the native<T>
and compatible<T>
aliases will likely be the same. For target architectures with ISA extensions, compiler flags may influence the native<T>
alias while compatible<T>
will be the same independent of such flags.
-
+
+
+ native<T>
is an implementation-defined alias for an ABI tag. T
that is supported on the currently targeted system. For target architectures without ISA extensions, the native<T>
and compatible<T>
aliases will likely be the same. For target architectures with ISA extensions, compiler flags may influence the native<T>
alias while compatible<T>
will be the same independent of such flags.
+
+
+ [ Example: Consider a target architecture supporting the extended ABI tags __simd128
and __simd256
, where hardware support for __simd256
only exists for floating-point types. The implementation therefore defines native<T>
as an alias for
+
+
+
+
+ — end example ]
+
+
+ __simd256
if T
is a floating-point type, and
+
+ __simd128
otherwise.
+
+
template<class T, size_t N> struct deduce { using type = see below; };+ -
__simd128
and __simd256
, where hardware support for __simd256
only exists for floating-point types. The implementation therefore defines native<T>
as an alias for
+
+
+ The member type
shall be present if and only if
-
__simd256
if T
is a floating-point type, and
+ T
is a vectorizable type, and
__simd128
otherwise.
+ simd_abi::fixed_size<N>
is supported (see
++ + Where present, the member typedef
type
shall name an ABI tag type that satisfies + -namespace std::experimental { -inline namespace parallelism_v2 { -namespace simd_abi { +
simd_size<T, type> == N
, and
+
+ simd<T, type>
is default constructible (see
-
- The member
+
+ The behavior of a program that adds specializations for type
is present if and only if
+
+ If N
is 1
, the member typedef type
is simd_abi::scalar
. Otherwise, if there are multiple ABI tag types that satisfy the constraints, the member typedef type
is implementation-defined. simd_abi::fixed_size<N>
.T
is a vectorizable type, and
-
- deduce
is undefined.
+
+
simd_abi::fixed_size<N>
is supported (see simd
type traits
-
- Where present, the member typedef
+
+ The type type
shall name an ABI tag type that satisfies
-
+
+ template<class T> struct is_abi_tag { see below };
+
- simd_size<T, type> == N
, and
-
- is_abi_tag<T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is a standard or extended ABI tag, and false_type
otherwise.
+
+ simd<T, type>
is default constructible (see
+
+ The behavior of a program that adds specializations for is_abi_tag
is undefined.
+
+
template<class T> struct is_simd { see below };
+
-
- If N
is 1
, the member typedef type
is simd_abi::scalar
. Otherwise, if there are multiple ABI tag types that satisfy the constraints, the member typedef type
is implementation-defined. simd_abi::fixed_size<N>
.
+
+ The type is_simd<T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is a specialization of the simd
class template, and false_type
otherwise.
+
+
-
- The behavior of a program that adds specializations for deduce
is undefined.
-
-
+
+ The behavior of a program that adds specializations for is_simd
is undefined.
+
+
simd
type traitstemplate<class T> struct is_simd_mask { see below };
-
- template<class> struct is_abi_tag { see-below };
-
+
+
+ The type is_simd_mask<T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is a specialization of the simd_mask
class template, and false_type
otherwise.
+
+
-
- The type is_abi_tag<T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is a standard or extended ABI tag, and false_type
otherwise.
-
-
+
+ The behavior of a program that adds specializations for is_simd_mask
is undefined.
+
+
-
- The behavior of a program that adds specializations for is_abi_tag
is undefined.
-
-
template<class T> struct is_simd_flag_type { see below };
+
-
- template<class> struct is_simd { see-below };
-
+
+
+ The type
-
- The type
-
- The behavior of a program that adds specializations for
-
- The type
-
- The behavior of a program that adds specializations for
-
- The type
- and is_simd_flag_type<class T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is one of
- is_simd<T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is a specialization of the simd
class template, and false_type
otherwise.
-
-
+
- element_aligned_tag
, or
+
+ is_simd
is undefined.
-
- vector_aligned_tag
, or
+
+ is_simd_mask<T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is a specialization of the simd_mask
class template, and false_type
otherwise.
-
- overaligned_tag<N>
with N > 0
and N
an integral power of two,
+
+ is_simd_mask
is undefined.
-
- template<class T> struct is_simd_flag_type { see-below };
-
-
- is_simd_flag_type<class T>
is a UnaryTypeTrait
with a BaseCharacteristic
of true_type
if T
is one of
-
- element_aligned_tag
, or
-
- vector_aligned_tag
, or
-
- overaligned_tag<N>
with N > 0
and N
an integral power of two,
-
-
+
+ and false_type
otherwise.
+
+
+
+ false_type
otherwise.
+ The behavior of a program that adds specializations for is_simd_flag_type
is undefined.
-
-
- The behavior of a program that adds specializations for is_simd_flag_type
is undefined.
-
-
template<class T, class Abi = simd_abi::compatible<T>> struct simd_size { see-below };
-
-
-
-
- simd_size<T, Abi>
has a member value
if and only if
-
- T
is a vectorizable type, and
-
- is_abi_tag_v<Abi>
is true
.
-
-
+
template<class T, class Abi = simd_abi::compatible<T>> struct simd_size { see below };
-
-
-
-
-
- If value
is present, the type simd_size<T, Abi>
is a BinaryTypeTrait
with a BaseCharacteristic
of integral_constant<size_t, N>
with N
equal to the number of elements in a simd<T, Abi>
object. simd<T, Abi>
is not supported for the currently targeted system, simd_size<T, Abi>::value
produces the value simd<T, Abi>::size()
would return if it were supported.
-
- The behavior of a program that adds specializations for simd_size
is undefined.
-
-
template<class T, class U = typename T::value_type> struct memory_alignment { see-below };
-
+
+
+
-
- simd_size<T, Abi>
shall have a member value
if and only if
- memory_alignment<T, U>
has a member value
if and only if
-
+
+
- T
is a vectorizable type, and
+
+ is_simd_mask_v<T>
is true
and U
is bool
, or
-
- is_abi_tag_v<Abi>
is true
.
+
+ is_simd_v<T>
is true
and U
is a vectorizable type.
+
-
- If value
is present, the type memory_alignment<T, U>
is a BinaryTypeTrait
with a BaseCharacteristic
of integral_constant<size_t, N>
for some implementation-defined N
(see value
identifies the alignment restrictions on pointers used for (converting) loads and stores for the give type T
on arrays of type U
.
-
- The behavior of a program that adds specializations for memory_alignment
is undefined.
-
-
const_where_expression
and where_expression
-namespace std::experimental { -inline namespace parallelism_v2 { - template<class M, class T> class const_where_expression { - const M mask; // exposition only - T& data; // exposision only - - public: - const_where_expression(const const_where_expression&) = delete; - const_where_expression& operator=(const const_where_expression&) = delete; - - T operator-() const &&; - T operator+() const &&; - T operator~() const &&; - - template<class U, class Flags> void copy_to(U* mem, Flags f) const &&; - }; - - template<class M, class T> - class where_expression : public const_where_expression<M, T> { - public: - template<class U> void operator=(U&& x) &&; - template<class U> void operator+=(U&& x) &&; - template<class U> void operator-=(U&& x) &&; - template<class U> void operator*=(U&& x) &&; - template<class U> void operator/=(U&& x) &&; - template<class U> void operator%=(U&& x) &&; - template<class U> void operator&=(U&& x) &&; - template<class U> void operator|=(U&& x) &&; - template<class U> void operator^=(U&& x) &&; - template<class U> void operator<<=(U&& x) &&; - template<class U> void operator>>=(U&& x) &&; - void operator++() && - void operator++(int) && - void operator--() && - void operator--(int) && - - template<class U, class Flags> void copy_from(const U* mem, Flags) &&; - }; -} -} -- - -
-
- The class templates const_where_expression
and where_expression
abstract the notion of selecting elements of a given object of arithmetic or data-parallel type.
-
-
-
- The first templates argument M
shall be cv-unqualified bool
or a cv-unqualified simd_mask
specialization.
-
-
-
- If M
is bool
, T
shall be a cv-unqualified arithmetic type. Otherwise, T
shall either be M
or typename M::simd_type
.
-
-
-
- In this subclause, data[0]
is used interchangably for data
, mask[0]
is used interchangably for mask
, and M::size()
is used interchangably for 1
.
-
-
-
- The selected indices signify the integers i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}. The selected elements signify the elements data[i]
for all selected indices i
.
-
-
-
- In this subclause, the value_type
is an alias for T
if M
is bool
, or an alias for typename T::value_type
if is_simd_mask_v<M>
is true
.
-
-
- -
-
- where
functions mask
with the first argument to where
and data
with the second argument to where
.
- data
with the indicated unary operator applied to all selected elements.
-
- value
is present, the type simd_size<T, Abi>
is a BinaryTypeTrait
with a BaseCharacteristic
of integral_constant<size_t, N>
with N
equal to the number of elements in a simd<T, Abi>
object. simd<T, Abi>
is not supported for the currently targeted system, simd_size<T, Abi>::value
produces the value simd<T, Abi>::size()
would return if it were supported.
- simd_size
is undefined.
-
template<class T, class U = typename T::value_type> struct memory_alignment { see below };
-
- If the template parameter Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<T, U>
. If the template parameter flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. If M
is not bool
, the largest i ∊ [0, M::size())
where mask[i]
is true
is less than the number of values pointed to by mem
.
+ memory_alignment<T, U>
shall have a member value
if and only if
-
is_simd_mask_v<T>
is true
and U
is bool
, or
+
+ is_simd_v<T>
is true
and U
is a vectorizable type.
+
+
- Copies the selected elements as if mem[i] = static_cast<U>(data[i])
for all selected indices i
.
+ If value
is present, the type memory_alignment<T, U>
is a BinaryTypeTrait
with a BaseCharacteristic
of integral_constant<size_t, N>
for some implementation-defined N
(see value
identifies the alignment restrictions on pointers used for (converting) loads and stores for the given type T
on arrays of type U
.
- Nothing.
+ The behavior of a program that adds specializations for memory_alignment
is undefined.
-
const_where_expression
and where_expression
is_simd_flag_type_v<Flags>
is true
, and
- +template<class M, class T> class const_where_expression { + const M mask; // exposition only + T& data; // exposition only -
U
is bool
and value_type
is bool
, or
- U
is a vectorizable type and value_type
is not bool
.
- data[i]
with static_cast<T>(std::forward<U>(x))[i]
for all selected indices i
.
+template<class M, class T>
+class where_expression : public const_where_expression<M, T> {
+public:
+ template<class U> void operator=(U&& x) &&;
+ template<class U> void operator+=(U&& x) &&;
+ template<class U> void operator-=(U&& x) &&;
+ template<class U> void operator*=(U&& x) &&;
+ template<class U> void operator/=(U&& x) &&;
+ template<class U> void operator%=(U&& x) &&;
+ template<class U> void operator&=(U&& x) &&;
+ template<class U> void operator|=(U&& x) &&;
+ template<class U> void operator^=(U&& x) &&;
+ template<class U> void operator<<=(U&& x) &&;
+ template<class U> void operator>>=(U&& x) &&;
+
+ void operator++() &&;
+ void operator++(int) &&;
+ void operator--() &&;
+ void operator--(int) &&;
+
+ template<class U, class Flags> void copy_from(const U* mem, Flags) &&;
+};
+
-
- This operator shall not participate in overload resolution unless U
is convertible to T
.
+ The class templates const_where_expression
and where_expression
abstract the notion of selecting elements of a given object of arithmetic or data-parallel type.
-
- Replaces data[i]
with static_cast<T>(data @ std::forward<U>(x))[i]
(where @
denotes the indicated operator) for all selected indices i
.
+ The first template argument M
shall be cv-unqualified bool
or a cv-unqualified simd_mask
specialization.
-
- Each of these operators shall not participate in overload resolution unless the return type of data @ std::forward<U>(x)
is convertible to T
. It is unspecified whether the binary operator, implied by the compound assignment operator, is executed on all elements or only on the selected elements.
+ If M
is bool
, T
shall be a cv-unqualified arithmetic type. Otherwise, T
shall either be M
or typename M::simd_type
.
-
- Applies the indicated operator to the selected elements.
+ In this subclause, if M
is bool
, data[0]
is used interchangeably for data
, mask[0]
is used interchangeably for mask
, and M::size()
is used interchangeably for 1
.
-
- Each of these operators shall not participate in overload resolution unless the indicated operator can be applied to objects of type T
.
+ The selected indices signify the integers i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}. The selected elements signify the elements data[i]
for all selected indices i
.
-
- If the template parameter Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<T, U>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignon(U)
. If is_simd_flag_type_v<U>
is true
, for all selected indices i, i shall be less than the number of values pointed to by mem
.
+ In this subclause, the type value_type
is an alias for T
if M
is bool
, or an alias for typename T::value_type
if is_simd_mask_v<M>
is true
.
-
- Replaces the selected elements as if data[i] = static_cast<value_type>(mem[i])
for all selected indices i
.
+ where
functions mask
with the first argument to where
and data
with the second argument to where
.
is_simd_flag_type_v<Flags>
is true
, and
- U
is bool
and value_type
is bool
, or
- U
is a vectorizable type and value_type
is not bool
.
- data
with the indicated unary operator applied to all selected elements.
+
+ Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<T, U>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. If M
is not bool
, the largest i ∊ [0, M::size())
where mask[i]
is true
is less than the number of values pointed to by mem
.
+
+ mem[i] = static_cast<U>(data[i])
for all selected indices i
.
+
+ is_simd_flag_type_v<Flags>
is true
, and
+ U
is bool
and value_type
is bool
, or
+ U
is a vectorizable type and value_type
is not bool
.
+ data[i]
with static_cast<T>(std::forward<U>(x))[i]
for all selected indices i
.
+
+ U
is convertible to T
.
+
+ data[i]
with static_cast<T>(data @ std::forward<U>(x))[i]
(where @
denotes the indicated operator) for all selected indices i
.
+
+ data @ std::forward<U>(x)
is convertible to T
. It is unspecified whether the binary operator, implied by the compound assignment operator, is executed on all elements or only on the selected elements.
+
+ T
.
+
+ Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<T, U>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. If is_simd_flag_type_v<U>
is true
, for all selected indices i, i shall be less than the number of values pointed to by mem
.
+
+ data[i] = static_cast<value_type>(mem[i])
for all selected indices i
.
+
+ is_simd_flag_type_v<Flags>
is true
, and
+ U
is bool
and value_type
is bool
, or
+ U
is a vectorizable type and value_type
is not bool
.
+ simd
overview-namespace std::experimental { -inline namespace parallelism_v2 { - template<class T, class Abi> class simd { - public: - using value_type = T; - using reference = see-below; - using mask_type = simd_mask<T, Abi> - using abi_type = Abi; - - static constexpr size_t size() noexcept; - - simd() = default; - - // implicit conversion constructor - template<class U> simd(const simd<U, simd_abi::fixed_size<size()>>&); - - // implicit broadcast constructor (see below for constraints) - template<class U> simd(U&& value); - - // generator constructor (see below for constraints) - template<class G> explicit simd(G&& gen); - - // load constructor - template<class U, class Flags> simd(const U* mem, Flags f); - -@@ -929,19 +917,19 @@loads - template<class U, class Flags> copy_from(const U* mem, Flags f); - - stores - template<class U, class Flags> copy_to(U* mem, Flags f); - - scalar access - reference operator[](size_t); - value_type operator[](size_t) const; - - unary operators - simd& operator++(); - simd operator++(int); - simd& operator--(); - simd operator--(int); - mask_type operator!() const; - simd operator~() const; // see below - simd operator+() const; - simd operator-() const; - - binary operators - friend simd operator+(const simd&, const simd&); - friend simd operator-(const simd&, const simd&); - friend simd operator*(const simd&, const simd&); - friend simd operator/(const simd&, const simd&); - friend simd operator%(const simd&, const simd&); - friend simd operator&(const simd&, const simd&); - friend simd operator|(const simd&, const simd&); - friend simd operator^(const simd&, const simd&); - friend simd operator<<(const simd&, const simd&); - friend simd operator>>(const simd&, const simd&); - friend simd operator<<(const simd&, int); - friend simd operator>>(const simd&, int); - - compound assignment - friend simd& operator+=(simd&, const simd&); - friend simd& operator-=(simd&, const simd&); - friend simd& operator*=(simd&, const simd&); - friend 
simd& operator/=(simd&, const simd&); - friend simd& operator%=(simd&, const simd&); - friend simd& operator&=(simd&, const simd&); - friend simd& operator|=(simd&, const simd&); - friend simd& operator^=(simd&, const simd&); - friend simd& operator<<=(simd&, const simd&); - friend simd& operator>>=(simd&, const simd&); - friend simd& operator<<=(simd&, int); - friend simd& operator>>=(simd&, int); - - compares - friend mask_type operator==(const simd&, const simd&); - friend mask_type operator!=(const simd&, const simd&); - friend mask_type operator>=(const simd&, const simd&); - friend mask_type operator<=(const simd&, const simd&); - friend mask_type operator>(const simd&, const simd&); - friend mask_type operator<(const simd&, const simd&); - }; -} -} +template<class T, class Abi> class simd { +public: + using value_type = T; + using reference = see below; + using mask_type = simd_mask<T, Abi> + using abi_type = Abi; + + static constexpr size_t size() noexcept; + + simd() = default; + + // implicit conversion constructor + template<class U> simd(const simd<U, simd_abi::fixed_size<size()>>&); + + // implicit broadcast constructor (see below for constraints) + template<class U> simd(U&& value); + + // generator constructor (see below for constraints) + template<class G> explicit simd(G&& gen); + + // load constructor + template<class U, class Flags> simd(const U* mem, Flags f); + + + template<class U, class Flags> copy_from(const U* mem, Flags f); + template<class U, class Flags> copy_to(U* mem, Flags f); + + + reference operator[](size_t); + value_type operator[](size_t) const; + + + simd& operator++(); + simd operator++(int); + simd& operator--(); + simd operator--(int); + mask_type operator!() const; + simd operator~() const; + simd operator+() const; + simd operator-() const; + + + friend simd operator+(const simd&, const simd&); + friend simd operator-(const simd&, const simd&); + friend simd operator*(const simd&, const simd&); + friend simd operator/(const 
simd&, const simd&); + friend simd operator%(const simd&, const simd&); + friend simd operator&(const simd&, const simd&); + friend simd operator|(const simd&, const simd&); + friend simd operator^(const simd&, const simd&); + friend simd operator<<(const simd&, const simd&); + friend simd operator>>(const simd&, const simd&); + friend simd operator<<(const simd&, int); + friend simd operator>>(const simd&, int); + + + friend simd& operator+=(simd&, const simd&); + friend simd& operator-=(simd&, const simd&); + friend simd& operator*=(simd&, const simd&); + friend simd& operator/=(simd&, const simd&); + friend simd& operator%=(simd&, const simd&); + friend simd& operator&=(simd&, const simd&); + friend simd& operator|=(simd&, const simd&); + friend simd& operator^=(simd&, const simd&); + friend simd& operator<<=(simd&, const simd&); + friend simd& operator>>=(simd&, const simd&); + friend simd& operator<<=(simd&, int); + friend simd& operator>>=(simd&, int); + + + friend mask_type operator==(const simd&, const simd&); + friend mask_type operator!=(const simd&, const simd&); + friend mask_type operator>=(const simd&, const simd&); + friend mask_type operator<=(const simd&, const simd&); + friend mask_type operator>(const simd&, const simd&); + friend mask_type operator<(const simd&, const simd&); +};
simd
overviewsimd
shall be a complete type. The specialization simd<T, Abi>
is supported if T
is a vectorizable type and
- Abi
is simd_abi::scalar
, or
-
- Abi
is simd_abi::scalar
, or
+
+ Abi
is simd_abi::fixed_size<N>
, with N
is constrained as defined in Abi
is simd_abi::fixed_size<N>
, with N
is constrained as defined in Abi
is an extended ABI tag, it is implementation-defined whether simd<T, Abi>
is supported. simd
overview__simd_x
and __gpu_y
. When the compiler is invoked to translate to a machine that has support for the __simd_x
ABI tag for all arithmetic types other than long double
and no support for the __gpu_y
ABI tag, then:
- simd<T, simd_abi::__gpu_y>
is not supported for any T
and has a deleted constructor.
simd
overviewsimd<long double, simd_abi::scalar>
is supported.
@@ -1009,7 +997,7 @@ Class template
simd
overview
[ Example:
- Consider an implementation that supports the type __vec4f
and the function __vec4f
, _vec4f_addsub(__vec4f, __vec4f)
for the currently targeted system.
+ Consider an implementation that supports the type __vec4f
and the function __vec4f _vec4f_addsub(__vec4f, __vec4f)
for the currently targeted system.
A user may require the use of _vec4f_addsub
for maximum performance and thus writes:
@@ -1023,7 +1011,7 @@
- Class template
— end example ]
simd
overview
-class reference // exposition only +class reference // exposition only { public: reference() = delete; @@ -1071,10 +1059,9 @@- +Element references
friend void swap(reference&& a, reference&& b) noexcept; friend void swap(value_type&& a, reference&& b) noexcept; friend void swap(reference&& a, value_type&& b) noexcept; - };
declval<value_type &&>() @= std::forward<U>(x)
(where @=
denotes the indicated compound assignment operator) is well-formed.
+ This function shall not participate in overload resolution unless declval<value_type &>() @= std::forward<U>(x)
(where @=
denotes the indicated compound assignment operator) is well-formed.
simd
constructorsFrom
is a vectorizable type and every possibly value of From
can be represented with type value_type
, or
simd
constructorsFrom
is unsigned int
and value_type
is an unsigned integral type.
- simd
constructorsabi_type
is simd_abi::fixed_size<size()>
, and
@@ -1286,7 +1273,7 @@ simd
constructorsU
and value_type
are integral, the integer conversion rank [conv.rank] of value_type
is greater than the integer conversion rank of U
.
simd
constructorsFlags
is vector_aligned_tag
, mem
shall point to storage aligend by memory_alignment_v<simd, U>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignon(U)
. [mem, mem + size())
is a valid range.
+ If the template parameter Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<simd, U>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. [mem, mem + size())
is a valid range.
simd
constructorsis_simd_flag_type_v<Flags>
is true
, and
@@ -1343,7 +1330,7 @@ simd
constructorsU
is a vectorizable type.
simd
copy functionsFlags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<simd, U>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligend by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignonf(U)
. [mem, mem + size())
is a valid range.
+ If the template parameter Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<simd, U>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. [mem, mem + size())
is a valid range.
simd
copy functionsis_simd_flag_type_v<Flags>
is true
, and
simd
copy functionsU
is a vectorizable type.
simd
copy functionsis_simd_flag_type_v<Flags>
is true
, and
simd
copy functionsU
is a vectorizable type.
simd
subscript operatorsreference
(see [parallel.simd.reference]) referring to the i-th element.
+ A reference
(see simd
unary operators*this
before incrementing.
-
+
simd
unary operators*this
before decrementing.
-
+
simd
unary operatorssimd
non-member operationssimd
non-member operationssimd
binary operatorssimd
non-member operationssimd
object initialized with the results of the element-wise application of the indicated operator.
-
- simd
object initialized with the results of the element-wise application of the indicated operator.
+
+ value_type
.
-
- value_type
.
+
+ simd
object where the i-th element is initialized to the result of applying the indicated operator to v[i]
and n
for all i
∊ [0, size())
.
-
- simd
object where the i-th element is initialized to the result of applying the indicated operator to v[i]
and n
for all i
∊ [0, size())
.
+
+ value_type
.
-
- value_type
.
+
+ simd
compound assignmentsimd
compound assignmentsimd
compound assignmentlhs
.
-
- lhs
.
+
+ value_type
.
-
- value_type
.
+
+ simd
compare operatorssimd
compare operatorssimd
compare operatorssimd_mask
object initialized with the results of the element-wise application of the indicated operator.
-
- simd_mask
object initialized with the results of the element-wise application of the indicated operator.
+
+ simd
reductions
+
+ In this subclause, BinaryOperation
shall be a binary element-wise operation.
+
+
binary_op
shall be callable with two arguments of type T
returning T
, or callable with two arguments of type simd<T, A1>
returning simd<T, A1>
for every A1
that is an ABI tag type.
-
- GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ [0, size())
.
-
- binary_op
shall be callable with two arguments of type T
returning T
, or callable with two arguments of type simd<T, A1>
returning simd<T, A1>
for every A1
that is an ABI tag type.
+
+ binary_op
.
-
- GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ [0, size())
.
+
+
- reduce
does not require an initial value because x
is guaranteed to be non-empty.
-
-
binary_op
.
+
+ binary_op
shall be callable with two arguments of type T
returning T
, or callable with two arguments of type simd<T, A1>
returning simd<T, A1>
for every A1
that is an ABI tag type. The results of binary_op(identity_element, x)
and binary_op(x, identity_element)
shall be equal to x
for all finite values x
representable by V::value_type
.
-
- binary_op
shall be callable with two arguments of type T
returning T
, or callable with two arguments of type simd<T, A1>
returning simd<T, A1>
for every A1
that is an ABI tag type. The results of binary_op(identity_element, x)
and binary_op(x, identity_element)
shall be equal to x
for all finite values x
representable by V::value_type
.
+
+ none_of(x.mask)
, returns identity_element
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
-
- none_of(x.mask)
, returns identity_element
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
+
+ binary_op
.
-
- binary_op
.
+
+ none_of(x.mask)
, returns 0
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
-
- none_of(x.mask)
, returns 0
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
+
+ none_of(x.mask)
, returns 1
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
-
- none_of(x.mask)
, returns 1
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
+
+ is_integral_v<V::value_type>
is true
.
-
- is_integral_v<V::value_type>
is true
.
+
+ none_of(x.mask)
, returns ~V::value_type()
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
-
- none_of(x.mask)
, returns ~V::value_type()
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
+
+ is_integral_v<V::value_type>
is true
.
-
- is_integral_v<V::value_type>
is true
.
+
+ none_of(x.mask)
, returns 0
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
-
- none_of(x.mask)
, returns 0
. Otherwise, returns GENERALIZED_SUM(binary_op, x.data[i], ...)
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
+
+ x[j]
for which x[j] <= x[i]
for all i
∊ [0, size())
.
-
- x[j]
for which x[j] <= x[i]
for all i
∊ [0, size())
.
+
+ none_of(x.mask)
, the return value is numeric_limits<V::value_type>::max()
. Otherwise, returns the value of an element x.data[j]
for which x.mask[j] == true
and x.data[j] <= x.data[i]
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
-
- none_of(x.mask)
, the return value is numeric_limits<V::value_type>::max()
. Otherwise, returns the value of an element x.data[j]
for which x.mask[j] == true
and x.data[j] <= x.data[i]
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
+
+ x[j]
for which x[j] >= x[i]
for all i
∊ [0, size())
.
-
- x[j]
for which x[j] >= x[i]
for all i
∊ [0, size())
.
+
+ none_of(x.mask)
, the return value is numeric_limits<V::value_type>::lowest()
. Otherwise, returns the value of an element x.data[j]
for which x.mask[j] == true
and x.data[j] >= x.data[i]
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
-
- none_of(x.mask)
, the return value is numeric_limits<V::value_type>::lowest()
. Otherwise, returns the value of an element x.data[j]
for which x.mask[j] == true
and x.data[j] >= x.data[i]
for all i
∊ {j ∊ ℕ0 ∣ j < M::size()
⋀ mask[
j]
}.
+
+ simd
castssimd
casts
-
- Let To
identify T::value_type
if is_simd_v<T>
is true
, or T
otherwise.
-
-
+
+ Let To
identify T::value_type
if is_simd_v<T>
is true
, or T
otherwise.
+
+
simd
object with the i-th element initialized to static_cast<To>(x[i])
for all i
∊ [0, size())
.
-
- simd
object with the i-th element initialized to static_cast<To>(x[i])
for all i
∊ [0, size())
.
+
+ U
can be represented with type To
, and
-
- U
can be represented with type To
, and
+
+ is_simd_v<T>
is false
, or
-
- T::size() == simd<U, Abi>::size()
is true
.
-
- is_simd_v<T>
is false
, or
+
+ T::size() == simd<U, Abi>::size()
is true
.
+
+ - - The return type is - +
+
+ The return type is
+
- T
if is_simd_v<T>
is true
, otherwise
-
-
+
T
if is_simd_v<T>
is true
, otherwise
+
+ simd<T, Abi>
if U
is T
, otherwise
-
- simd<T, Abi>
is U
is T
, otherwise
+
+ simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>>
-
-
-
simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>>
+
+
-
- Let To
identify T::value_type
if is_simd_v<T>
is true
or T
otherwise.
-
-
+
+ Let To
identify T::value_type
if is_simd_v<T>
is true
or T
otherwise.
+
+
simd
object with the i-th element initialized to static_cast<To>(x[i])
for all i
∊ [0, size())
.
-
- simd
object with the i-th element initialized to static_cast<To>(x[i])
for all i
∊ [0, size())
.
+
+ is_simd_v<T>
is false
, or
-
- is_simd_v<T>
is false
, or
+
+ T::size() == simd<U, Abi>::size()
is true
.
-
- T::size() == simd<U, Abi>::size()
is true
.
+
+ - The return type is +
+ The return type is
- T
if is_simd_v<T>
is true
, otherwise
-
-
+
T
if is_simd_v<T>
is true
, otherwise
+
+ simd<T, Abi>
if either U
is T
or U
and T
are integral types that only differ in signedness, otherwise
-
- simd<T, Abi>
if either U
is T
or U
and T
are integral types that only differ in signedness, otherwise
+
+ simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>>
.
-
-
-
simd<T, simd_abi::fixed_size<simd<U, Abi>::size()>>
.
+
+ x[i]
for all i
∊ [0, size())
.
-
- x[i]
for all i
∊ [0, size())
.
+
+ x[i]
for all i
∊ [0, size())
.
-
- x[i]
for all i
∊ [0, size())
.
+
+ simd_size_v<T, simd_abi::native<T>> == N
is true
.
-
- simd_size_v<T, simd_abi::native<T>> == N
is true
.
+
+ x[i]
for all i
∊ [0, size())
.
-
- x[i]
for all i
∊ [0, size())
.
+
+ simd_size_v<T, simd_abi::compatible<T>> == N
is true
.
-
- simd_size_v<T, simd_abi::compatible<T>> == N
is true
.
+
+ tuple
of data-parallel objects with the i-th simd
/simd_mask
element of the j-th tuple
element initialized to the value of the element x
with index i + sum of the first j values in the Sizes
pack.
-
- tuple
of data-parallel objects with the i-th simd
/simd_mask
element of the j-th tuple
element initialized to the value of the element x
with index i + sum of the first j values in the Sizes
pack.
+
+ Sizes
pack is equal to simd_size_v<T, Abi>
.
-
- Sizes
pack is equal to simd_size_v<T, Abi>
.
+
+ array
of data-parallel objects with the i-th simd
/simd_mask
element of the j-th array
element initialized to the value of the element in x
with index i + j * V::size()
.
-
- array
of data-parallel objects with the i-th simd
/simd_mask
element of the j-th array
element initialized to the value of the element in x
with index i + j * V::size()
.
+
+ simd_size_v<typename V::value_type, Abi>
is an integral multiple of V::size()
, and
-
- simd_size_v<typename V::value_type, Abi>
is an integral multiple of V::size()
, and
+
+ simd
parameter is_simd_v<V>
is true
, for the overload with a simd_mask
parameter is_simd_mask_v<V>
is true
.
-
- simd
parameter is_simd_v<V>
is true
, for the overload with a simd_mask
parameter is_simd_mask_v<V>
is true
.
+
+ xs
pack of data-parallel objects: The i-th simd
/simd_mask
element of the j-th parameter in the xs
pack is copied to the return value's element with index i + the sum of the width of the first j parameters in the xs
pack.
-
- simd
algorithmsstd::min(a[i], b[i])
for all i
∊ [0, size())
.
-
- std::max(a[i], b[i])
for all i
∊ [0, size())
.
-
- std::min(a[i], b[i])
for all i
∊ [0, size())
in the first
member, and
-
-
- std::max(a[i], b[i])
for all i
∊ [0, size())
in the second
member, and
-
-
- lo
shall be greater than the corresponding element in hi
.
-
- std::clamp(v[i], lo[i], hi[i])
for all i
∊ [0, size())
.
-
-
- xs
pack of data-parallel objects: The i-th simd
/simd_mask
element of the j-th parameter in the xs
pack is copied to the return value's element with index i + the sum of the width of the first j parameters in the xs
pack.
+
+ simd
math library
-
- For each set of overloaded functions within <cmath>
, there shall be additional overloads sufficient to ensure that if any argument corresponding to a double
parameter has type simd<T, Abi>
, where is_floating_point_v<T>
is true
, then:
-
- double
parameters shall be convertible to simd<T, Abi>
.
-
- double*
parameters shall be of type simd<T, Abi>*
.
-
- U
shall be convertible to fixed_size_simd<U, simd_size_v<T, Abi>>
.
-
- U*
, where U
is integral, shall be of type fixed_size_simd<U, simd_size_v<T, Abi>>*
.
-
- double
, the return type of the additional overloads is simd<T, Abi>
. Otherwise, if the corresponding return type is bool
, the return type of the additional overload is simd_mask<T, Abi>
. Otherwise, the return type is fixed_size_simd<R, simd_size_v<T, Abi>>
, with R
denoting the corresponding return type.
-
-
- simd
algorithms
+ simd<T, Abi>
but are not of type simd<T, Abi>
is well-formed.
+ The result of the element-wise application of std::min(a[i], b[i])
for all i
∊ [0, size())
.
-
-
- Each function overload produced by the above rules applies the indicated <cmath>
function element-wise. The results per element are not required to be bitwise equal to the application of the function which is overloaded for the element type.
-
-
-
- The behavior is undefined if a domain, pole, or range error occurs when the input argument(s) are applied to the indicated <cmath>
function.
-
-
-
- If abs
is called with an argument of type simd<X, Abi>
for which is_unsigned_v<X>
is true
, the program is ill-formed.
-
-
simd_mask
std::max(a[i], b[i])
for all i
∊ [0, size())
.
+
+ simd_mask
overview+-namespace std::experimental { -inline namespace parallelism_v2 { - template<class T, class Abi> class simd_mask { - public: - using value_type = bool; - using reference = see-below; - using simd_type = simd<T, Abi>; - using abi_type = Abi; + A pair initialized with + + ++
+ +- + + the result of element-wise application of
- simd_mask() = default; +std::min(a[i], b[i])
for alli
∊[0, size())
in thefirst
member, and - static constexpr size_t size() noexcept; + +- + + the result of element-wise application of
+std::max(a[i], b[i])
for alli
∊[0, size())
in thesecond
member, and - // broadcast constructor - explicit simd_mask(value_type) noexcept; + +
lo
shall be greater than the corresponding element in hi
.
+
+ std::clamp(v[i], lo[i], hi[i])
for all i
∊ [0, size())
.
- simd
math library
- The class template simd_mask
is a data-parallel type with the element type bool
. The width of a given simd_mask
specialization is a constant expression, determined by the template parameters. Specifically, simd_mask<T, Abi>::size() == simd<T, Abi>::size()
.
-
-
<cmath>
, there shall be additional overloads sufficient to ensure that if any argument corresponding to a double
parameter has type simd<T, Abi>
, where is_floating_point_v<T>
is true
, then:
-
-
- Every specialization of simd_mask
shall be a complete type. The specialization simd_mask<T, Abi>
is supported if T
is a vectorizable type and
+
+
- If double
parameters shall be convertible to simd<T, Abi>
.
+
+ double*
parameters shall be of type simd<T, Abi>*
.
+
+ U
shall be convertible to fixed_size_simd<U, simd_size_v<T, Abi>>
.
+
+ Abi
is simd_abi::scalar
, or
+ All arguments corresponding to U*
, where U
is integral, shall be of type fixed_size_simd<U, simd_size_v<T, Abi>>*
.
Abi
is simd_abi::fixed_size<N>
, with N
constrained as defined in [parallel.simd.abi].
+ If the corresponding return type is double
, the return type of the additional overloads is simd<T, Abi>
. Otherwise, if the corresponding return type is bool
, the return type of the additional overload is simd_mask<T, Abi>
. Otherwise, the return type is fixed_size_simd<R, simd_size_v<T, Abi>>
, with R
denoting the corresponding return type.
Abi
is an extended ABI tag, it is implementation-defined whether simd_mask<T, Abi>
is supported. simd_mask<T, Abi>
is not supported, the specialization shall have a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment.
+ It is unspecified whether a call to these overloads with arguments that are all convertible to simd<T, Abi>
but are not of type simd<T, Abi>
is well-formed.
-
+
- Default initialization performs no intialization of the elements; value-initialization initializes each element with false
. <cmath>
function element-wise. The results per element are not required to be bitwise equal to the application of the function which is overloaded for the element type.
simd<T, Abi>
.
-
-
- Implementations should enable explicit conversion from and to implementation-defined types. This adds one or more of the following declarations to class simd_mask
:
-
-
-
-explicit operator implementation-defined() const;
-explicit simd_mask(const implementation-defined& init) const;
-
+ The behavior is undefined if a domain, pole, or range error occurs when the input argument(s) are applied to the indicated <cmath>
function.
+
+
- The member type reference
has the same interface as simd<T, Abi>::reference
, except its value_type
is bool
. ([parallel.simd.reference])
+ If abs
is called with an argument of type simd<X, Abi>
for which is_unsigned_v<X>
is true
, the program is ill-formed.
simd_mask
constructorsx
.
-
- simd_mask
where the i-th element equals x[i]
for all i
∊ [0, size())
.
-
- abi_type
is simd_abi::fixed_size<size()>
.
-
- Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<simd_mask>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. [mem, mem + size())
is a valid range.
-
- mem[i]
for all i
∊ [0, size())
.
-
- simd_mask
is_simd_flag_type_v<Flags>
is true
.
-
- simd_mask
overviewsimd_mask
copy functions+ +template<class T, class Abi> class simd_mask { +public: + using value_type = bool; + using reference = see below; + using simd_type = simd<T, Abi>; + using abi_type = Abi; -- + // load constructor + template<class Flags> simd_mask(const value_type* mem, Flags); -template<class Flags> void copy_from(const value_type* mem, Flags); + static constexpr size_t size() noexcept; -- - If the template parameter + simd_mask() = default; -Flags
isvector_aligned_tag
,mem
shall point to storage aligned bymemory_alignment_v<simd_mask>
. If the template parameterFlags
isoveraligned_tag<N>
,mem
shall point to storage aligned byN
. If the template parameterFlags
iselement_aligned_tag
,mem
shall point to storage aligned byalignof(U)
.[mem, mem + size())
is a valid range. - -- - Replaces the elements of the + // implicit type conversion constructor + template<class U> simd_mask(const simd_mask<U, simd_abi::fixed_size<size()>>&) noexcept; -simd_mask
object such that the i-th element is replaced withmem[i]
for alli
∊[0, size())
. + // broadcast constructor + explicit simd_mask(value_type) noexcept; - -- - This function shall not participate in overload resolution unless -is_simd_flag_type_v<Flags>
istrue
. - -- -template<class Flags> void copy_to(value_type* mem, Flags); ++ template<class Flags> void copy_from(const value_type* mem, Flags); + template<class Flags> void copy_to(value_type* mem, Flags); - - - If the template parameter +Flags
isvector_aligned_tag
,mem
shall point to storage aligned bymemory_alignment_v<simd_mask>
. If the template parameterFlags
isoveraligned_tag<N>
,mem
shall point to storage aligned byN
. If the template parameterFlags
iselement_aligned_tag
,mem
shall point to storage aligned byalignof(U)
.[mem, mem + size())
is a valid range. - -+ reference operator[](size_t); + value_type operator[](size_t) const; - - - Copies all +simd_mask
elements as ifmem[i] = operator[](i)
for alli
∊[0, size())
. - -+ simd_mask operator!() const noexcept; + + + friend simd_mask operator&&(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator||(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator&(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator|(const simd_mask&, const simd_mask&) noexcept; + friend simd_mask operator^(const simd_mask&, const simd_mask&) noexcept; - - - This function shall not participate in overload resolution unless -is_simd_flag_type_v<Flags>
istrue
. - -
simd_mask
subscript operators
+
+ The class template simd_mask
is a data-parallel type with the element type bool
. The width of a given simd_mask
specialization is a constant expression, determined by the template parameters. Specifically, simd_mask<T, Abi>::size() == simd<T, Abi>::size()
.
+
+
i < size()
.
-
-
+
+ Every specialization of simd_mask
shall be a complete type. The specialization simd_mask<T, Abi>
is supported if T
is a vectorizable type and
-
+
reference
(see [parallel.simd.reference]) referring to the i-th element.
+ Abi
is simd_abi::scalar
, or
- Abi
is simd_abi::fixed_size<N>
, with N
constrained as defined in (
Abi
is an extended ABI tag, it is implementation-defined whether simd_mask<T, Abi>
is supported. simd_mask<T, Abi>
is not supported, the specialization shall have a deleted default constructor, deleted destructor, deleted copy constructor, and deleted copy assignment.
+
+
+ i < size()
.
-
-
+
+ Default initialization performs no initialization of the elements; value-initialization initializes each element with false
.
simd<T, Abi>
.
+
+ simd_mask
unary operators
+
+ Implementations should enable explicit conversion from and to implementation-defined types. This adds one or more of the following declarations to class simd_mask
:
-
+
+explicit operator implementation-defined() const;
+explicit simd_mask(const implementation-defined& init);
+
- operator!
.
-
-
+
+ The member type reference
has the same interface as simd<T, Abi>::reference
, except its value_type
is bool
. (
simd_mask
constructorssimd_mask
non-member operationssimd_mask
binary operatorssimd_mask
object initialized with the results of the element-wise appliation of the indicated operator.
-
- simd_mask
compound assignmentx
.
+
+ simd_mask
where the i-th element equals x[i]
for all i
∊ [0, size())
.
lhs
.
+ This constructor shall not participate in overload resolution unless abi_type
is simd_abi::fixed_size<size()>
.
- Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<simd_mask>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. [mem, mem + size())
is a valid range.
- mem[i]
for all i
∊ [0, size())
.
+
+ is_simd_flag_type_v<Flags>
is true
.
+
+ simd_mask
copy functionstrue
if all boolean elements in k
are true
, false
otherwise.
+ If the template parameter Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<simd_mask>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. [mem, mem + size())
is a valid range.
- simd_mask
object such that the i-th element is replaced with mem[i]
for all i
∊ [0, size())
.
+
+
+ true
if at least one boolean element in k
is true
, false
otherwise.
+ This function shall not participate in overload resolution unless is_simd_flag_type_v<Flags>
is true
.
- true
if none of the one boolean elements in k
is true
, false
otherwise.
+ If the template parameter Flags
is vector_aligned_tag
, mem
shall point to storage aligned by memory_alignment_v<simd_mask>
. If the template parameter Flags
is overaligned_tag<N>
, mem
shall point to storage aligned by N
. If the template parameter Flags
is element_aligned_tag
, mem
shall point to storage aligned by alignof(U)
. [mem, mem + size())
is a valid range.
- true
if at least one of the one boolean elements in k
is true
and at least one of the boolean elements in k
is false
, false
otherwise.
+ Copies all simd_mask
elements as if mem[i] = operator[](i)
for all i
∊ [0, size())
.
- k
that are true
.
+ This function shall not participate in overload resolution unless is_simd_flag_type_v<Flags>
is true
.
- simd_mask
subscript operatorsany_of(k)
returns true
.
+ i < size()
.
i
where k[i]
is true
.
+ A reference
(see any_of(k)
returns true
.
+ i < size()
.
i
where k[i]
is true
.
+ The value of the i-th element.
simd_mask
unary operatorsall_of
and any_of
return their arguments; none_of
returns the negation of its argument; some_of
returns false
; popcount
returns the integral representation of its argument; find_first_set
and find_last_set
return 0
.
+ The result of the element-wise application of operator!
.
bool
.
-
- simd_mask
non-member operationssimd_mask
binary operatorstrue
.
-
- 0
.
+ A simd_mask
object initialized with the results of the element-wise application of the indicated operator.
bool
.
-
- simd_mask
compound assignmentlhs
.
+
+ simd_mask
comparisonssimd_mask
reductionstrue
if all boolean elements in k
are true
, false
otherwise.
+
+ true
if at least one boolean element in k
is true
, false
otherwise.
+
+ true
if none of the boolean elements in k
is true
, false
otherwise.
+
+ true
if at least one of the boolean elements in k
is true
and at least one of the boolean elements in k
is false
, false
otherwise.
+
+ k
that are true
.
+
+ any_of(k)
returns true
.
+
+ i
where k[i]
is true
.
+
+ any_of(k)
returns true
.
+
+ i
where k[i]
is true
.
+
+ all_of
and any_of
return their arguments; none_of
returns the negation of its argument; some_of
returns false
; popcount
returns the integral representation of its argument.
+
+ bool
.
+
+ true
.
+
+ 0
.
+
+ bool
.
+
+ mask
and data
initialized with k
and v
respectively.
-
- mask
and data
initialized with k
and v
respectively.
+
+ T
is neither a simd
nor a simd_mask
specialization, and
-
- T
is neither a simd
nor a simd_mask
specialization, and
+
+ bool
.
-
- bool
.
+
+ mask
and data
initialized with k
and v
respectively.
-
- mask
and data
initialized with k
and v
respectively.
+
+ define_task_block
define_task_block
function may return on a thread other than the one on which it was called
unless there are no task blocks active on entry to define_task_block
(see define_task_block
returns on a different thread,
- it synchronizes with operations following the call. define_task_block
define_task_block_restore_thread
function always returns on the same thread as the one on which it was called.