template<class T>
ordered_update_t<T> ordered_update(T& ref) noexcept;
}
+
+
+// Exposition only: Suppress template argument deduction.
+template<class T> struct no_deduce { using type = T; };
+template<class T> using no_deduce_t = typename no_deduce<T>::type;
+
+ Support for reductions
+template<class T, class BinaryOperation>
+ unspecified reduction(T& var, const T& identity, BinaryOperation combiner);
+template<class T>
+ unspecified reduction_plus(T& var);
+template<class T>
+ unspecified reduction_multiplies(T& var);
+template<class T>
+ unspecified reduction_bit_and(T& var);
+template<class T>
+ unspecified reduction_bit_or(T& var);
+template<class T>
+ unspecified reduction_bit_xor(T& var);
+template<class T>
+ unspecified reduction_min(T& var);
+template<class T>
+ unspecified reduction_max(T& var);
+
+ Support for inductions
+template<class T>
+ unspecified induction(T&& var);
+template<class T, class S>
+ unspecified induction(T&& var, S stride);
+
+ for_loop
+template<class I, class... Rest>
+ void for_loop(no_deduce_t<I> start, I finish, Rest&&... rest);
+template<class ExecutionPolicy,
+ class I, class... Rest>
+ void for_loop(ExecutionPolicy&& exec,
+ no_deduce_t<I> start, I finish, Rest&&... rest);
+template<class I, class S, class... Rest>
+ void for_loop_strided(no_deduce_t<I> start, I finish,
+ S stride, Rest&&... rest);
+template<class ExecutionPolicy,
+ class I, class S, class... Rest>
+ void for_loop_strided(ExecutionPolicy&& exec,
+ no_deduce_t<I> start, I finish,
+ S stride, Rest&&... rest);
+template<class I, class Size, class... Rest>
+ void for_loop_n(I start, Size n, Rest&&... rest);
+template<class ExecutionPolicy,
+ class I, class Size, class... Rest>
+ void for_loop_n(ExecutionPolicy&& exec,
+ I start, Size n, Rest&&... rest);
+template<class I, class Size, class S, class... Rest>
+ void for_loop_n_strided(I start, Size n, S stride, Rest&&... rest);
+template<class ExecutionPolicy,
+ class I, class Size, class S, class... Rest>
+ void for_loop_n_strided(ExecutionPolicy&& exec,
+ I start, Size n, S stride, Rest&&... rest);
+
}
}
}
@@ -555,6 +613,308 @@
Header <experimental/algorithm> synopsis
+
+
Reductions
+
+
+
+ Each of the function templates in this subclause ([parallel.alg.reductions]) returns a reduction object
+ of unspecified type having a reduction value type and encapsulating a reduction identity value for the reduction, a
+ combiner function object, and a live-out object from which the initial value is obtained and into which the final
+ value is stored.
+
+
+
+ An algorithm uses reduction objects by allocating an unspecified number of instances, known as accumulators, of the reduction value
+ type. An implementation might, for example, allocate an accumulator for each thread in its private thread pool.
+ Each accumulator is initialized with the object’s reduction identity, except that the live-out object (which was initialized by the
+ caller) comprises one of the accumulators. The algorithm passes a reference to an accumulator to each application of an element-access
+ function, ensuring that no two concurrently executing invocations share the same accumulator. An accumulator can be shared between two
+ applications that do not execute concurrently, but initialization is performed only once per accumulator.
+
+
+
+ Modifications to the accumulator by the application of element access functions accrue as partial results. At some point before the algorithm
+ returns, the partial results are combined, two at a time, using the reduction object’s combiner operation until a single value remains, which
+ is then assigned back to the live-out object. In order to produce useful results, modifications to the accumulator should be limited
+ to commutative operations closely related to the combiner operation. For example if the combiner is plus<T>, incrementing
+ the accumulator would be consistent with the combiner but doubling it or assigning to it would not.
+
+
+
+
+ template<class T, class BinaryOperation>
+unspecified reduction(T& var, const T& identity, BinaryOperation combiner);
+
+
+ T shall meet the requirements of CopyConstructible and MoveAssignable. The expression var = combiner(var, var) shall be well-formed.
+
+
+
+ a reduction object of unspecified type having reduction value type T, reduction identity identity, combiner function object combiner, and using the object referenced by var as its live-out object.
+
+
+
+
+ template<class T>
+unspecified reduction_plus(T& var);
+ template<class T>
+unspecified reduction_multiplies(T& var);
+ template<class T>
+unspecified reduction_bit_and(T& var);
+ template<class T>
+unspecified reduction_bit_or(T& var);
+ template<class T>
+unspecified reduction_bit_xor(T& var);
+ template<class T>
+unspecified reduction_min(T& var);
+ template<class T>
+unspecified reduction_max(T& var);
+
+
+ T shall meet the requirements of CopyConstructible and MoveAssignable.
+
+
+
+ a reduction object of unspecified type having reduction value type T, reduction identity and combiner operation as specified in the table below, and using the object referenced by var as its live-out object.
+
+
+
+
Reduction identities and combiner operations
+
+
+
Function
+
Reduction Identity
+
Combiner Operation
+
+
+
reduction_plus
+
T()
+
x + y
+
+
+
reduction_multiplies
+
T(1)
+
x * y
+
+
+
reduction_bit_and
+
(~T())
+
x & y
+
+
+
reduction_bit_or
+
T()
+
x | y
+
+
+
reduction_bit_xor
+
T()
+
x ^ y
+
+
+
reduction_min
+
var
+
min(x, y)
+
+
+
reduction_max
+
var
+
max(x, y)
+
+
+
+
+
+ The following code updates each element of y and sets s to the sum of the squares.
+
+extern int n;
+extern float x[], y[], a;
+float s = 0;
+for_loop(execution::vec, 0, n,
+ reduction(s, 0.0f, plus<>()),
+ [&](int i, float& accum) {
+ y[i] += a*x[i];
+ accum += y[i]*y[i];
+ }
+);
+
+
+
+
+
+
+
+
Inductions
+
+
+
+ Each of the function templates in this section returns an induction object of unspecified type having an induction
+ value type and encapsulating an initial value i of that type and, optionally, a stride.
+
+
+
+ For each element in the input range, an algorithm over input sequence S computes an induction value from an induction variable
+ and ordinal position p within S by the formula i + p * stride if a stride was specified or i + p otherwise. This induction value is
+ passed to the element access function.
+
+
+
+ An induction object may refer to a live-out object to hold the final value of the induction sequence. When the algorithm using the induction
+ object completes, the live-out object is assigned the value i + n * stride, where n is the number of elements in the input range.
+
+
+
+
+ template<class T>
+unspecified induction(T&& var);
+
+ template<class T, class S>
+unspecified induction(T&& var, S stride);
+
+
+
+
+ an induction object with induction value type remove_cv_t<remove_reference_t<T>>,
+ initial value var, and (if specified) stride stride. If T is an lvalue reference
+ to non-const type, then the object referenced by var becomes the live-out object for the
+ induction object; otherwise there is no live-out object.
+
+
+
+
+
+
+
+
For loop
+
+
+ template<class I, class... Rest>
+void for_loop(no_deduce_t<I> start, I finish, Rest&&... rest);
+
+ template<class ExecutionPolicy,
+ class I, class... Rest>
+void for_loop(ExecutionPolicy&& exec,
+ no_deduce_t<I> start, I finish, Rest&&... rest);
+
+
+
+ template<class I, class S, class... Rest>
+void for_loop_strided(no_deduce_t<I> start, I finish,
+ S stride, Rest&&... rest);
+
+ template<class ExecutionPolicy,
+ class I, class S, class... Rest>
+void for_loop_strided(ExecutionPolicy&& exec,
+ no_deduce_t<I> start, I finish,
+ S stride, Rest&&... rest);
+
+
+
+ template<class I, class Size, class... Rest>
+void for_loop_n(I start, Size n, Rest&&... rest);
+
+ template<class ExecutionPolicy,
+ class I, class Size, class... Rest>
+void for_loop_n(ExecutionPolicy&& exec,
+ I start, Size n, Rest&&... rest);
+
+
+
+ template<class I, class Size, class S, class... Rest>
+void for_loop_n_strided(I start, Size n, S stride, Rest&&... rest);
+
+ template<class ExecutionPolicy,
+ class I, class Size, class S, class... Rest>
+void for_loop_n_strided(ExecutionPolicy&& exec,
+ I start, Size n, S stride, Rest&&... rest);
+
+
+
+
+ For the overloads with an ExecutionPolicy, I shall be an integral type
+ or meet the requirements of a forward iterator type; otherwise, I shall be an integral
+ type or meet the requirements of an input iterator type. Size shall be an integral type
+ and n shall be non-negative. S shall have integral type and stride
+ shall have non-zero value. stride shall be negative only if I has integral
+ type or meets the requirements of a bidirectional iterator. The rest parameter pack shall
+ have at least one element, comprising objects returned by invocations of reduction
+ ([parallel.alg.reductions]) and/or induction ([parallel.alg.inductions]) function templates
+ followed by exactly one invocable element-access function, f. For the overloads with an
+ ExecutionPolicy, f shall meet the requirements of CopyConstructible;
+ otherwise, f shall meet the requirements of MoveConstructible.
+
+
+
+
+
+
+
+ Applies f to each element in the input sequence, as described below, with additional
+ arguments corresponding to the reductions and inductions in the rest parameter pack. The
+ length of the input sequence is:
+
+
+
+ n, if specified,
+
+
+
+ otherwise finish - start if neither n nor stride is specified,
+
+
+
+ otherwise 1 + (finish-start-1)/stride if stride is positive,
+
+
+
+ otherwise 1 + (start-finish-1)/-stride.
+
+
+
+ The first element in the input sequence is start. Each subsequent element is generated by adding
+ stride to the previous element, if stride is specified, otherwise by incrementing
+ the previous element. As described in the C++ standard, section [algorithms.general], arithmetic
+ on non-random-access iterators is performed using advance and distance. The order of the
+ elements of the input sequence is important for determining ordinal position of an application of f,
+ even though the applications themselves may be unordered.
+
+ The first argument to f is an element from the input sequence. If I is an
+ iterator type, the iterators in the input sequence are not dereferenced before
+ being passed to f. For each member of the rest parameter pack
+ excluding f, an additional argument is passed to each application of f as follows:
+
+
+
+ If the pack member is an object returned by a call to a reduction function listed in section
+ [parallel.alg.reductions], then the additional argument is a reference to an accumulator of that reduction
+ object.
+
+
+
+ If the pack member is an object returned by a call to induction, then the additional argument is the
+ induction value for that induction object corresponding to the position of the application of f in the input
+ sequence.
+
+
+
+
+
+
+
+
+ Applies f exactly once for each element of the input sequence.
+
+
+
+
+
+ If f returns a result, the result is ignored.
+
+
+
+
+
From 02367c4680eee86c865df0c6cfc13c7cf45b46fc Mon Sep 17 00:00:00 2001
From: Jared Hoberock
Date: Fri, 17 Nov 2017 15:26:11 -0600
Subject: [PATCH 2/3] Update README for post-Albuquerque mailing
---
README.md | 20 ++++++++------------
1 file changed, 8 insertions(+), 12 deletions(-)
diff --git a/README.md b/README.md
index 9e1e52a..006bfb5 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
- Document Number: N4699
- Date: 2017-10-16
+ Document Number: N4XXX+1
+ Date: 2017-11-XX
Revises:
Project: Programming Language C++
Project Number: TS 19570
@@ -9,20 +9,16 @@
# Parallelism TS Editor's Report, pre-Albuquerque mailing
-N4698 is the proposed working draft of Parallelism TS Version 2. It contains changes to the Parallelism TS as directed by the committee at the Toronto meeting, and editorial changes.
+NXXXX is the proposed working draft of Parallelism TS Version 2. It contains changes to the Parallelism TS as directed by the committee at the Albuquerque meeting.
-N4698 updates the previous draft, N4669, published in the pre-Toronto mailing.
+NXXXX updates the previous draft, N4696, published in the pre-Toronto mailing.
# Technical Changes
-* Apply P0076R4 - Vector and Wavefront Policies.
+* Apply P0776R1 - Rebase the Parallelism TS onto the C++17 Standard
+* Apply P0075R2 - Template Library for Parallel For Loops
-# Editorial Changes
+# Acknowledgements
-* Reformat Table 1 - Feature Test Macro(s), to match the style of the Library Fundamentals TS.
-
-# Notes
-
-* The pre-existing content of N4698 has not yet been harmonized with C++17. As a result, this content is named and namespaced inconsistently with the newly applied content of P0076R4. We anticipate that these inconsistencies will be harmonized by a future revision.
-* N4698 contains forward references to `for_loop` and `for_loop_strided`. We anticipate their introduction in a future revision.
+Thanks to Alisdair Meredith and Pablo Halpern for reviewing these changes.
From 255967e557c54faf090318dd5c46a88e10098003 Mon Sep 17 00:00:00 2001
From: Jared Hoberock
Date: Mon, 20 Nov 2017 14:16:22 -0600
Subject: [PATCH 3/3] Add paper numbers for post-Albuquerque mailing
---
README.md | 6 +++---
front_matter.html | 4 ++--
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/README.md b/README.md
index 006bfb5..5b5c497 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
- Document Number: N4XXX+1
+ Document Number: N4707
Date: 2017-11-XX
Revises:
Project: Programming Language C++
@@ -9,9 +9,9 @@
# Parallelism TS Editor's Report, pre-Albuquerque mailing
-NXXXX is the proposed working draft of Parallelism TS Version 2. It contains changes to the Parallelism TS as directed by the committee at the Albuquerque meeting.
+N4706 is the proposed working draft of Parallelism TS Version 2. It contains changes to the Parallelism TS as directed by the committee at the Albuquerque meeting.
-NXXXX updates the previous draft, N4696, published in the pre-Toronto mailing.
+N4706 updates the previous draft, N4696, published in the pre-Toronto mailing.
# Technical Changes
diff --git a/front_matter.html b/front_matter.html
index 9a1ad15..5498f05 100644
--- a/front_matter.html
+++ b/front_matter.html
@@ -1,8 +1,8 @@
-N4698
+N470619570
- N4669
+ N4698
Jared Hoberock
NVIDIA Corporation