From 12701f567fb1fac52487339d053f0b966a49f188 Mon Sep 17 00:00:00 2001 From: Taeguk Kwon Date: Sun, 25 Jun 2017 15:16:43 -0500 Subject: [PATCH] Avoid the additional memory allocation in parallel::partition_copy by calling the predicate twice instead. --- hpx/parallel/algorithms/partition.hpp | 38 ++++++++++++--------------- 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/hpx/parallel/algorithms/partition.hpp b/hpx/parallel/algorithms/partition.hpp index efa415df3fc9..c208472211a1 100644 --- a/hpx/parallel/algorithms/partition.hpp +++ b/hpx/parallel/algorithms/partition.hpp @@ -29,7 +29,6 @@ #include #include #include -#include #include #include @@ -344,7 +343,6 @@ namespace hpx { namespace parallel { inline namespace v1 parallel(ExPolicy && policy, FwdIter1 first, FwdIter1 last, FwdIter2 dest_true, FwdIter3 dest_false, Pred && pred, Proj && proj) { - typedef hpx::util::zip_iterator zip_iterator; typedef util::detail::algorithm_result< ExPolicy, std::pair > result; @@ -358,23 +356,20 @@ namespace hpx { namespace parallel { inline namespace v1 difference_type count = std::distance(first, last); - boost::shared_array flags(new bool[count]); output_iterator_offset init = { 0, 0 }; using hpx::util::get; - using hpx::util::make_zip_iterator; typedef util::scan_partitioner< ExPolicy, std::pair, output_iterator_offset > scan_partitioner_type; auto f1 = - [pred, proj, flags, policy] + [pred, proj, policy] ( - zip_iterator part_begin, std::size_t part_size + FwdIter1 part_begin, std::size_t part_size ) -> output_iterator_offset { - HPX_UNUSED(flags); HPX_UNUSED(policy); std::size_t true_count = 0; @@ -382,25 +377,24 @@ namespace hpx { namespace parallel { inline namespace v1 // MSVC complains if pred or proj is captured by ref below util::loop_n( part_begin, part_size, - [pred, proj, &true_count](zip_iterator it) mutable + [pred, proj, &true_count](FwdIter1 it) mutable { using hpx::util::invoke; - bool f = invoke(pred, invoke(proj, get<0>(*it))); + bool f = 
invoke(pred, invoke(proj, *it)); - if ((get<1>(*it) = f)) + if (f) ++true_count; }); return output_iterator_offset( true_count, part_size - true_count ); }; auto f3 = - [dest_true, dest_false, flags, policy]( - zip_iterator part_begin, std::size_t part_size, + [pred, proj, policy, dest_true, dest_false]( + FwdIter1 part_begin, std::size_t part_size, hpx::shared_future curr, hpx::shared_future next ) mutable { - HPX_UNUSED(flags); HPX_UNUSED(policy); next.get(); // rethrow exceptions @@ -411,20 +405,24 @@ namespace hpx { namespace parallel { inline namespace v1 std::advance(dest_true, count_true); std::advance(dest_false, count_false); + // MSVC complains if pred or proj is captured by ref below util::loop_n( part_begin, part_size, - [&dest_true, &dest_false](zip_iterator it) mutable + [pred, proj, &dest_true, &dest_false](FwdIter1 it) mutable { - if(get<1>(*it)) - *dest_true++ = get<0>(*it); + using hpx::util::invoke; + bool f = invoke(pred, invoke(proj, *it)); + + if (f) + *dest_true++ = *it; else - *dest_false++ = get<0>(*it); + *dest_false++ = *it; }); }; return scan_partitioner_type::call( std::forward(policy), - make_zip_iterator(first, flags.get()), count, init, + first, count, init, // step 1 performs first part of scan algorithm std::move(f1), // step 2 propagates the partition results from left @@ -441,15 +439,13 @@ namespace hpx { namespace parallel { inline namespace v1 // step 3 runs final accumulation on each partition std::move(f3), // step 4 use this return value - [dest_true, dest_false, count, flags]( + [dest_true, dest_false, count]( std::vector< hpx::shared_future > && items, std::vector > &&) mutable -> std::pair { - HPX_UNUSED(flags); - output_iterator_offset count_pair = items.back().get(); std::size_t count_true = get<0>(count_pair); std::size_t count_false = get<1>(count_pair);