Skip to content

Commit 43c31b3

Browse files
committed
Add distributed::exclusive_scan()
1 parent 6067c3b commit 43c31b3

File tree

3 files changed

+353
-0
lines changed

3 files changed

+353
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
//---------------------------------------------------------------------------//
2+
// Copyright (c) 2013 Kyle Lutz <kyle.r.lutz@gmail.com>
3+
//
4+
// Distributed under the Boost Software License, Version 1.0
5+
// See accompanying file LICENSE_1_0.txt or copy at
6+
// http://www.boost.org/LICENSE_1_0.txt
7+
//
8+
// See http://boostorg.github.com/compute for more information.
9+
//---------------------------------------------------------------------------//
10+
11+
#ifndef BOOST_COMPUTE_DISTRIBUTED_EXCLUSIVE_SCAN_HPP
12+
#define BOOST_COMPUTE_DISTRIBUTED_EXCLUSIVE_SCAN_HPP
13+
14+
#include <vector>
15+
16+
#include <boost/compute/container/vector.hpp>
17+
#include <boost/compute/algorithm/copy.hpp>
18+
#include <boost/compute/algorithm/exclusive_scan.hpp>
19+
#include <boost/compute/algorithm/inclusive_scan.hpp>
20+
#include <boost/compute/algorithm/merge.hpp>
21+
#include <boost/compute/iterator/buffer_iterator.hpp>
22+
#include <boost/compute/allocator/pinned_allocator.hpp>
23+
24+
#include <boost/compute/distributed/command_queue.hpp>
25+
#include <boost/compute/distributed/vector.hpp>
26+
27+
namespace boost {
28+
namespace compute {
29+
namespace distributed {
30+
31+
template<
32+
class InputType, weight_func weight, class Alloc,
33+
class OutputType,
34+
class BinaryOperator
35+
>
36+
inline void
37+
exclusive_scan(const vector<InputType, weight, Alloc> &input,
38+
vector<OutputType, weight, Alloc> &result,
39+
OutputType init,
40+
BinaryOperator binary_op,
41+
command_queue &queue)
42+
{
43+
BOOST_ASSERT(input.parts() == result.parts());
44+
BOOST_ASSERT(input.size() == result.size());
45+
46+
std::vector<OutputType> input_tails;
47+
input_tails.reserve(input.parts() - 1);
48+
for(size_t i = 0; i < input.parts(); i++)
49+
{
50+
if(input.begin(i) != input.end(i) && i < (input.parts() - 1))
51+
{
52+
input_tails.push_back(
53+
static_cast<OutputType>(
54+
(input.end(i) - 1).read(queue.get(i))
55+
)
56+
);
57+
}
58+
59+
if(i == 0)
60+
{
61+
::boost::compute::exclusive_scan(
62+
input.begin(i),
63+
input.end(i),
64+
result.begin(i),
65+
init,
66+
binary_op,
67+
queue.get(i)
68+
);
69+
}
70+
else
71+
{
72+
::boost::compute::exclusive_scan(
73+
input.begin(i),
74+
input.end(i),
75+
result.begin(i),
76+
input_tails[i - 1],
77+
binary_op,
78+
queue.get(i)
79+
);
80+
}
81+
}
82+
83+
// find device for calculating partial sum of last elements of input vector
84+
::boost::compute::command_queue& device_queue = queue.get(0);
85+
// CPU device is preferred, however if there is none, the first device
86+
// queue is used
87+
for(size_t i = 0; i < queue.size(); i++)
88+
{
89+
if(queue.get(i).get_device().type() & ::boost::compute::device::cpu)
90+
{
91+
device_queue = queue.get(i);
92+
break;
93+
}
94+
}
95+
96+
std::vector<OutputType> output_tails(input_tails.size());
97+
for(size_t i = 0; i < input.parts() - 1; i++)
98+
{
99+
if(input.begin(i) != input.end(i))
100+
{
101+
output_tails[i] = (result.end(i) - 1).read(queue.get(i));
102+
}
103+
}
104+
::boost::compute::vector<OutputType> output_tails_device(
105+
output_tails.size(), device_queue.get_context()
106+
);
107+
::boost::compute::copy_async(
108+
output_tails.begin(),
109+
output_tails.end(),
110+
output_tails_device.begin(),
111+
device_queue
112+
);
113+
::boost::compute::inclusive_scan(
114+
output_tails_device.begin(),
115+
output_tails_device.end(),
116+
output_tails_device.begin(),
117+
device_queue
118+
);
119+
::boost::compute::copy(
120+
output_tails_device.begin(),
121+
output_tails_device.end(),
122+
output_tails.begin(),
123+
device_queue
124+
);
125+
for(size_t i = 1; i < input.parts(); i++)
126+
{
127+
::boost::compute::transform(
128+
result.begin(i),
129+
result.end(i),
130+
::boost::compute::make_constant_iterator(
131+
output_tails[i - 1]
132+
),
133+
result.begin(i),
134+
binary_op,
135+
queue.get(i)
136+
);
137+
}
138+
}
139+
140+
/// \overload
141+
template<
142+
class InputType, weight_func weight, class Alloc,
143+
class OutputType
144+
>
145+
inline void
146+
exclusive_scan(const vector<InputType, weight, Alloc> &input,
147+
vector<OutputType, weight, Alloc> &result,
148+
OutputType init,
149+
command_queue &queue)
150+
{
151+
::boost::compute::distributed::exclusive_scan(
152+
input,
153+
result,
154+
init,
155+
boost::compute::plus<OutputType>(),
156+
queue
157+
);
158+
}
159+
160+
/// \overload
161+
template<
162+
class InputType, weight_func weight, class Alloc,
163+
class OutputType
164+
>
165+
inline void
166+
exclusive_scan(const vector<InputType, weight, Alloc> &input,
167+
vector<OutputType, weight, Alloc> &result,
168+
command_queue &queue)
169+
{
170+
::boost::compute::distributed::exclusive_scan(
171+
input,
172+
result,
173+
OutputType(0),
174+
boost::compute::plus<OutputType>(),
175+
queue
176+
);
177+
}
178+
179+
} // end distributed namespace
180+
} // end compute namespace
181+
} // end boost namespace
182+
183+
#endif /* BOOST_COMPUTE_DISTRIBUTED_EXCLUSIVE_SCAN_HPP */

test/CMakeLists.txt

+1
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,7 @@ add_compute_test("distributed.vector" test_distributed_vector.cpp)
8787
add_compute_test("distributed.copy" test_distributed_copy.cpp)
8888
add_compute_test("distributed.reduce" test_distributed_reduce.cpp)
8989
add_compute_test("distributed.transform" test_distributed_transform.cpp)
90+
add_compute_test("distributed.scan" test_distributed_scan.cpp)
9091

9192
add_compute_test("utility.extents" test_extents.cpp)
9293
add_compute_test("utility.invoke" test_invoke.cpp)

test/test_distributed_scan.cpp

+169
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
//---------------------------------------------------------------------------//
2+
// Copyright (c) 2016 Jakub Szuppe <j.szuppe@gmail.com>
3+
//
4+
// Distributed under the Boost Software License, Version 1.0
5+
// See accompanying file LICENSE_1_0.txt or copy at
6+
// http://www.boost.org/LICENSE_1_0.txt
7+
//
8+
// See http://boostorg.github.com/compute for more information.
9+
//---------------------------------------------------------------------------//
10+
11+
#define BOOST_TEST_MODULE TestDistributedScan
12+
#include <boost/test/unit_test.hpp>
13+
14+
#include <algorithm>
15+
16+
#include <boost/compute/algorithm.hpp>
17+
#include <boost/compute/functional.hpp>
18+
#include <boost/compute/algorithm.hpp>
19+
#include <boost/compute/function.hpp>
20+
#include <boost/compute/container/vector.hpp>
21+
22+
#include <boost/compute/distributed/context.hpp>
23+
#include <boost/compute/distributed/command_queue.hpp>
24+
#include <boost/compute/distributed/vector.hpp>
25+
#include <boost/compute/distributed/exclusive_scan.hpp>
26+
#include <boost/compute/distributed/copy.hpp>
27+
28+
#include "check_macros.hpp"
29+
#include "context_setup.hpp"
30+
31+
#include "distributed_check_functions.hpp"
32+
#include "distributed_queue_setup.hpp"
33+
34+
namespace bc = boost::compute;
35+
36+
// Scans the sequence 0..127 with distributed::exclusive_scan() (start value
// 10, default operation) into a separate result vector and compares it with
// the result of the single-device exclusive_scan(). Also checks that the
// input vector is left untouched.
BOOST_AUTO_TEST_CASE(exclusive_scan_int)
{
    // distributed queue built from the fixture queue
    bc::distributed::command_queue dist_queue =
        get_distributed_queue(queue, 4);

    // host input: 0, 1, 2, ..., 127
    std::vector<bc::int_> host_input(size_t(128));
    for(size_t idx = 0; idx != host_input.size(); ++idx)
    {
        host_input[idx] = idx;
    }

    bc::distributed::vector<bc::int_> dist_input(
        host_input.begin(), host_input.end(), dist_queue
    );
    bc::distributed::vector<bc::int_> dist_result(
        host_input.size(), dist_queue
    );
    dist_queue.finish();

    // the upload must reproduce the host data
    BOOST_CHECK(
        distributed_equal(
            dist_input, host_input.begin(), host_input.end(), dist_queue
        )
    );

    // scan under test
    bc::distributed::exclusive_scan(
        dist_input, dist_result, bc::int_(10), dist_queue
    );
    dist_queue.finish();

    // reference result computed with the single-device algorithm
    bc::vector<bc::int_> reference_input(
        host_input.begin(), host_input.end(), queue
    );
    bc::vector<bc::int_> reference_output(host_input.size(), context);
    std::vector<bc::int_> expected(reference_output.size());
    bc::exclusive_scan(
        reference_input.begin(),
        reference_input.end(),
        reference_output.begin(),
        bc::int_(10),
        queue
    );
    bc::copy(
        reference_output.begin(),
        reference_output.end(),
        expected.begin(),
        queue
    );
    queue.finish();

    // the input must be unchanged by the out-of-place scan
    BOOST_CHECK(
        distributed_equal(
            dist_input, host_input.begin(), host_input.end(), dist_queue
        )
    );
    // the distributed result must match the reference
    BOOST_CHECK(
        distributed_equal(
            dist_result, expected.begin(), expected.end(), dist_queue
        )
    );
}
104+
105+
// Scans the sequence 0..127 in place with distributed::exclusive_scan()
// using a user-defined addition function and a start value of 10, then
// compares it with the result of the single-device exclusive_scan().
BOOST_AUTO_TEST_CASE(exclusive_scan_custom_function_int)
{
    // distributed queue built from the fixture queue
    bc::distributed::command_queue dist_queue =
        get_distributed_queue(queue, 3);

    // user-defined binary operation passed to the scan
    BOOST_COMPUTE_FUNCTION(bc::int_, custom_sum, (bc::int_ x, bc::int_ y),
    {
        return x + y;
    });

    // host input: 0, 1, 2, ..., 127
    std::vector<bc::int_> host_input(size_t(128));
    for(size_t idx = 0; idx != host_input.size(); ++idx)
    {
        host_input[idx] = idx;
    }

    bc::distributed::vector<bc::int_> dist_values(
        host_input.begin(), host_input.end(), dist_queue
    );
    dist_queue.finish();

    // the upload must reproduce the host data
    BOOST_CHECK(
        distributed_equal(
            dist_values, host_input.begin(), host_input.end(), dist_queue
        )
    );

    // in-place scan under test: the same vector is input and output
    bc::distributed::exclusive_scan(
        dist_values, dist_values, bc::int_(10), custom_sum, dist_queue
    );
    dist_queue.finish();

    // reference result computed with the single-device algorithm
    bc::vector<bc::int_> reference_input(
        host_input.begin(), host_input.end(), queue
    );
    bc::vector<bc::int_> reference_output(host_input.size(), context);
    std::vector<bc::int_> expected(reference_output.size());
    bc::exclusive_scan(
        reference_input.begin(),
        reference_input.end(),
        reference_output.begin(),
        bc::int_(10),
        queue
    );
    bc::copy(
        reference_output.begin(),
        reference_output.end(),
        expected.begin(),
        queue
    );
    queue.finish();

    // the scanned vector must match the reference
    BOOST_CHECK(
        distributed_equal(
            dist_values, expected.begin(), expected.end(), dist_queue
        )
    );
}
168+
169+
BOOST_AUTO_TEST_SUITE_END()

0 commit comments

Comments
 (0)