Skip to content

Commit 1f96a4e

Browse files
authored
Merge pull request #843 from krasznaa/AsyncThrust-main-20250205
Asynchronous Thrust, main branch (2025.02.05.)
2 parents ae79eb3 + e57b3ac commit 1f96a4e

File tree

3 files changed

+30
-27
lines changed

3 files changed

+30
-27
lines changed

device/cuda/src/clusterization/measurement_sorting_algorithm.cu

+5-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/** TRACCC library, part of the ACTS project (R&D line)
22
*
3-
* (c) 2024 CERN for the benefit of the ACTS project
3+
* (c) 2024-2025 CERN for the benefit of the ACTS project
44
*
55
* Mozilla Public License Version 2.0
66
*/
@@ -12,6 +12,9 @@
1212
// Thrust include(s).
1313
#include <thrust/sort.h>
1414

15+
// System include(s).
16+
#include <memory_resource>
17+
1518
namespace traccc::cuda {
1619

1720
measurement_sorting_algorithm::measurement_sorting_algorithm(
@@ -33,7 +36,7 @@ measurement_sorting_algorithm::operator()(
3336

3437
// Sort the measurements in place
3538
thrust::sort(
36-
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
39+
thrust::cuda::par_nosync(std::pmr::polymorphic_allocator(&(m_mr.main)))
3740
.on(stream),
3841
measurements_view.ptr(), measurements_view.ptr() + n_measurements,
3942
measurement_sort_comp());

device/cuda/src/finding/finding_algorithm.cu

+20-21
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@
4949

5050
// System include(s).
5151
#include <cassert>
52+
#include <memory_resource>
5253
#include <vector>
5354

5455
namespace traccc::cuda {
@@ -77,6 +78,11 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
7778
// Copy setup
7879
m_copy.setup(seeds_buffer)->ignore();
7980

81+
// The Thrust policy to use.
82+
auto thrust_policy =
83+
thrust::cuda::par_nosync(std::pmr::polymorphic_allocator(&(m_mr.main)))
84+
.on(stream);
85+
8086
/*****************************************************************
8187
* Measurement Operations
8288
*****************************************************************/
@@ -97,11 +103,11 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
97103

98104
measurement_collection_types::device uniques(uniques_buffer);
99105

100-
measurement* uniques_end = thrust::unique_copy(
101-
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
102-
.on(stream),
103-
measurements.ptr(), measurements.ptr() + n_measurements,
104-
uniques.begin(), measurement_equal_comp());
106+
measurement* uniques_end =
107+
thrust::unique_copy(thrust_policy, measurements.ptr(),
108+
measurements.ptr() + n_measurements,
109+
uniques.begin(), measurement_equal_comp());
110+
m_stream.synchronize();
105111
n_modules = static_cast<unsigned int>(uniques_end - uniques.begin());
106112
}
107113

@@ -115,12 +121,10 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
115121

116122
measurement_collection_types::device uniques(uniques_buffer);
117123

118-
thrust::upper_bound(
119-
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
120-
.on(stream),
121-
measurements.ptr(), measurements.ptr() + n_measurements,
122-
uniques.begin(), uniques.begin() + n_modules, upper_bounds.begin(),
123-
measurement_sort_comp());
124+
thrust::upper_bound(thrust_policy, measurements.ptr(),
125+
measurements.ptr() + n_measurements,
126+
uniques.begin(), uniques.begin() + n_modules,
127+
upper_bounds.begin(), measurement_sort_comp());
124128
}
125129

126130
/*****************************************************************
@@ -285,12 +289,9 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
285289
keys_buffer);
286290
vecmem::device_vector<unsigned int> param_ids_device(
287291
param_ids_buffer);
288-
thrust::sort_by_key(
289-
thrust::cuda::par(
290-
std::pmr::polymorphic_allocator(&(m_mr.main)))
291-
.on(stream),
292-
keys_device.begin(), keys_device.end(),
293-
param_ids_device.begin());
292+
thrust::sort_by_key(thrust_policy, keys_device.begin(),
293+
keys_device.end(),
294+
param_ids_device.begin());
294295

295296
m_stream.synchronize();
296297
}
@@ -338,10 +339,8 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
338339
vecmem::device_vector<candidate_link> out(
339340
*(links_buffer.host_ptr() + it));
340341

341-
thrust::copy(
342-
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
343-
.on(stream),
344-
in.begin(), in.begin() + n_candidates_per_step[it], out.begin());
342+
thrust::copy(thrust_policy, in.begin(),
343+
in.begin() + n_candidates_per_step[it], out.begin());
345344
}
346345

347346
/*****************************************************************

device/cuda/src/fitting/fitting_algorithm.cu

+5-4
Original file line numberDiff line numberDiff line change
@@ -111,10 +111,11 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
111111
vecmem::device_vector<device::sort_key> keys_device(keys_buffer);
112112
vecmem::device_vector<unsigned int> param_ids_device(param_ids_buffer);
113113

114-
thrust::sort_by_key(
115-
thrust::cuda::par(std::pmr::polymorphic_allocator(&m_mr.main))
116-
.on(stream),
117-
keys_device.begin(), keys_device.end(), param_ids_device.begin());
114+
thrust::sort_by_key(thrust::cuda::par_nosync(
115+
std::pmr::polymorphic_allocator(&m_mr.main))
116+
.on(stream),
117+
keys_device.begin(), keys_device.end(),
118+
param_ids_device.begin());
118119

119120
// Run the track fitting
120121
kernels::fit<fitter_t><<<nBlocks, nThreads, 0, stream>>>(

0 commit comments

Comments
 (0)