Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Asynchronous Thrust, main branch (2025.02.05.) #843

Merged
merged 1 commit into from
Feb 5, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
* (c) 2024-2025 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/
Expand All @@ -12,6 +12,9 @@
// Thrust include(s).
#include <thrust/sort.h>

// System include(s).
#include <memory_resource>

namespace traccc::cuda {

measurement_sorting_algorithm::measurement_sorting_algorithm(
Expand All @@ -33,7 +36,7 @@ measurement_sorting_algorithm::operator()(

// Sort the measurements in place
thrust::sort(
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
thrust::cuda::par_nosync(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
measurements_view.ptr(), measurements_view.ptr() + n_measurements,
measurement_sort_comp());
Expand Down
41 changes: 20 additions & 21 deletions device/cuda/src/finding/finding_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@

// System include(s).
#include <cassert>
#include <memory_resource>
#include <vector>

namespace traccc::cuda {
Expand Down Expand Up @@ -77,6 +78,11 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
// Copy setup
m_copy.setup(seeds_buffer)->ignore();

// The Thrust policy to use.
auto thrust_policy =
thrust::cuda::par_nosync(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream);

/*****************************************************************
* Measurement Operations
*****************************************************************/
Expand All @@ -97,11 +103,11 @@ finding_algorithm<stepper_t, navigator_t>::operator()(

measurement_collection_types::device uniques(uniques_buffer);

measurement* uniques_end = thrust::unique_copy(
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
measurements.ptr(), measurements.ptr() + n_measurements,
uniques.begin(), measurement_equal_comp());
measurement* uniques_end =
thrust::unique_copy(thrust_policy, measurements.ptr(),
measurements.ptr() + n_measurements,
uniques.begin(), measurement_equal_comp());
m_stream.synchronize();
n_modules = static_cast<unsigned int>(uniques_end - uniques.begin());
}

Expand All @@ -115,12 +121,10 @@ finding_algorithm<stepper_t, navigator_t>::operator()(

measurement_collection_types::device uniques(uniques_buffer);

thrust::upper_bound(
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
measurements.ptr(), measurements.ptr() + n_measurements,
uniques.begin(), uniques.begin() + n_modules, upper_bounds.begin(),
measurement_sort_comp());
thrust::upper_bound(thrust_policy, measurements.ptr(),
measurements.ptr() + n_measurements,
uniques.begin(), uniques.begin() + n_modules,
upper_bounds.begin(), measurement_sort_comp());
}

/*****************************************************************
Expand Down Expand Up @@ -285,12 +289,9 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
keys_buffer);
vecmem::device_vector<unsigned int> param_ids_device(
param_ids_buffer);
thrust::sort_by_key(
thrust::cuda::par(
std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
keys_device.begin(), keys_device.end(),
param_ids_device.begin());
thrust::sort_by_key(thrust_policy, keys_device.begin(),
keys_device.end(),
param_ids_device.begin());

m_stream.synchronize();
}
Expand Down Expand Up @@ -338,10 +339,8 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
vecmem::device_vector<candidate_link> out(
*(links_buffer.host_ptr() + it));

thrust::copy(
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
in.begin(), in.begin() + n_candidates_per_step[it], out.begin());
thrust::copy(thrust_policy, in.begin(),
in.begin() + n_candidates_per_step[it], out.begin());
}

/*****************************************************************
Expand Down
9 changes: 5 additions & 4 deletions device/cuda/src/fitting/fitting_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,11 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
vecmem::device_vector<device::sort_key> keys_device(keys_buffer);
vecmem::device_vector<unsigned int> param_ids_device(param_ids_buffer);

thrust::sort_by_key(
thrust::cuda::par(std::pmr::polymorphic_allocator(&m_mr.main))
.on(stream),
keys_device.begin(), keys_device.end(), param_ids_device.begin());
thrust::sort_by_key(thrust::cuda::par_nosync(
std::pmr::polymorphic_allocator(&m_mr.main))
.on(stream),
keys_device.begin(), keys_device.end(),
param_ids_device.begin());

// Run the track fitting
kernels::fit<fitter_t><<<nBlocks, nThreads, 0, stream>>>(
Expand Down
Loading