Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use caching allocators in Thrust #835

Merged
merged 1 commit into from
Feb 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// Project include(s).
#include "traccc/edm/measurement.hpp"
#include "traccc/utils/algorithm.hpp"
#include "traccc/utils/memory_resource.hpp"

// VecMem include(s).
#include <vecmem/utils/copy.hpp>
Expand Down Expand Up @@ -40,7 +41,8 @@ class measurement_sorting_algorithm
/// @param copy The copy object to use in the algorithm
/// @param str The CUDA stream to schedule the measurement sorting in
///
measurement_sorting_algorithm(vecmem::copy& copy, stream& str);
measurement_sorting_algorithm(const traccc::memory_resource& mr,
vecmem::copy& copy, stream& str);

/// Callable operator performing the sorting on a container
///
Expand All @@ -50,6 +52,8 @@ class measurement_sorting_algorithm
measurements_view) const override;

private:
/// The memory resource(s) to use
traccc::memory_resource m_mr;
/// Copy object to use in the algorithm
std::reference_wrapper<vecmem::copy> m_copy;
/// CUDA stream used by the algorithm
Expand Down
14 changes: 8 additions & 6 deletions device/cuda/src/clusterization/measurement_sorting_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

namespace traccc::cuda {

measurement_sorting_algorithm::measurement_sorting_algorithm(vecmem::copy& copy,
stream& str)
: m_copy{copy}, m_stream{str} {}
measurement_sorting_algorithm::measurement_sorting_algorithm(
const traccc::memory_resource& mr, vecmem::copy& copy, stream& str)
: m_mr{mr}, m_copy{copy}, m_stream{str} {}

measurement_sorting_algorithm::output_type
measurement_sorting_algorithm::operator()(
Expand All @@ -32,9 +32,11 @@ measurement_sorting_algorithm::operator()(
m_copy.get().get_size(measurements_view);

// Sort the measurements in place
thrust::sort(thrust::cuda::par.on(stream), measurements_view.ptr(),
measurements_view.ptr() + n_measurements,
measurement_sort_comp());
thrust::sort(
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Strange that you needed to be so very explicit. 🤔 std::pmr::polymoprhic_allocator doesn't put explicit on its constructor for sure. That's how we can create vecmem::vector objects without always writing out the full allocator name in their constructors.

I guess I'll try thrust::cuda::par(&(m_mr.main)).on(stream) tomorrow, to see if that would also work...

.on(stream),
measurements_view.ptr(), measurements_view.ptr() + n_measurements,
measurement_sort_comp());

// Return the view of the sorted measurements.
return measurements_view;
Expand Down
32 changes: 20 additions & 12 deletions device/cuda/src/finding/finding_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,10 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
measurement_collection_types::device uniques(uniques_buffer);

measurement* uniques_end = thrust::unique_copy(
thrust::cuda::par.on(stream), measurements.ptr(),
measurements.ptr() + n_measurements, uniques.begin(),
measurement_equal_comp());
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
measurements.ptr(), measurements.ptr() + n_measurements,
uniques.begin(), measurement_equal_comp());
n_modules = static_cast<unsigned int>(uniques_end - uniques.begin());
}

Expand All @@ -114,10 +115,12 @@ finding_algorithm<stepper_t, navigator_t>::operator()(

measurement_collection_types::device uniques(uniques_buffer);

thrust::upper_bound(thrust::cuda::par.on(stream), measurements.ptr(),
measurements.ptr() + n_measurements,
uniques.begin(), uniques.begin() + n_modules,
upper_bounds.begin(), measurement_sort_comp());
thrust::upper_bound(
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
measurements.ptr(), measurements.ptr() + n_measurements,
uniques.begin(), uniques.begin() + n_modules, upper_bounds.begin(),
measurement_sort_comp());
}

/*****************************************************************
Expand Down Expand Up @@ -282,9 +285,12 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
keys_buffer);
vecmem::device_vector<unsigned int> param_ids_device(
param_ids_buffer);
thrust::sort_by_key(thrust::cuda::par.on(stream),
keys_device.begin(), keys_device.end(),
param_ids_device.begin());
thrust::sort_by_key(
thrust::cuda::par(
std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
keys_device.begin(), keys_device.end(),
param_ids_device.begin());

m_stream.synchronize();
}
Expand Down Expand Up @@ -332,8 +338,10 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
vecmem::device_vector<candidate_link> out(
*(links_buffer.host_ptr() + it));

thrust::copy(thrust::cuda::par.on(stream), in.begin(),
in.begin() + n_candidates_per_step[it], out.begin());
thrust::copy(
thrust::cuda::par(std::pmr::polymorphic_allocator(&(m_mr.main)))
.on(stream),
in.begin(), in.begin() + n_candidates_per_step[it], out.begin());
}

/*****************************************************************
Expand Down
7 changes: 5 additions & 2 deletions device/cuda/src/fitting/fitting_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <thrust/sort.h>

// System include(s).
#include <memory_resource>
#include <vector>

namespace traccc::cuda {
Expand Down Expand Up @@ -110,8 +111,10 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
vecmem::device_vector<device::sort_key> keys_device(keys_buffer);
vecmem::device_vector<unsigned int> param_ids_device(param_ids_buffer);

thrust::sort_by_key(thrust::cuda::par.on(stream), keys_device.begin(),
keys_device.end(), param_ids_device.begin());
thrust::sort_by_key(
thrust::cuda::par(std::pmr::polymorphic_allocator(&m_mr.main))
.on(stream),
keys_device.begin(), keys_device.end(), param_ids_device.begin());

// Run the track fitting
kernels::fit<fitter_t><<<nBlocks, nThreads, 0, stream>>>(
Expand Down
6 changes: 4 additions & 2 deletions examples/run/cuda/full_chain_algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,8 @@ full_chain_algorithm::full_chain_algorithm(
m_detector(detector),
m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy,
m_stream, clustering_config),
m_measurement_sorting(m_copy, m_stream),
m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
m_copy, m_stream),
m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
m_copy, m_stream),
m_seeding(finder_config, grid_config, filter_config,
Expand Down Expand Up @@ -111,7 +112,8 @@ full_chain_algorithm::full_chain_algorithm(const full_chain_algorithm& parent)
m_detector(parent.m_detector),
m_clusterization(memory_resource{*m_cached_device_mr, &m_host_mr}, m_copy,
m_stream, parent.m_clustering_config),
m_measurement_sorting(m_copy, m_stream),
m_measurement_sorting(memory_resource{*m_cached_device_mr, &m_host_mr},
m_copy, m_stream),
m_spacepoint_formation(memory_resource{*m_cached_device_mr, &m_host_mr},
m_copy, m_stream),
m_seeding(
Expand Down
2 changes: 1 addition & 1 deletion examples/run/cuda/seq_example_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ int seq_run(const traccc::opts::detector& detector_opts,

traccc::cuda::clusterization_algorithm ca_cuda(mr, copy, stream,
clusterization_opts);
traccc::cuda::measurement_sorting_algorithm ms_cuda(copy, stream);
traccc::cuda::measurement_sorting_algorithm ms_cuda(mr, copy, stream);
device_spacepoint_formation_algorithm sf_cuda(mr, copy, stream);
traccc::cuda::seeding_algorithm sa_cuda(
seeding_opts.seedfinder, {seeding_opts.seedfinder},
Expand Down
Loading