Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

VecMem Update, main branch (2024.10.29.) #757

Merged
merged 2 commits into from
Oct 29, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions benchmarks/cuda/toy_detector_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,15 +111,19 @@ BENCHMARK_F(ToyDetectorBenchmark, CUDA)(benchmark::State& state) {
traccc::spacepoint_collection_types::buffer spacepoints_cuda_buffer(
static_cast<unsigned int>(spacepoints_per_event.size()),
mr.main);
async_copy.setup(spacepoints_cuda_buffer)->ignore();
async_copy(vecmem::get_data(spacepoints_per_event),
spacepoints_cuda_buffer);
spacepoints_cuda_buffer)
->ignore();

traccc::measurement_collection_types::buffer
measurements_cuda_buffer(
static_cast<unsigned int>(measurements_per_event.size()),
mr.main);
async_copy.setup(measurements_cuda_buffer)->ignore();
async_copy(vecmem::get_data(measurements_per_event),
measurements_cuda_buffer);
measurements_cuda_buffer)
->ignore();

// Run seeding
traccc::seed_collection_types::buffer seeds_cuda_buffer =
Expand Down
4 changes: 2 additions & 2 deletions device/alpaka/src/utils/make_prefix_sum_buff.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
/** TRACCC library, part of the ACTS project (R&D line)
*
* (c) 2023 CERN for the benefit of the ACTS project
* (c) 2023-2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/
Expand Down Expand Up @@ -39,7 +39,7 @@ vecmem::data::vector_buffer<device::prefix_sum_element_t> make_prefix_sum_buff(
// Create buffer and view objects for prefix sum vector
vecmem::data::vector_buffer<device::prefix_sum_element_t> prefix_sum_buff(
totalSize, mr.main);
copy.setup(prefix_sum_buff);
copy.setup(prefix_sum_buff)->wait();
auto data_prefix_sum_buff = vecmem::get_data(prefix_sum_buff);

// Setup Alpaka
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,17 @@ container_d2h_copy_alg<CONTAINER_TYPES>::operator()(input_type input) const {
[](const auto& view) { return view.capacity(); });
typename CONTAINER_TYPES::buffer hostBuffer{{size, *host_mr},
{capacities, *host_mr}};
m_hostCopy.setup(hostBuffer.headers);
m_hostCopy.setup(hostBuffer.items);
vecmem::copy::event_type host_header_setup_event =
m_hostCopy.setup(hostBuffer.headers);
vecmem::copy::event_type host_item_setup_event =
m_hostCopy.setup(hostBuffer.items);
host_header_setup_event->wait();
host_item_setup_event->wait();

// Copy the device container into this temporary host buffer.
vecmem::copy::event_type header_event = m_deviceCopy(
vecmem::copy::event_type device_header_copy_event = m_deviceCopy(
input.headers, hostBuffer.headers, vecmem::copy::type::device_to_host);
vecmem::copy::event_type item_event = m_deviceCopy(
vecmem::copy::event_type device_item_copy_event = m_deviceCopy(
input.items, hostBuffer.items, vecmem::copy::type::device_to_host);

// Create the result object, giving it the appropriate memory resource for
Expand All @@ -59,12 +63,16 @@ container_d2h_copy_alg<CONTAINER_TYPES>::operator()(input_type input) const {
}

// Wait for the D->H copies to finish.
header_event->wait();
item_event->wait();
device_header_copy_event->wait();
device_item_copy_event->wait();

// Perform the H->H copy.
m_hostCopy(hostBuffer.headers, result.get_headers())->wait();
m_hostCopy(hostBuffer.items, result.get_items())->wait();
vecmem::copy::event_type host_header_copy_event =
m_hostCopy(hostBuffer.headers, result.get_headers());
vecmem::copy::event_type host_item_copy_event =
m_hostCopy(hostBuffer.items, result.get_items());
host_header_copy_event->wait();
host_item_copy_event->wait();

// Return the host object.
return result;
Expand Down
51 changes: 36 additions & 15 deletions device/common/include/traccc/device/impl/container_h2d_copy_alg.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,20 @@ container_h2d_copy_alg<CONTAINER_TYPES>::operator()(input_type input) const {
// Create the output buffer with the correct sizes.
output_type result{{static_cast<header_size_type>(sizes.size()), m_mr.main},
{sizes, m_mr.main, m_mr.host}};
m_deviceCopy.setup(result.headers);
m_deviceCopy.setup(result.items);
vecmem::copy::event_type header_setup_event =
m_deviceCopy.setup(result.headers);
vecmem::copy::event_type items_setup_event =
m_deviceCopy.setup(result.items);
header_setup_event->wait();
items_setup_event->wait();

// Copy data straight into it.
m_deviceCopy(input.headers, result.headers,
vecmem::copy::type::host_to_device);
m_deviceCopy(input.items, result.items, vecmem::copy::type::host_to_device);
vecmem::copy::event_type header_copy_event = m_deviceCopy(
input.headers, result.headers, vecmem::copy::type::host_to_device);
vecmem::copy::event_type items_copy_event = m_deviceCopy(
input.items, result.items, vecmem::copy::type::host_to_device);
header_copy_event->wait();
items_copy_event->wait();

// Return the created buffer.
return result;
Expand All @@ -56,23 +63,37 @@ container_h2d_copy_alg<CONTAINER_TYPES>::operator()(
// Create/set the host buffer.
hostBuffer =
typename CONTAINER_TYPES::buffer{{size, *host_mr}, {sizes, *host_mr}};
m_hostCopy.setup(hostBuffer.headers);
m_hostCopy.setup(hostBuffer.items);
vecmem::copy::event_type host_header_setup_event =
m_hostCopy.setup(hostBuffer.headers);
vecmem::copy::event_type host_items_setup_event =
m_hostCopy.setup(hostBuffer.items);
host_header_setup_event->wait();
host_items_setup_event->wait();

// Copy the data into the host buffer.
m_hostCopy(input.headers, hostBuffer.headers);
m_hostCopy(input.items, hostBuffer.items);
vecmem::copy::event_type host_header_copy_event =
m_hostCopy(input.headers, hostBuffer.headers);
vecmem::copy::event_type host_items_copy_event =
m_hostCopy(input.items, hostBuffer.items);
host_header_copy_event->wait();
host_items_copy_event->wait();

// Create the output buffer with the correct sizes.
output_type result{{size, m_mr.main}, {sizes, m_mr.main, m_mr.host}};
m_deviceCopy.setup(result.headers);
m_deviceCopy.setup(result.items);
vecmem::copy::event_type device_header_setup_event =
m_deviceCopy.setup(result.headers);
vecmem::copy::event_type device_items_setup_event =
m_deviceCopy.setup(result.items);
device_header_setup_event->wait();
device_items_setup_event->wait();

// Copy data from the host buffer into the device/result buffer.
m_deviceCopy(hostBuffer.headers, result.headers,
vecmem::copy::type::host_to_device);
m_deviceCopy(hostBuffer.items, result.items,
vecmem::copy::type::host_to_device);
vecmem::copy::event_type device_header_copy_event = m_deviceCopy(
hostBuffer.headers, result.headers, vecmem::copy::type::host_to_device);
vecmem::copy::event_type device_items_copy_event = m_deviceCopy(
hostBuffer.items, result.items, vecmem::copy::type::host_to_device);
device_header_copy_event->wait();
device_items_copy_event->wait();

// Return the created buffer.
return result;
Expand Down
2 changes: 1 addition & 1 deletion device/common/src/make_prefix_sum_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ prefix_sum_buffer_t make_prefix_sum_buffer(
// Create buffer and view objects
vecmem::data::vector_buffer<prefix_sum_size_t> sizes_sum_buff(
static_cast<unsigned int>(sizes_sum.size()), mr.main);
copy.setup(sizes_sum_buff);
copy.setup(sizes_sum_buff)->ignore();
(copy)(vecmem::get_data(sizes_sum), sizes_sum_buff)->wait();
vecmem::data::vector_view<prefix_sum_size_t> sizes_sum_view(
sizes_sum_buff);
Expand Down
4 changes: 2 additions & 2 deletions device/cuda/src/fitting/fitting_algorithm.cu
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
{candidate_sizes, m_mr.main, m_mr.host,
vecmem::data::buffer_type::resizable}};

m_copy.setup(track_states_buffer.headers);
m_copy.setup(track_states_buffer.items);
m_copy.setup(track_states_buffer.headers)->ignore();
m_copy.setup(track_states_buffer.items)->ignore();

// Calculate the number of threads and thread blocks to run the track
// fitting
Expand Down
4 changes: 2 additions & 2 deletions device/cuda/src/utils/make_prefix_sum_buff.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ vecmem::data::vector_buffer<device::prefix_sum_element_t> make_prefix_sum_buff(
// Create buffer and view objects for prefix sum vector
vecmem::data::vector_buffer<device::prefix_sum_element_t> prefix_sum_buff(
totalSize, mr.main);
copy.setup(prefix_sum_buff);
copy.setup(prefix_sum_buff)->wait();

// Fill the prefix sum vector
static const unsigned int threadsPerBlock = 32;
Expand Down Expand Up @@ -76,7 +76,7 @@ vecmem::data::vector_buffer<device::prefix_sum_element_t> make_prefix_sum_buff(
// Create buffer and view objects for prefix sum vector
vecmem::data::vector_buffer<device::prefix_sum_element_t> prefix_sum_buff(
totalSize, mr.main);
copy.setup(prefix_sum_buff);
copy.setup(prefix_sum_buff)->ignore();

// Fill the prefix sum vector
static const unsigned int threadsPerBlock = 32;
Expand Down
8 changes: 4 additions & 4 deletions device/kokkos/src/seeding/spacepoint_binning.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ spacepoint_binning::output_type spacepoint_binning::operator()(
const std::size_t grid_bins = m_axes.first.n_bins * m_axes.second.n_bins;
vecmem::data::vector_buffer<unsigned int> grid_capacities_buff(
static_cast<unsigned int>(grid_bins), m_mr.main);
m_copy->setup(grid_capacities_buff);
m_copy->memset(grid_capacities_buff, 0);
m_copy->setup(grid_capacities_buff)->wait();
m_copy->memset(grid_capacities_buff, 0)->wait();
vecmem::data::vector_view<unsigned int> grid_capacities_view =
grid_capacities_buff;

Expand Down Expand Up @@ -68,15 +68,15 @@ spacepoint_binning::output_type spacepoint_binning::operator()(
// Copy grid capacities back to the host
vecmem::vector<unsigned int> grid_capacities_host(m_mr.host ? m_mr.host
: &(m_mr.main));
(*m_copy)(grid_capacities_buff, grid_capacities_host);
(*m_copy)(grid_capacities_buff, grid_capacities_host)->wait();

// Create the grid buffer.
sp_grid_buffer grid_buffer(
m_axes.first, m_axes.second,
std::vector<std::size_t>(grid_capacities_host.begin(),
grid_capacities_host.end()),
m_mr.main, m_mr.host, vecmem::data::buffer_type::resizable);
m_copy->setup(grid_buffer._buffer);
m_copy->setup(grid_buffer._buffer)->wait();
sp_grid_view grid_view = grid_buffer;

// Populate the grid.
Expand Down
2 changes: 1 addition & 1 deletion device/kokkos/src/utils/make_prefix_sum_buff.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ vecmem::data::vector_buffer<device::prefix_sum_element_t> make_prefix_sum_buff(
// Create buffer and view objects for prefix sum vector
vecmem::data::vector_buffer<device::prefix_sum_element_t> prefix_sum_buff(
totalSize, mr.main);
copy.setup(prefix_sum_buff);
copy.setup(prefix_sum_buff)->wait();

// Fill the prefix sum vector
// kernels::fill_prefix_sum<<<(sizes_sum_view.size() / 32) + 1, 32>>>(
Expand Down
8 changes: 6 additions & 2 deletions device/sycl/src/clusterization/clusterization_algorithm.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,8 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()(
// Create the result object, overestimating the number of measurements.
measurement_collection_types::buffer measurements{
num_cells, m_mr.main, vecmem::data::buffer_type::resizable};
m_copy.get().setup(measurements);
vecmem::copy::event_type measurements_setup_event =
m_copy.get().setup(measurements);
measurement_collection_types::view measurements_view(measurements);

// If there are no cells, return right away.
Expand Down Expand Up @@ -103,7 +104,8 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()(
// debugging.
//
vecmem::data::vector_buffer<unsigned int> cell_links(num_cells, m_mr.main);
m_copy.get().setup(cell_links)->wait();
vecmem::copy::event_type cell_links_setup_event =
m_copy.get().setup(cell_links);
vecmem::data::vector_view<unsigned int> cell_links_view(cell_links);

using vector_size_t =
Expand All @@ -115,6 +117,8 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()(
device::details::CELLS_PER_THREAD_STACK_LIMIT);

// Run ccl kernel
measurements_setup_event->wait();
cell_links_setup_event->wait();
details::get_queue(m_queue)
.submit([&](::sycl::handler& h) {
// Allocate shared memory for the kernel.
Expand Down
8 changes: 6 additions & 2 deletions device/sycl/src/fitting/fitting_algorithm.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,10 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
{candidate_sizes, m_mr.main, m_mr.host,
vecmem::data::buffer_type::resizable}};

m_copy->setup(track_states_buffer.headers);
m_copy->setup(track_states_buffer.items);
vecmem::copy::event_type track_states_headers_setup_event =
m_copy->setup(track_states_buffer.headers);
vecmem::copy::event_type track_states_items_setup_event =
m_copy->setup(track_states_buffer.items);

track_state_container_types::view track_states_view(track_states_buffer);

Expand Down Expand Up @@ -113,6 +115,8 @@ track_state_container_types::buffer fitting_algorithm<fitter_t>::operator()(
keys_device.begin(), keys_device.end(),
param_ids_device.begin());

track_states_headers_setup_event->wait();
track_states_items_setup_event->wait();
details::get_queue(m_queue)
.submit([&](::sycl::handler& h) {
h.parallel_for<kernels::fit>(
Expand Down
4 changes: 3 additions & 1 deletion device/sycl/src/seeding/spacepoint_formation_algorithm.sycl
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ spacepoint_formation_algorithm<detector_t>::operator()(
// Create the result buffer.
spacepoint_collection_types::buffer spacepoints_buffer(
num_measurements, m_mr.main, vecmem::data::buffer_type::resizable);
m_copy.get().setup(spacepoints_buffer);
vecmem::copy::event_type spacepoints_setup_event =
m_copy.get().setup(spacepoints_buffer);

// If there are no measurements, we can conclude here.
if (num_measurements == 0) {
Expand All @@ -53,6 +54,7 @@ spacepoint_formation_algorithm<detector_t>::operator()(
auto measCountRange =
traccc::sycl::calculate1DimNdRange(num_measurements, measLocalSize);

spacepoints_setup_event->wait();
details::get_queue(m_queue)
.submit([&](::sycl::handler& h) {
h.parallel_for(
Expand Down
7 changes: 4 additions & 3 deletions examples/run/alpaka/seeding_example_alpaka.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,7 +195,8 @@ int seq_run(const traccc::opts::track_seeding& seeding_opts,
static_cast<unsigned int>(spacepoints_per_event.size()),
mr.main);
copy(vecmem::get_data(spacepoints_per_event),
spacepoints_alpaka_buffer);
spacepoints_alpaka_buffer)
->wait();

{
traccc::performance::timer t("Seeding (alpaka)", elapsedTimes);
Expand Down Expand Up @@ -242,8 +243,8 @@ int seq_run(const traccc::opts::track_seeding& seeding_opts,
// Copy the seeds to the host for comparisons
traccc::seed_collection_types::host seeds_alpaka;
traccc::bound_track_parameters_collection_types::host params_alpaka;
copy(seeds_alpaka_buffer, seeds_alpaka);
copy(params_alpaka_buffer, params_alpaka);
copy(seeds_alpaka_buffer, seeds_alpaka)->wait();
copy(params_alpaka_buffer, params_alpaka)->wait();

if (accelerator_opts.compare_with_cpu) {
// Show which event we are currently presenting the results for.
Expand Down
4 changes: 2 additions & 2 deletions examples/run/alpaka/seq_example_alpaka.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ int seq_run(const traccc::opts::detector& detector_opts,
static_cast<traccc::silicon_detector_description::buffer::size_type>(
host_det_descr.size()),
mr.main};
copy(host_det_descr_data, device_det_descr);
copy(host_det_descr_data, device_det_descr)->wait();

// Construct a Detray detector object, if supported by the configuration.
traccc::default_detector::host host_detector{host_mr};
Expand Down Expand Up @@ -187,7 +187,7 @@ int seq_run(const traccc::opts::detector& detector_opts,
// Create device copy of input collections
traccc::edm::silicon_cell_collection::buffer cells_buffer(
static_cast<unsigned int>(cells_per_event.size()), mr.main);
copy(vecmem::get_data(cells_per_event), cells_buffer);
copy(vecmem::get_data(cells_per_event), cells_buffer)->wait();

// Alpaka
{
Expand Down
8 changes: 6 additions & 2 deletions examples/run/cuda/seeding_example_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,15 +245,19 @@ int seq_run(const traccc::opts::track_seeding& seeding_opts,
traccc::spacepoint_collection_types::buffer spacepoints_cuda_buffer(
static_cast<unsigned int>(spacepoints_per_event.size()),
mr.main);
async_copy.setup(spacepoints_cuda_buffer)->wait();
async_copy(vecmem::get_data(spacepoints_per_event),
spacepoints_cuda_buffer);
spacepoints_cuda_buffer)
->wait();

traccc::measurement_collection_types::buffer
measurements_cuda_buffer(
static_cast<unsigned int>(measurements_per_event.size()),
mr.main);
async_copy.setup(measurements_cuda_buffer)->wait();
async_copy(vecmem::get_data(measurements_per_event),
measurements_cuda_buffer);
measurements_cuda_buffer)
->wait();

{
traccc::performance::timer t("Seeding (cuda)", elapsedTimes);
Expand Down
6 changes: 4 additions & 2 deletions examples/run/cuda/seq_example_cuda.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ int seq_run(const traccc::opts::detector& detector_opts,
static_cast<traccc::silicon_detector_description::buffer::size_type>(
host_det_descr.size()),
device_mr};
copy(host_det_descr_data, device_det_descr);
copy.setup(device_det_descr)->wait();
copy(host_det_descr_data, device_det_descr)->wait();

// Construct a Detray detector object, if supported by the configuration.
traccc::default_detector::host host_detector{host_mr};
Expand Down Expand Up @@ -234,7 +235,8 @@ int seq_run(const traccc::opts::detector& detector_opts,
// Create device copy of input collections
traccc::edm::silicon_cell_collection::buffer cells_buffer(
static_cast<unsigned int>(cells_per_event.size()), mr.main);
copy(vecmem::get_data(cells_per_event), cells_buffer);
copy.setup(cells_buffer)->wait();
copy(vecmem::get_data(cells_per_event), cells_buffer)->wait();

// CUDA
{
Expand Down
Loading
Loading