Add helper class for thread and block identifiers #596

Merged · 1 commit · Aug 1, 2024
53 changes: 53 additions & 0 deletions device/alpaka/include/traccc/alpaka/utils/thread_id.hpp
@@ -0,0 +1,53 @@
/**
* traccc library, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

#include <alpaka/alpaka.hpp>

#include "traccc/definitions/qualifiers.hpp"

namespace traccc::alpaka {
template <typename Acc>
struct thread_id1 {
TRACCC_DEVICE thread_id1(const Acc& acc) : m_acc(acc) {}

auto inline TRACCC_DEVICE getLocalThreadId() const {
return ::alpaka::getIdx<::alpaka::Block, ::alpaka::Threads>(m_acc)[0u];
}

auto inline TRACCC_DEVICE getLocalThreadIdX() const {
return getLocalThreadId();
}

auto inline TRACCC_DEVICE getGlobalThreadId() const {
return getLocalThreadId() + getBlockIdX() * getBlockDimX();
}

auto inline TRACCC_DEVICE getGlobalThreadIdX() const {
return getLocalThreadId() + getBlockIdX() * getBlockDimX();
}

auto inline TRACCC_DEVICE getBlockIdX() const {
return ::alpaka::getIdx<::alpaka::Grid, ::alpaka::Blocks>(m_acc)[0u];
}

auto inline TRACCC_DEVICE getBlockDimX() const {
return ::alpaka::getWorkDiv<::alpaka::Block, ::alpaka::Threads>(
m_acc)[0u];
}

auto inline TRACCC_DEVICE getGridDimX() const {
return ::alpaka::getWorkDiv<::alpaka::Grid, ::alpaka::Blocks>(
m_acc)[0u];
}

private:
const Acc& m_acc;
};
} // namespace traccc::alpaka
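
For illustration, a minimal sketch of how a kernel functor might use the new helper. The kernel, its arguments and the use of vecmem views are made up for this example and are not part of the pull request:

// Hypothetical example kernel, not part of this pull request: it fills every
// element of a vector view with its own index, going through thread_id1
// instead of calling the alpaka index/work-division traits directly.

#include <alpaka/alpaka.hpp>

#include <vecmem/containers/data/vector_view.hpp>
#include <vecmem/containers/device_vector.hpp>

#include "traccc/alpaka/utils/thread_id.hpp"

struct FillIndexExampleKernel {
    template <typename TAcc>
    ALPAKA_FN_ACC void operator()(
        const TAcc& acc,
        vecmem::data::vector_view<unsigned int> out_view) const {

        // Wrap the accelerator handle once; all index queries go through it.
        const traccc::alpaka::thread_id1 thread_id(acc);
        vecmem::device_vector<unsigned int> out(out_view);

        // Flat global index: local thread ID + block ID * block size.
        const auto i = thread_id.getGlobalThreadIdX();
        if (i < out.size()) {
            out.at(static_cast<unsigned int>(i)) =
                static_cast<unsigned int>(i);
        }
    }
};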
18 changes: 7 additions & 11 deletions device/alpaka/src/clusterization/clusterization_algorithm.cpp
@@ -12,6 +12,7 @@
#include "../utils/utils.hpp"

// Project include(s)
#include "traccc/alpaka/utils/thread_id.hpp"
#include "traccc/clusterization/clustering_config.hpp"
#include "traccc/clusterization/device/ccl_kernel.hpp"

@@ -35,12 +36,7 @@ struct CCLKernel {
measurement_collection_types::view measurements_view,
vecmem::data::vector_view<unsigned int> cell_links) const {

auto const localThreadIdx =
::alpaka::getIdx<::alpaka::Block, ::alpaka::Threads>(acc)[0u];
auto const localBlockIdx =
::alpaka::getIdx<::alpaka::Grid, ::alpaka::Blocks>(acc)[0u];
auto const blockExtent =
::alpaka::getWorkDiv<::alpaka::Block, ::alpaka::Threads>(acc)[0u];
traccc::alpaka::thread_id1 thread_id(acc);

auto& partition_start =
::alpaka::declareSharedVar<std::size_t, __COUNTER__>(acc);
@@ -60,11 +56,11 @@

alpaka::barrier<TAcc> barry_r(&acc);

device::ccl_kernel(
cfg, localThreadIdx, blockExtent, localBlockIdx, cells_view,
modules_view, partition_start, partition_end, outi, f_view, gf_view,
f_backup_view, gf_backup_view, adjc_backup_view, adjv_backup_view,
backup_mutex, barry_r, measurements_view, cell_links);
device::ccl_kernel(cfg, thread_id, cells_view, modules_view,
partition_start, partition_end, outi, f_view,
gf_view, f_backup_view, gf_backup_view,
adjc_backup_view, adjv_backup_view, backup_mutex,
barry_r, measurements_view, cell_links);
}
};

@@ -13,6 +13,7 @@
#include "traccc/definitions/hints.hpp"
#include "traccc/definitions/qualifiers.hpp"
#include "traccc/device/concepts/barrier.hpp"
#include "traccc/device/concepts/thread_id.hpp"
#include "traccc/edm/cell.hpp"
#include "traccc/edm/measurement.hpp"
#include "traccc/edm/spacepoint.hpp"
@@ -29,9 +30,7 @@ namespace traccc::device {
/// Function which reads raw detector cells and turns them into measurements.
///
/// @param[in] cfg clustering configuration
/// @param[in] threadId current thread index
/// @param[in] blckDim current thread block size
/// @param[in] blckId current thread block index
/// @param[in] thread_id a thread identifier object
/// @param[in] cells_view collection of cells
/// @param[in] modules_view collection of modules to which the cells are linked
/// @param partition_start partition start point for this thread block
@@ -54,10 +53,10 @@
/// @param[out] measurements_view collection of measurements
/// @param[out] cell_links collection of links to measurements each cell is
/// put into
template <device::concepts::barrier barrier_t>
template <device::concepts::barrier barrier_t,
device::concepts::thread_id1 thread_id_t>
TRACCC_DEVICE inline void ccl_kernel(
const clustering_config cfg, details::index_t threadId,
details::index_t blckDim, unsigned int blockId,
const clustering_config cfg, const thread_id_t& thread_id,
const cell_collection_types::const_view cells_view,
const cell_module_collection_types::const_view modules_view,
std::size_t& partition_start, std::size_t& partition_end, std::size_t& outi,
@@ -13,6 +13,8 @@
#include "traccc/clusterization/device/aggregate_cluster.hpp"
#include "traccc/clusterization/device/ccl_kernel_definitions.hpp"
#include "traccc/clusterization/device/reduce_problem_cell.hpp"
#include "traccc/device/concepts/barrier.hpp"
#include "traccc/device/concepts/thread_id.hpp"
#include "traccc/device/mutex.hpp"
#include "traccc/device/unique_lock.hpp"
#include "traccc/edm/cell.hpp"
@@ -40,13 +42,13 @@ namespace traccc::device {
/// iteration.
/// @param[in] barrier A generic object for block-wide synchronisation
///
template <device::concepts::barrier barrier_t>
TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
template <device::concepts::barrier barrier_t,
device::concepts::thread_id1 thread_id_t>
TRACCC_DEVICE void fast_sv_1(const thread_id_t& thread_id,
vecmem::device_vector<details::index_t>& f,
vecmem::device_vector<details::index_t>& gf,
unsigned char* adjc, details::index_t* adjv,
details::index_t thread_cell_count,
const details::index_t tid,
const details::index_t blckDim,
barrier_t& barrier) {
/*
* The algorithm finishes if an iteration leaves the arrays unchanged.
@@ -70,7 +72,8 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
* together.
*/
for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
const details::index_t cid = tst * blckDim + tid;
const details::index_t cid =
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();

TRACCC_ASSUME(adjc[tst] <= 8);
for (unsigned char k = 0; k < adjc[tst]; ++k) {
@@ -90,7 +93,8 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
barrier.blockBarrier();

for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
const details::index_t cid = tst * blckDim + tid;
const details::index_t cid =
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
/*
* The second stage is shortcutting, which is an optimisation that
* allows us to look at any shortcuts in the cluster IDs that we
@@ -107,7 +111,8 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
barrier.blockBarrier();

for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
const details::index_t cid = tst * blckDim + tid;
const details::index_t cid =
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
/*
* Update the array for the next generation, keeping track of any
* changes we make.
@@ -128,11 +133,11 @@ TRACCC_DEVICE void fast_sv_1(vecmem::device_vector<details::index_t>& f,
} while (barrier.blockOr(gf_changed));
}

template <device::concepts::barrier barrier_t>
template <device::concepts::barrier barrier_t,
device::concepts::thread_id1 thread_id_t>
TRACCC_DEVICE inline void ccl_core(
const details::index_t threadId, const details::index_t blckDim,
std::size_t& partition_start, std::size_t& partition_end,
vecmem::device_vector<details::index_t> f,
const thread_id_t& thread_id, std::size_t& partition_start,
std::size_t& partition_end, vecmem::device_vector<details::index_t> f,
vecmem::device_vector<details::index_t> gf,
vecmem::data::vector_view<unsigned int> cell_links, details::index_t* adjv,
unsigned char* adjc, const cell_collection_types::const_device cells_device,
@@ -145,20 +150,23 @@ TRACCC_DEVICE inline void ccl_core(
assert(size <= gf.size());

details::index_t thread_cell_count =
(size - threadId + blckDim - 1) / blckDim;
(size - thread_id.getLocalThreadIdX() + thread_id.getBlockDimX() - 1) /
thread_id.getBlockDimX();

for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
/*
* Look for adjacent cells to the current one.
*/
const details::index_t cid = tst * blckDim + threadId;
const details::index_t cid =
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
adjc[tst] = 0;
reduce_problem_cell(cells_device, cid, partition_start, partition_end,
adjc[tst], &adjv[8 * tst]);
}

for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
const details::index_t cid = tst * blckDim + threadId;
const details::index_t cid =
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
/*
* At the start, the values of f and gf should be equal to the
* ID of the cell.
@@ -177,12 +185,13 @@ TRACCC_DEVICE inline void ccl_core(
* Run FastSV algorithm, which will update the father index to that of
* the cell belonging to the same cluster with the lowest index.
*/
fast_sv_1(f, gf, adjc, adjv, thread_cell_count, threadId, blckDim, barrier);
fast_sv_1(thread_id, f, gf, adjc, adjv, thread_cell_count, barrier);

barrier.blockBarrier();

for (details::index_t tst = 0; tst < thread_cell_count; ++tst) {
const details::index_t cid = tst * blckDim + threadId;
const details::index_t cid =
tst * thread_id.getBlockDimX() + thread_id.getLocalThreadIdX();
if (f.at(cid) == cid) {
// Add a new measurement to the output buffer. Remembering its
// position inside of the container.
@@ -196,10 +205,10 @@
}
}

template <device::concepts::barrier barrier_t>
template <device::concepts::barrier barrier_t,
device::concepts::thread_id1 thread_id_t>
TRACCC_DEVICE inline void ccl_kernel(
const clustering_config cfg, const details::index_t threadId,
const details::index_t blckDim, const unsigned int blockId,
const clustering_config cfg, const thread_id_t& thread_id,
const cell_collection_types::const_view cells_view,
const cell_module_collection_types::const_view modules_view,
std::size_t& partition_start, std::size_t& partition_end, std::size_t& outi,
@@ -237,8 +246,9 @@ TRACCC_DEVICE inline void ccl_kernel(
* (to a later point in the array); start and end may be moved different
* amounts.
*/
if (threadId == 0) {
std::size_t start = blockId * cfg.target_partition_size();
if (thread_id.getLocalThreadIdX() == 0) {
std::size_t start =
thread_id.getBlockIdX() * cfg.target_partition_size();
assert(start < num_cells);
std::size_t end =
std::min(num_cells, start + cfg.target_partition_size());
@@ -313,24 +323,26 @@
* rare edge case.
*/
if (size > cfg.max_partition_size()) {
if (threadId == 0) {
if (thread_id.getLocalThreadIdX() == 0) {
lock.lock();
}

barrier.blockBarrier();

adjc = adjc_backup.data() + (threadId * cfg.max_cells_per_thread *
cfg.backup_size_multiplier);
adjv = adjv_backup.data() + (threadId * 8 * cfg.max_cells_per_thread *
cfg.backup_size_multiplier);
adjc = adjc_backup.data() +
(thread_id.getLocalThreadIdX() * cfg.max_cells_per_thread *
cfg.backup_size_multiplier);
adjv = adjv_backup.data() +
(thread_id.getLocalThreadIdX() * 8 * cfg.max_cells_per_thread *
cfg.backup_size_multiplier);
use_scratch = true;
} else {
adjc = _adjc;
adjv = _adjv;
use_scratch = false;
}

ccl_core(threadId, blckDim, partition_start, partition_end,
ccl_core(thread_id, partition_start, partition_end,
use_scratch ? f_backup : f_primary,
use_scratch ? gf_backup : gf_primary, cell_links, adjv, adjc,
cells_device, modules_device, measurements_device, barrier);
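
A side note on the indexing scheme that thread_id now feeds: each thread of a block handles every blockDim-th cell of its partition, and thread_cell_count is the per-thread ceiling division seen above. The following host-side sketch, with made-up numbers and not taken from this pull request, checks that the scheme covers every cell of a partition exactly once:

#include <cassert>
#include <vector>

// Host-side illustration of the per-thread cell indexing used in ccl_core:
// thread `tid` of a block of `block_dim` threads handles the cells
// cid = tst * block_dim + tid, for tst in [0, thread_cell_count).
int main() {
    const unsigned int block_dim = 64;  // example block size
    const unsigned int size = 100;      // example partition size
    std::vector<unsigned int> hits(size, 0);

    for (unsigned int tid = 0; tid < block_dim; ++tid) {
        // Same formula as in ccl_core: ceil((size - tid) / block_dim).
        const unsigned int thread_cell_count =
            (size - tid + block_dim - 1) / block_dim;
        for (unsigned int tst = 0; tst < thread_cell_count; ++tst) {
            const unsigned int cid = tst * block_dim + tid;
            ++hits.at(cid);
        }
    }

    // Every cell of the partition is visited exactly once.
    for (unsigned int h : hits) {
        assert(h == 1);
    }
    return 0;
}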
68 changes: 68 additions & 0 deletions device/common/include/traccc/device/concepts/thread_id.hpp
@@ -0,0 +1,68 @@
/**
* traccc library, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#pragma once

#include <concepts>

namespace traccc::device::concepts {
/**
* @brief Concept to ensure that a type behaves like a thread identification
* type which allows us to access thread and block IDs. This concept assumes
* one-dimensional grids.
*
* @tparam T The thread identifier-like type.
*/
template <typename T>
concept thread_id1 = requires(T& i) {
/*
* This function should return the local thread identifier in a *flat* way,
* e.g. compressing two or three dimensional blocks into one dimension.
*/
{ i.getLocalThreadId() }
->std::integral;

/*
* This function should return the local thread identifier in the X-axis.
*/
{ i.getLocalThreadIdX() }
->std::integral;

/*
* This function should return the global thread identifier in a *flat*
* way, e.g. compressing two or three dimensional blocks into one
* dimension.
*/
{ i.getGlobalThreadId() }
->std::integral;

/*
* This function should return the global thread identifier in the X-axis.
*/
{ i.getGlobalThreadIdX() }
->std::integral;

/*
* This function should return the block identifier in the X-axis.
*/
{ i.getBlockIdX() }
->std::integral;

/*
* This function should return the block size in the X-axis.
*/
{ i.getBlockDimX() }
->std::integral;

/*
* This function should return the grid size in the X-axis.
*/
{ i.getGridDimX() }
->std::integral;
};
} // namespace traccc::device::concepts
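
To make the interface concrete, here is a sketch of a minimal host-side stub that would satisfy this concept. The type is hypothetical, not part of this pull request, and could for example back single-threaded unit tests of device code:

#include "traccc/device/concepts/thread_id.hpp"

// Hypothetical single-threaded stub satisfying
// traccc::device::concepts::thread_id1: every index is zero and the block and
// grid sizes are one. Shown only to illustrate the required interface.
struct host_thread_id1 {
    unsigned int getLocalThreadId() const { return 0u; }
    unsigned int getLocalThreadIdX() const { return 0u; }
    unsigned int getGlobalThreadId() const { return 0u; }
    unsigned int getGlobalThreadIdX() const { return 0u; }
    unsigned int getBlockIdX() const { return 0u; }
    unsigned int getBlockDimX() const { return 1u; }
    unsigned int getGridDimX() const { return 1u; }
};

static_assert(traccc::device::concepts::thread_id1<host_thread_id1>,
              "host_thread_id1 should satisfy the thread_id1 concept");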