Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add blockwise add and count in CUDA and SYCL #610

Merged
merged 2 commits into from
Jun 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions device/cuda/src/utils/barrier.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,14 @@ struct barrier {
TRACCC_DEVICE
void blockBarrier() { __syncthreads(); }

TRACCC_DEVICE
bool blockAnd(bool predicate) { return __syncthreads_and(predicate); }

TRACCC_DEVICE
bool blockOr(bool predicate) { return __syncthreads_or(predicate); }

TRACCC_DEVICE
int blockCount(bool predicate) { return __syncthreads_count(predicate); }
};

} // namespace traccc::cuda
2 changes: 1 addition & 1 deletion device/sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@ traccc_add_library( traccc_sycl sycl TYPE SHARED
"include/traccc/sycl/utils/queue_wrapper.hpp"
"include/traccc/sycl/utils/calculate1DimNdRange.hpp"
"include/traccc/sycl/utils/make_prefix_sum_buff.hpp"
"include/traccc/sycl/utils/barrier.hpp"
# implementation files
"src/clusterization/clusterization_algorithm.sycl"
"src/clusterization/spacepoint_formation_algorithm.sycl"
Expand All @@ -35,6 +34,7 @@ traccc_add_library( traccc_sycl sycl TYPE SHARED
"src/seeding/seeding_algorithm.cpp"
"src/seeding/spacepoint_binning.sycl"
"src/seeding/track_params_estimation.sycl"
"src/utils/barrier.hpp"
"src/utils/get_queue.hpp"
"src/utils/get_queue.sycl"
"src/utils/queue_wrapper.cpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@
*/

// Local include(s).
#include "../utils/barrier.hpp"
#include "../utils/get_queue.hpp"
#include "traccc/sycl/clusterization/clusterization_algorithm.hpp"
#include "traccc/sycl/utils/barrier.hpp"

// Project include(s)
#include "traccc/clusterization/device/ccl_kernel.hpp"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,9 @@
// Project include(s).
#include "traccc/definitions/qualifiers.hpp"

// SYCL includes
#include <CL/sycl.hpp>

namespace traccc::sycl {

struct barrier {
Expand All @@ -18,14 +21,27 @@ struct barrier {
TRACCC_DEVICE
void blockBarrier() { m_item.barrier(); }

TRACCC_DEVICE
bool blockAnd(bool predicate) {
m_item.barrier();
return ::sycl::all_of_group(m_item.get_group(), predicate);
}

TRACCC_DEVICE
bool blockOr(bool predicate) {
m_item.barrier();
return ::sycl::any_of_group(m_item.get_group(), predicate);
}

TRACCC_DEVICE
unsigned int blockCount(bool predicate) {
m_item.barrier();
return ::sycl::reduce_over_group(m_item.get_group(),
predicate ? 1u : 0u, ::sycl::plus<>());
}

private:
::sycl::nd_item<1> m_item;
};

} // namespace traccc::sycl
} // namespace traccc::sycl
1 change: 1 addition & 0 deletions tests/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ traccc_add_test(

# Define the sources for the test.
test_basic.cu
test_barrier.cu
test_cca.cpp
test_ckf_combinatorics_telescope.cpp
test_ckf_toy_detector.cpp
Expand Down
146 changes: 146 additions & 0 deletions tests/cuda/test_barrier.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
/**
* traccc library, part of the ACTS project (R&D line)
*
* (c) 2024 CERN for the benefit of the ACTS project
*
* Mozilla Public License Version 2.0
*/

#include <gtest/gtest.h>

#include <vecmem/memory/cuda/managed_memory_resource.hpp>
#include <vecmem/memory/unique_ptr.hpp>

#include "../../device/cuda/src/utils/barrier.hpp"

__global__ void testBarrierAnd(bool* out) {
traccc::cuda::barrier bar;

bool v;

v = bar.blockAnd(false);
if (threadIdx.x == 0) {
out[0] = v;
}

v = bar.blockAnd(true);
if (threadIdx.x == 0) {
out[1] = v;
}

v = bar.blockAnd(threadIdx.x % 2 == 0);
if (threadIdx.x == 0) {
out[2] = v;
}

v = bar.blockAnd(threadIdx.x < 32);
if (threadIdx.x == 0) {
out[3] = v;
}
}

TEST(CUDABarrier, BarrierAnd) {
vecmem::cuda::managed_memory_resource mr;
constexpr std::size_t n_bools = 4;

vecmem::unique_alloc_ptr<bool[]> out =
vecmem::make_unique_alloc<bool[]>(mr, n_bools);

testBarrierAnd<<<1, 1024>>>(out.get());

ASSERT_EQ(cudaGetLastError(), cudaSuccess);
ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess);

EXPECT_FALSE(out.get()[0]);
EXPECT_TRUE(out.get()[1]);
EXPECT_FALSE(out.get()[2]);
EXPECT_FALSE(out.get()[3]);
}

__global__ void testBarrierOr(bool* out) {
traccc::cuda::barrier bar;

bool v;

v = bar.blockOr(false);
if (threadIdx.x == 0) {
out[0] = v;
}

v = bar.blockOr(true);
if (threadIdx.x == 0) {
out[1] = v;
}

v = bar.blockOr(threadIdx.x % 2 == 0);
if (threadIdx.x == 0) {
out[2] = v;
}

v = bar.blockOr(threadIdx.x < 32);
if (threadIdx.x == 0) {
out[3] = v;
}
}

TEST(CUDABarrier, BarrierOr) {
vecmem::cuda::managed_memory_resource mr;
constexpr std::size_t n_bools = 4;

vecmem::unique_alloc_ptr<bool[]> out =
vecmem::make_unique_alloc<bool[]>(mr, n_bools);

testBarrierOr<<<1, 1024>>>(out.get());

ASSERT_EQ(cudaGetLastError(), cudaSuccess);
ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess);

EXPECT_FALSE(out.get()[0]);
EXPECT_TRUE(out.get()[1]);
EXPECT_TRUE(out.get()[2]);
EXPECT_TRUE(out.get()[3]);
}

__global__ void testBarrierCount(int* out) {
traccc::cuda::barrier bar;

int v;

v = bar.blockOr(false);
if (threadIdx.x == 0) {
out[0] = v;
}

v = bar.blockOr(true);
if (threadIdx.x == 0) {
out[1] = v;
}

v = bar.blockOr(threadIdx.x % 2 == 0);
if (threadIdx.x == 0) {
out[2] = v;
}

v = bar.blockOr(threadIdx.x < 32);
if (threadIdx.x == 0) {
out[3] = v;
}
}

TEST(CUDABarrier, BarrierCount) {
vecmem::cuda::managed_memory_resource mr;
constexpr std::size_t n_ints = 4;

vecmem::unique_alloc_ptr<int[]> out =
vecmem::make_unique_alloc<int[]>(mr, n_ints);

testBarrierCount<<<1, 1024>>>(out.get());

ASSERT_EQ(cudaGetLastError(), cudaSuccess);
ASSERT_EQ(cudaDeviceSynchronize(), cudaSuccess);

EXPECT_EQ(out.get()[0], 0);
EXPECT_EQ(out.get()[1], 1024);
EXPECT_EQ(out.get()[2], 512);
EXPECT_EQ(out.get()[3], 32);
}
1 change: 1 addition & 0 deletions tests/sycl/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ traccc_add_test(
test_kalman_fitter_telescope.sycl
test_clusterization.sycl
test_spacepoint_formation.sycl
test_barrier.sycl

LINK_LIBRARIES
GTest::gtest_main
Expand Down
Loading
Loading