Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 5ef533c

Browse files
committedJul 17, 2024··
Add sanity checks for contiguity and orderedness
In some places, we rely on certain properties of our inputs. For one, we rely on our measurements being contiguous (with respect to the module identifiers) in the fitting code, and we rely on our cells being ordered in some way in the clustering code. I aim to emit properly contiguous elements from the CCL code in the future, but I want to make this process foolproof. To that end, I have added two new sanity checks which aim to find problems with input. One of them checks whether elements are contiguous (according to some projection function) and the other checks whether the elements are in some order. This commit also employs these checks for debug builds in some places.
1 parent 4377873 commit 5ef533c

17 files changed

+1241
-2
lines changed
 
+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
#include "traccc/definitions/qualifiers.hpp"
12+
#include "traccc/edm/cell.hpp"
13+
#include "traccc/edm/measurement.hpp"
14+
15+
namespace traccc {
16+
struct [[maybe_unused]] cell_module_projection{
17+
TRACCC_HOST_DEVICE [[maybe_unused]] auto operator()(const traccc::cell& m)
18+
const {return m.module_link;
19+
}
20+
}
21+
;
22+
23+
struct [[maybe_unused]] measurement_module_projection{
24+
TRACCC_HOST_DEVICE auto operator()(const traccc::measurement& m)
25+
const {return m.module_link;
26+
}
27+
}
28+
;
29+
} // namespace traccc
+34
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
#include "traccc/definitions/qualifiers.hpp"
12+
#include "traccc/edm/cell.hpp"
13+
14+
namespace traccc {
15+
struct [[maybe_unused]] channel0_major_cell_order_relation{
16+
TRACCC_HOST_DEVICE [[maybe_unused]] bool operator()(const traccc::cell& a,
17+
const traccc::cell& b)
18+
const {if (a.module_link ==
19+
b.module_link){if (a.channel1 < b.channel1){return true;
20+
}
21+
else if (a.channel1 == b.channel1) {
22+
return a.channel0 < b.channel0;
23+
}
24+
else {
25+
return false;
26+
}
27+
}
28+
else {
29+
return true;
30+
}
31+
}
32+
}
33+
;
34+
} // namespace traccc

‎device/cuda/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,8 @@ traccc_add_library( traccc_cuda cuda TYPE SHARED
2828
"src/utils/opaque_stream.cpp"
2929
"src/utils/utils.hpp"
3030
"src/utils/utils.cpp"
31+
"src/sanity/contiguous_on.cuh"
32+
"src/sanity/ordered_on.cuh"
3133
# Seed finding code.
3234
"include/traccc/cuda/seeding/track_params_estimation.hpp"
3335
"src/seeding/track_params_estimation.cu"

‎device/cuda/src/clusterization/clusterization_algorithm.cu

+9
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,14 @@
66
*/
77

88
// CUDA Library include(s).
9+
#include "../sanity/contiguous_on.cuh"
10+
#include "../sanity/ordered_on.cuh"
911
#include "../utils/barrier.hpp"
1012
#include "../utils/cuda_error_handling.hpp"
1113
#include "../utils/utils.hpp"
1214
#include "traccc/cuda/clusterization/clusterization_algorithm.hpp"
15+
#include "traccc/utils/projections.hpp"
16+
#include "traccc/utils/relations.hpp"
1317

1418
// Project include(s)
1519
#include "traccc/clusterization/device/ccl_kernel.hpp"
@@ -60,6 +64,11 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()(
6064
const cell_collection_types::const_view& cells,
6165
const cell_module_collection_types::const_view& modules) const {
6266

67+
assert(is_contiguous_on(cell_module_projection(), m_mr.main, m_copy,
68+
m_stream, cells));
69+
assert(is_ordered_on(channel0_major_cell_order_relation(), m_mr.main,
70+
m_copy, m_stream, cells));
71+
6372
// Get a convenience variable for the stream that we'll be using.
6473
cudaStream_t stream = details::get_stream(m_stream);
6574

‎device/cuda/src/finding/finding_algorithm.cu

+7-1
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
*/
77

88
// Project include(s).
9+
#include "../sanity/contiguous_on.cuh"
910
#include "../utils/cuda_error_handling.hpp"
1011
#include "../utils/utils.hpp"
1112
#include "traccc/cuda/finding/finding_algorithm.hpp"
1213
#include "traccc/definitions/primitives.hpp"
14+
#include "traccc/definitions/qualifiers.hpp"
1315
#include "traccc/edm/device/finding_global_counter.hpp"
1416
#include "traccc/finding/candidate_link.hpp"
1517
#include "traccc/finding/device/add_links_for_holes.hpp"
@@ -20,6 +22,7 @@
2022
#include "traccc/finding/device/make_barcode_sequence.hpp"
2123
#include "traccc/finding/device/propagate_to_next_surface.hpp"
2224
#include "traccc/finding/device/prune_tracks.hpp"
25+
#include "traccc/utils/projections.hpp"
2326

2427
// detray include(s).
2528
#include "detray/core/detector.hpp"
@@ -43,10 +46,10 @@
4346
#include <thrust/unique.h>
4447

4548
// System include(s).
49+
#include <cassert>
4650
#include <vector>
4751

4852
namespace traccc::cuda {
49-
5053
namespace kernels {
5154

5255
/// CUDA kernel for running @c traccc::device::make_barcode_sequence
@@ -270,6 +273,9 @@ finding_algorithm<stepper_t, navigator_t>::operator()(
270273
measurement_collection_types::const_view::size_type n_measurements =
271274
m_copy.get_size(measurements);
272275

276+
assert(is_contiguous_on(measurement_module_projection(), m_mr.main, m_copy,
277+
m_stream, measurements));
278+
273279
// Get copy of barcode uniques
274280
measurement_collection_types::buffer uniques_buffer{n_measurements,
275281
m_mr.main};
+162
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
// Project include(s).
12+
#include "../utils/cuda_error_handling.hpp"
13+
#include "traccc/cuda/utils/stream.hpp"
14+
#include "traccc/definitions/concepts.hpp"
15+
16+
// VecMem include(s).
17+
#include <vecmem/containers/data/vector_view.hpp>
18+
#include <vecmem/containers/device_vector.hpp>
19+
#include <vecmem/memory/memory_resource.hpp>
20+
#include <vecmem/memory/unique_ptr.hpp>
21+
#include <vecmem/utils/copy.hpp>
22+
23+
// CUDA include
24+
#include <cuda_runtime.h>
25+
26+
// System include
27+
#if __cpp_concepts >= 201907L
28+
#include <concepts>
29+
#endif
30+
31+
namespace traccc::cuda {
32+
namespace kernels {
33+
template <TRACCC_CONSTRAINT(std::semiregular) P, typename T, typename S>
34+
#if __cpp_concepts >= 201907L
35+
requires std::regular_invocable<P, T>
36+
#endif
37+
__global__ void compress_adjacent(P projection,
38+
vecmem::data::vector_view<T> _in, S* out,
39+
uint32_t* out_size) {
40+
int tid = threadIdx.x + blockIdx.x * blockDim.x;
41+
42+
vecmem::device_vector<T> in(_in);
43+
44+
if (tid > 0 && tid < in.size()) {
45+
std::invoke_result_t<P, T> v1 = projection(in.at(tid - 1));
46+
std::invoke_result_t<P, T> v2 = projection(in.at(tid));
47+
48+
if (v1 != v2) {
49+
out[atomicAdd(out_size, 1u)] = v2;
50+
}
51+
} else if (tid == 0) {
52+
out[atomicAdd(out_size, 1u)] = projection(in.at(tid));
53+
}
54+
}
55+
56+
template <TRACCC_CONSTRAINT(std::equality_comparable) T>
57+
__global__ void all_unique(const T* in, const size_t n, bool* out) {
58+
int tid_x = threadIdx.x + blockIdx.x * blockDim.x;
59+
int tid_y = threadIdx.y + blockIdx.y * blockDim.y;
60+
61+
if (tid_x < n && tid_y < n && tid_x != tid_y && in[tid_x] == in[tid_y]) {
62+
*out = false;
63+
}
64+
}
65+
} // namespace kernels
66+
67+
/**
68+
* @brief Sanity check that a given vector is contiguous on a given projection.
69+
*
70+
* For a vector $v$ to be contiguous on a projection $\pi$, it must be the case
71+
* that for all indices $i$ and $j$, if $v_i = v_j$, then all indices $k$
72+
* between $i$ and $j$, $v_i = v_j = v_k$.
73+
*
74+
* @note This function runs in O(n^2) time.
75+
*
76+
* @tparam P The type of projection $\pi$, a callable which returns some
77+
* comparable type.
78+
* @tparam T The type of the vector.
79+
* @param projection A projection object of type `P`.
80+
* @param mr A memory resource used for allocating intermediate memory.
81+
* @param vector The vector which to check for contiguity.
82+
* @return true If the vector is contiguous on `P`.
83+
* @return false Otherwise.
84+
*/
85+
template <TRACCC_CONSTRAINT(std::semiregular) P,
86+
TRACCC_CONSTRAINT(std::equality_comparable) T>
87+
#if __cpp_concepts >= 201907L
88+
requires std::regular_invocable<P, T>
89+
#endif
90+
bool is_contiguous_on(P&& projection, vecmem::memory_resource& mr,
91+
vecmem::copy& copy, stream& stream,
92+
vecmem::data::vector_view<T> vector) {
93+
// This should never be a performance-critical step, so we can keep the
94+
// block size fixed.
95+
constexpr int block_size = 512;
96+
constexpr int block_size_2d = 32;
97+
98+
cudaStream_t cuda_stream =
99+
reinterpret_cast<cudaStream_t>(stream.cudaStream());
100+
101+
// Grab the number of elements in our vector.
102+
uint32_t n = copy.get_size(vector);
103+
104+
// Get the output type of the projection.
105+
using projection_t = std::invoke_result_t<P, T>;
106+
107+
// Allocate memory for intermediate values and outputs, then set them up.
108+
vecmem::unique_alloc_ptr<projection_t[]> iout =
109+
vecmem::make_unique_alloc<projection_t[]>(mr, n);
110+
vecmem::unique_alloc_ptr<uint32_t> iout_size =
111+
vecmem::make_unique_alloc<uint32_t>(mr);
112+
vecmem::unique_alloc_ptr<bool> out = vecmem::make_unique_alloc<bool>(mr);
113+
114+
uint32_t initial_iout_size = 0;
115+
bool initial_out = true;
116+
117+
TRACCC_CUDA_ERROR_CHECK(
118+
cudaMemcpyAsync(iout_size.get(), &initial_iout_size, sizeof(uint32_t),
119+
cudaMemcpyHostToDevice, cuda_stream));
120+
TRACCC_CUDA_ERROR_CHECK(
121+
cudaMemcpyAsync(out.get(), &initial_out, sizeof(bool),
122+
cudaMemcpyHostToDevice, cuda_stream));
123+
124+
// Launch the first kernel, which will squash consecutive equal elements
125+
// into one element.
126+
kernels::compress_adjacent<P, T, projection_t>
127+
<<<(n + block_size - 1) / block_size, block_size, 0, cuda_stream>>>(
128+
projection, vector, iout.get(), iout_size.get());
129+
130+
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
131+
132+
// Copy the total number of squashed elements, e.g. the size of the
133+
// resulting vector.
134+
uint32_t host_iout_size;
135+
136+
TRACCC_CUDA_ERROR_CHECK(
137+
cudaMemcpyAsync(&host_iout_size, iout_size.get(), sizeof(uint32_t),
138+
cudaMemcpyDeviceToHost, cuda_stream));
139+
140+
// Launch the second kernel, which will check if the values are unique.
141+
uint32_t grid_size_rd =
142+
(host_iout_size + block_size_2d - 1) / block_size_2d;
143+
dim3 all_unique_grid_size(grid_size_rd, grid_size_rd);
144+
dim3 all_unique_block_size(block_size_2d, block_size_2d);
145+
146+
kernels::all_unique<<<all_unique_grid_size, all_unique_block_size, 0,
147+
cuda_stream>>>(iout.get(), host_iout_size, out.get());
148+
149+
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
150+
151+
// Get the result from the device and return it.
152+
bool host_out;
153+
154+
TRACCC_CUDA_ERROR_CHECK(cudaMemcpyAsync(&host_out, out.get(), sizeof(bool),
155+
cudaMemcpyDeviceToHost,
156+
cuda_stream));
157+
158+
stream.synchronize();
159+
160+
return host_out;
161+
}
162+
} // namespace traccc::cuda

‎device/cuda/src/sanity/ordered_on.cuh

+117
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
// Project include(s).
12+
#include "../utils/cuda_error_handling.hpp"
13+
#include "traccc/cuda/utils/stream.hpp"
14+
#include "traccc/definitions/concepts.hpp"
15+
16+
// VecMem include(s).
17+
#include <vecmem/containers/data/vector_view.hpp>
18+
#include <vecmem/containers/device_vector.hpp>
19+
#include <vecmem/memory/memory_resource.hpp>
20+
#include <vecmem/memory/unique_ptr.hpp>
21+
#include <vecmem/utils/copy.hpp>
22+
23+
// CUDA include
24+
#include <cuda_runtime.h>
25+
26+
// System include
27+
#if __cpp_concepts >= 201907L
28+
#include <concepts>
29+
#endif
30+
31+
namespace traccc::cuda {
32+
namespace kernels {
33+
template <TRACCC_CONSTRAINT(std::semiregular) R, typename T>
34+
#if __cpp_concepts >= 201907L
35+
requires std::relation<R, T, T>
36+
#endif
37+
__global__ void is_ordered_on_kernel(R relation,
38+
vecmem::data::vector_view<T> _in,
39+
bool* out) {
40+
int tid = threadIdx.x + blockIdx.x * blockDim.x;
41+
42+
vecmem::device_vector<T> in(_in);
43+
44+
if (tid > 0 && tid < in.size()) {
45+
if (!relation(in.at(tid - 1), in.at(tid))) {
46+
*out = false;
47+
}
48+
}
49+
}
50+
} // namespace kernels
51+
52+
/**
53+
* @brief Sanity check that a given vector is ordered on a given relation.
54+
*
55+
* For a vector $v$ to be ordered on a relation $R$, it must be the case that
56+
* for all indices $i$ and $j$, if $i < j$, then $R(i, j)$.
57+
*
58+
* @note This function runs in O(n) time.
59+
*
60+
* @note Although functions like `std::sort` requires the relation to be strict
61+
* weak order, this function is more lax in its requirements. Rather, the
62+
* relation should be a total preorder, i.e. a non-strict weak order.
63+
*
64+
* @note For any strict weak order $R$, `is_ordered_on(sort(R, v))` is true.
65+
*
66+
* @tparam R The type of relation $R$, a callable which returns a bool if the
67+
* first argument can be immediately before the second type.
68+
* @tparam T The type of the vector.
69+
* @param relation A relation object of type `R`.
70+
* @param mr A memory resource used for allocating intermediate memory.
71+
* @param vector The vector which to check for ordering.
72+
* @return true If the vector is ordered on `R`.
73+
* @return false Otherwise.
74+
*/
75+
template <TRACCC_CONSTRAINT(std::semiregular) R, typename T>
76+
#if __cpp_concepts >= 201907L
77+
requires std::relation<R, T, T>
78+
#endif
79+
bool is_ordered_on(R relation, vecmem::memory_resource& mr,
80+
vecmem::copy& copy, stream& stream,
81+
vecmem::data::vector_view<T> vector) {
82+
// This should never be a performance-critical step, so we can keep the
83+
// block size fixed.
84+
constexpr int block_size = 512;
85+
86+
cudaStream_t cuda_stream =
87+
reinterpret_cast<cudaStream_t>(stream.cudaStream());
88+
89+
// Grab the number of elements in our vector.
90+
uint32_t n = copy.get_size(vector);
91+
92+
// Initialize the output boolean.
93+
vecmem::unique_alloc_ptr<bool> out = vecmem::make_unique_alloc<bool>(mr);
94+
bool initial_out = true;
95+
TRACCC_CUDA_ERROR_CHECK(
96+
cudaMemcpyAsync(out.get(), &initial_out, sizeof(bool),
97+
cudaMemcpyHostToDevice, cuda_stream));
98+
99+
// Launch the kernel which will write its result to the `out` boolean.
100+
kernels::is_ordered_on_kernel<<<(n + block_size - 1) / block_size,
101+
block_size, 0, cuda_stream>>>(
102+
relation, vector, out.get());
103+
104+
TRACCC_CUDA_ERROR_CHECK(cudaGetLastError());
105+
106+
// Copy the output to host, then return it.
107+
bool host_out;
108+
109+
TRACCC_CUDA_ERROR_CHECK(cudaMemcpyAsync(&host_out, out.get(), sizeof(bool),
110+
cudaMemcpyDeviceToHost,
111+
cuda_stream));
112+
113+
stream.synchronize();
114+
115+
return host_out;
116+
}
117+
} // namespace traccc::cuda

‎device/sycl/CMakeLists.txt

+4-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,10 @@ traccc_add_library( traccc_sycl sycl TYPE SHARED
3939
"src/utils/get_queue.sycl"
4040
"src/utils/queue_wrapper.cpp"
4141
"src/utils/calculate1DimNdRange.sycl"
42-
"src/utils/make_prefix_sum_buff.sycl" )
42+
"src/utils/make_prefix_sum_buff.sycl"
43+
"src/sanity/contiguous_on.hpp"
44+
"src/sanity/ordered_on.hpp"
45+
)
4346
target_link_libraries( traccc_sycl
4447
PUBLIC traccc::core detray::core detray::utils vecmem::core covfie::core
4548
PRIVATE traccc::device_common vecmem::sycl )

‎device/sycl/src/clusterization/clusterization_algorithm.sycl

+9
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,13 @@
66
*/
77

88
// Local include(s).
9+
#include "../sanity/contiguous_on.hpp"
10+
#include "../sanity/ordered_on.hpp"
911
#include "../utils/barrier.hpp"
1012
#include "../utils/get_queue.hpp"
1113
#include "traccc/sycl/clusterization/clusterization_algorithm.hpp"
14+
#include "traccc/utils/projections.hpp"
15+
#include "traccc/utils/relations.hpp"
1216

1317
// Project include(s)
1418
#include "traccc/clusterization/device/ccl_kernel.hpp"
@@ -37,6 +41,11 @@ clusterization_algorithm::output_type clusterization_algorithm::operator()(
3741
const cell_collection_types::const_view& cells_view,
3842
const cell_module_collection_types::const_view& modules_view) const {
3943

44+
assert(is_contiguous_on(cell_module_projection(), m_mr.main, m_copy,
45+
m_queue, cells_view));
46+
assert(is_ordered_on(channel0_major_cell_order_relation(), m_mr.main,
47+
m_copy, m_queue, cells_view));
48+
4049
// Get the number of cells
4150
const cell_collection_types::view::size_type num_cells =
4251
m_copy.get().get_size(cells_view);
+165
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
// Project include(s).
12+
#include <traccc/definitions/concepts.hpp>
13+
#include <traccc/sycl/utils/queue_wrapper.hpp>
14+
15+
#include "../utils/get_queue.hpp"
16+
17+
// VecMem include(s).
18+
#include <vecmem/containers/data/vector_view.hpp>
19+
#include <vecmem/containers/device_vector.hpp>
20+
#include <vecmem/memory/device_atomic_ref.hpp>
21+
#include <vecmem/memory/memory_resource.hpp>
22+
#include <vecmem/memory/unique_ptr.hpp>
23+
#include <vecmem/utils/copy.hpp>
24+
25+
// SYCL include
26+
#include <CL/sycl.hpp>
27+
28+
// System include
29+
#if __cpp_concepts >= 201907L
30+
#include <concepts>
31+
#endif
32+
33+
namespace traccc::sycl {
34+
namespace kernels {
35+
/// Kernel name tag for the first kernel launched by `is_contiguous_on`, which
/// collapses runs of adjacent equal projected values.
template <typename P, typename T, typename S>
class IsContiguousOnCompressAdjacent {};

/// Kernel name tag for the second kernel launched by `is_contiguous_on`,
/// which checks the collapsed values for uniqueness.
template <typename T>
class IsContiguousOnAllUnique {};
40+
} // namespace kernels
41+
42+
/**
43+
* @brief Sanity check that a given vector is contiguous on a given projection.
44+
*
45+
* For a vector $v$ to be contiguous on a projection $\pi$, it must be the case
46+
* that for all indices $i$ and $j$, if $v_i = v_j$, then all indices $k$
47+
* between $i$ and $j$, $v_i = v_j = v_k$.
48+
*
49+
* @note This function runs in O(n^2) time.
50+
*
51+
* @tparam P The type of projection $\pi$, a callable which returns some
52+
* comparable type.
53+
* @tparam T The type of the vector.
54+
* @param projection A projection object of type `P`.
55+
* @param mr A memory resource used for allocating intermediate memory.
56+
* @param vector The vector which to check for contiguity.
57+
* @return true If the vector is contiguous on `P`.
58+
* @return false Otherwise.
59+
*/
60+
template <TRACCC_CONSTRAINT(std::semiregular) P,
61+
TRACCC_CONSTRAINT(std::equality_comparable) T>
62+
#if __cpp_concepts >= 201907L
63+
requires std::regular_invocable<P, T>
64+
#endif
65+
bool is_contiguous_on(P&& projection, vecmem::memory_resource& mr,
66+
vecmem::copy& copy, queue_wrapper& queue_wrapper,
67+
vecmem::data::vector_view<T> vector) {
68+
// This should never be a performance-critical step, so we can keep the
69+
// block size fixed.
70+
constexpr int block_size = 512;
71+
72+
cl::sycl::queue& queue = details::get_queue(queue_wrapper);
73+
74+
// Grab the number of elements in our vector.
75+
uint32_t n = copy.get_size(vector);
76+
77+
// Get the output type of the projection.
78+
using projection_t = std::invoke_result_t<P, T>;
79+
80+
// Allocate memory for intermediate values and outputs, then set them up.
81+
vecmem::unique_alloc_ptr<projection_t[]> iout =
82+
vecmem::make_unique_alloc<projection_t[]>(mr, n);
83+
vecmem::unique_alloc_ptr<uint32_t> iout_size =
84+
vecmem::make_unique_alloc<uint32_t>(mr);
85+
vecmem::unique_alloc_ptr<bool> out = vecmem::make_unique_alloc<bool>(mr);
86+
87+
uint32_t initial_iout_size = 0;
88+
bool initial_out = true;
89+
90+
cl::sycl::event kernel1_memcpy1_evt =
91+
queue.copy(&initial_iout_size, iout_size.get(), 1);
92+
cl::sycl::event kernel2_memcpy1_evt =
93+
queue.copy(&initial_out, out.get(), 1);
94+
95+
cl::sycl::nd_range<1> compress_adjacent_range{
96+
cl::sycl::range<1>(((n + block_size - 1) / block_size) * block_size),
97+
cl::sycl::range<1>(block_size)};
98+
99+
// Launch the first kernel, which will squash consecutive equal elements
100+
// into one element.
101+
cl::sycl::event kernel1_evt = queue.submit([&](cl::sycl::handler& h) {
102+
h.depends_on(kernel1_memcpy1_evt);
103+
h.parallel_for<
104+
kernels::IsContiguousOnCompressAdjacent<P, T, projection_t>>(
105+
compress_adjacent_range,
106+
[vector, projection, out = iout.get(),
107+
out_size = iout_size.get()](cl::sycl::nd_item<1> item) {
108+
std::size_t tid = item.get_global_linear_id();
109+
110+
vecmem::device_vector<T> in(vector);
111+
vecmem::device_atomic_ref<uint32_t> out_siz_atm(*out_size);
112+
113+
if (tid > 0 && tid < in.size()) {
114+
std::invoke_result_t<P, T> v1 = projection(in.at(tid - 1));
115+
std::invoke_result_t<P, T> v2 = projection(in.at(tid));
116+
117+
if (v1 != v2) {
118+
out[out_siz_atm.fetch_add(1)] = v2;
119+
}
120+
} else if (tid == 0) {
121+
out[out_siz_atm.fetch_add(1)] = projection(in.at(tid));
122+
}
123+
});
124+
});
125+
126+
// Copy the total number of squashed elements, e.g. the size of the
127+
// resulting vector.
128+
uint32_t host_iout_size;
129+
130+
queue
131+
.memcpy(&host_iout_size, iout_size.get(), sizeof(uint32_t),
132+
{kernel1_evt})
133+
.wait_and_throw();
134+
135+
uint32_t grid_size_rd = (host_iout_size + block_size - 1) / block_size;
136+
137+
cl::sycl::nd_range<2> all_unique_range{
138+
cl::sycl::range<2>(grid_size_rd * block_size, host_iout_size),
139+
cl::sycl::range<2>(block_size, 1)};
140+
141+
// Launch the second kernel, which will check if the values are unique.
142+
cl::sycl::event kernel2_evt = queue.submit([&](cl::sycl::handler& h) {
143+
h.depends_on(kernel2_memcpy1_evt);
144+
h.parallel_for<kernels::IsContiguousOnAllUnique<T>>(
145+
all_unique_range, [n = host_iout_size, in = iout.get(),
146+
out = out.get()](cl::sycl::nd_item<2> item) {
147+
std::size_t tid_x = item.get_global_id(0);
148+
std::size_t tid_y = item.get_global_id(1);
149+
150+
if (tid_x < n && tid_y < n && tid_x != tid_y &&
151+
in[tid_x] == in[tid_y]) {
152+
*out = false;
153+
}
154+
});
155+
});
156+
157+
// Get the result from the device and return it.
158+
bool host_out;
159+
160+
queue.memcpy(&host_out, out.get(), sizeof(bool), {kernel2_evt})
161+
.wait_and_throw();
162+
163+
return host_out;
164+
}
165+
} // namespace traccc::sycl

‎device/sycl/src/sanity/ordered_on.hpp

+112
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
/**
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
#pragma once
10+
11+
// Project include(s).
12+
#include <traccc/definitions/concepts.hpp>
13+
#include <traccc/sycl/utils/queue_wrapper.hpp>
14+
15+
#include "../utils/get_queue.hpp"
16+
17+
// VecMem include(s).
18+
#include <vecmem/containers/data/vector_view.hpp>
19+
#include <vecmem/containers/device_vector.hpp>
20+
#include <vecmem/memory/memory_resource.hpp>
21+
#include <vecmem/memory/unique_ptr.hpp>
22+
#include <vecmem/utils/copy.hpp>
23+
24+
// SYCL include
25+
#include <CL/sycl.hpp>
26+
27+
// System include
28+
#if __cpp_concepts >= 201907L
29+
#include <concepts>
30+
#endif
31+
32+
namespace traccc::sycl {
33+
namespace kernels {
34+
/// Kernel name tag for the kernel launched by `is_ordered_on`.
template <typename R, typename T>
class IsOrderedOn {};
36+
} // namespace kernels
37+
38+
/**
39+
* @brief Sanity check that a given vector is ordered on a given relation.
40+
*
41+
* For a vector $v$ to be ordered on a relation $R$, it must be the case that
42+
* for all indices $i$ and $j$, if $i < j$, then $R(i, j)$.
43+
*
44+
* @note This function runs in O(n) time.
45+
*
46+
* @note Although functions like `std::sort` requires the relation to be strict
47+
* weak order, this function is more lax in its requirements. Rather, the
48+
* relation should be a total preorder, i.e. a non-strict weak order.
49+
*
50+
* @note For any strict weak order $R$, `is_ordered_on(sort(R, v))` is true.
51+
*
52+
* @tparam R The type of relation $R$, a callable which returns a bool if the
53+
* first argument can be immediately before the second type.
54+
* @tparam T The type of the vector.
55+
* @param relation A relation object of type `R`.
56+
* @param mr A memory resource used for allocating intermediate memory.
57+
* @param vector The vector which to check for ordering.
58+
* @return true If the vector is ordered on `R`.
59+
* @return false Otherwise.
60+
*/
61+
template <TRACCC_CONSTRAINT(std::semiregular) R, typename T>
62+
#if __cpp_concepts >= 201907L
63+
requires std::relation<R, T, T>
64+
#endif
65+
bool is_ordered_on(R relation, vecmem::memory_resource& mr,
66+
vecmem::copy& copy, queue_wrapper& queue_wrapper,
67+
vecmem::data::vector_view<T> vector) {
68+
// This should never be a performance-critical step, so we can keep the
69+
// block size fixed.
70+
constexpr int block_size = 512;
71+
72+
cl::sycl::queue& queue = details::get_queue(queue_wrapper);
73+
74+
// Grab the number of elements in our vector.
75+
uint32_t n = copy.get_size(vector);
76+
77+
// Initialize the output boolean.
78+
vecmem::unique_alloc_ptr<bool> out = vecmem::make_unique_alloc<bool>(mr);
79+
bool initial_out = true;
80+
81+
cl::sycl::event kernel1_memcpy1 =
82+
queue.memcpy(out.get(), &initial_out, sizeof(bool));
83+
84+
cl::sycl::nd_range<1> kernel_range{
85+
cl::sycl::range<1>(((n + block_size - 1) / block_size) * block_size),
86+
cl::sycl::range<1>(block_size)};
87+
88+
cl::sycl::event kernel1 = queue.submit([&](cl::sycl::handler& h) {
89+
h.depends_on(kernel1_memcpy1);
90+
h.parallel_for<kernels::IsOrderedOn<R, T>>(
91+
kernel_range, [=, out = out.get()](cl::sycl::nd_item<1> item) {
92+
std::size_t tid = item.get_global_linear_id();
93+
94+
vecmem::device_vector<T> in(vector);
95+
96+
if (tid > 0 && tid < in.size()) {
97+
if (!relation(in.at(tid - 1), in.at(tid))) {
98+
*out = false;
99+
}
100+
}
101+
});
102+
});
103+
104+
// Copy the output to host, then return it.
105+
bool host_out;
106+
107+
queue.memcpy(&host_out, out.get(), sizeof(bool), {kernel1})
108+
.wait_and_throw();
109+
110+
return host_out;
111+
}
112+
} // namespace traccc::sycl

‎tests/cuda/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@ traccc_add_test(
4242
test_array_wrapper.cu
4343
test_mutex.cu
4444
test_unique_lock.cu
45+
test_sanity_contiguous_on.cu
46+
test_sanity_ordered_on.cu
4547

4648
LINK_LIBRARIES
4749
CUDA::cudart
+155
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,155 @@
1+
/*
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
// vecmem includes
10+
#include <vecmem/memory/cuda/device_memory_resource.hpp>
11+
#include <vecmem/utils/cuda/async_copy.hpp>
12+
13+
// traccc includes
14+
#include <traccc/definitions/qualifiers.hpp>
15+
16+
#include "../../device/cuda/src/sanity/contiguous_on.cuh"
17+
18+
// GTest include(s).
19+
#include <gtest/gtest.h>
20+
21+
struct int_identity_projection {
22+
TRACCC_HOST_DEVICE
23+
int operator()(const int& v) const { return v; }
24+
};
25+
26+
class CUDASanityContiguousOn : public testing::Test {
27+
protected:
28+
CUDASanityContiguousOn() : copy(stream.cudaStream()) {}
29+
30+
vecmem::cuda::device_memory_resource mr;
31+
traccc::cuda::stream stream;
32+
vecmem::cuda::async_copy copy;
33+
};
34+
35+
// Every value i in [0, 5000) appears in exactly one run of i copies, in
// increasing order, so the vector is contiguous.
TEST_F(CUDASanityContiguousOn, TrueOrdered) {
    std::vector<int> values;

    for (int run = 0; run < 5000; ++run) {
        values.insert(values.end(),
                      static_cast<std::vector<int>::size_type>(run), run);
    }

    auto on_device = copy.to(vecmem::get_data(values), mr,
                             vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::cuda::is_contiguous_on(int_identity_projection(), mr,
                                               copy, stream, on_device));
}
50+
51+
// Runs of distinct values in no particular order: still contiguous, since
// no value appears in more than one run.
TEST_F(CUDASanityContiguousOn, TrueRandom) {
    std::vector<int> values;

    for (int run : {603, 6432, 1, 3, 67, 2, 1111}) {
        values.insert(values.end(),
                      static_cast<std::vector<int>::size_type>(run), run);
    }

    auto on_device = copy.to(vecmem::get_data(values), mr,
                             vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::cuda::is_contiguous_on(int_identity_projection(), mr,
                                               copy, stream, on_device));
}
66+
67+
// As the ordered case, but the run for the value 105 is replaced by a single
// stray 5, which makes the value 5 appear in two separate places.
TEST_F(CUDASanityContiguousOn, FalseOrdered) {
    std::vector<int> values;

    for (int run = 0; run < 5000; ++run) {
        if (run != 105) {
            values.insert(values.end(),
                          static_cast<std::vector<int>::size_type>(run), run);
        } else {
            values.push_back(5);
        }
    }

    auto on_device = copy.to(vecmem::get_data(values), mr,
                             vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::cuda::is_contiguous_on(int_identity_projection(), mr,
                                                copy, stream, on_device));
}
86+
87+
// The very first element (4000) duplicates a value which has its own run
// later in the vector, so the vector is not contiguous.
TEST_F(CUDASanityContiguousOn, FalseOrderedPathologicalFirst) {
    std::vector<int> values{4000};

    for (int run = 0; run < 5000; ++run) {
        values.insert(values.end(),
                      static_cast<std::vector<int>::size_type>(run), run);
    }

    auto on_device = copy.to(vecmem::get_data(values), mr,
                             vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::cuda::is_contiguous_on(int_identity_projection(), mr,
                                                copy, stream, on_device));
}
104+
105+
// The very first element (6000) is out of order but does not appear anywhere
// else in the vector, so the vector is still contiguous.
TEST_F(CUDASanityContiguousOn, TrueOrderedPathologicalFirst) {
    std::vector<int> values{6000};

    for (int run = 0; run < 5000; ++run) {
        values.insert(values.end(),
                      static_cast<std::vector<int>::size_type>(run), run);
    }

    auto on_device = copy.to(vecmem::get_data(values), mr,
                             vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::cuda::is_contiguous_on(int_identity_projection(), mr,
                                               copy, stream, on_device));
}
122+
123+
// The very last element (2) duplicates a value which has its own run earlier
// in the vector, so the vector is not contiguous.
TEST_F(CUDASanityContiguousOn, FalseOrderedPathologicalLast) {
    std::vector<int> values;

    for (int run = 0; run < 5000; ++run) {
        values.insert(values.end(),
                      static_cast<std::vector<int>::size_type>(run), run);
    }

    values.push_back(2);

    auto on_device = copy.to(vecmem::get_data(values), mr,
                             vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::cuda::is_contiguous_on(int_identity_projection(), mr,
                                                copy, stream, on_device));
}
140+
141+
// Runs in no particular order where the value 1 appears in two separate
// runs, so the vector is not contiguous.
TEST_F(CUDASanityContiguousOn, FalseRandom) {
    std::vector<int> values;

    for (int run : {603, 6432, 1, 3, 67, 1, 1111}) {
        values.insert(values.end(),
                      static_cast<std::vector<int>::size_type>(run), run);
    }

    auto on_device = copy.to(vecmem::get_data(values), mr,
                             vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::cuda::is_contiguous_on(int_identity_projection(), mr,
                                                copy, stream, on_device));
}

‎tests/cuda/test_sanity_ordered_on.cu

+134
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,134 @@
1+
/*
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
// vecmem includes
10+
#include <vecmem/memory/cuda/device_memory_resource.hpp>
11+
#include <vecmem/utils/cuda/async_copy.hpp>
12+
13+
// traccc includes
14+
#include <traccc/definitions/qualifiers.hpp>
15+
16+
#include "../../device/cuda/src/sanity/ordered_on.cuh"
17+
18+
// GTest include(s).
19+
#include <gtest/gtest.h>
20+
21+
struct int_lt_relation {
22+
TRACCC_HOST_DEVICE
23+
bool operator()(const int& a, const int& b) const { return a < b; }
24+
};
25+
26+
struct int_leq_relation {
27+
TRACCC_HOST_DEVICE
28+
bool operator()(const int& a, const int& b) const { return a <= b; }
29+
};
30+
31+
class CUDASanityOrderedOn : public testing::Test {
32+
protected:
33+
CUDASanityOrderedOn() : copy(stream.cudaStream()) {}
34+
35+
vecmem::cuda::device_memory_resource mr;
36+
traccc::cuda::stream stream;
37+
vecmem::cuda::async_copy copy;
38+
};
39+
40+
TEST_F(CUDASanityOrderedOn, TrueConsecutiveNoRepeatsLeq) {
    // Input: 0, 1, 2, ..., 499999. A strictly increasing sequence satisfies
    // the "<=" relation, so the check must report success.
    constexpr int n_elements = 500000;
    std::vector<int> host_vector;

    int next_value = 0;
    while (next_value < n_elements) {
        host_vector.push_back(next_value);
        ++next_value;
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::cuda::is_ordered_on(int_leq_relation(), mr, copy,
                                            stream, device_data));
}
53+
54+
TEST_F(CUDASanityOrderedOn, TrueConsecutiveNoRepeatsLt) {
    // Input: 0, 1, 2, ..., 499999. There are no repeated values, so even the
    // strict "<" relation holds and the check must report success.
    constexpr int n_elements = 500000;
    std::vector<int> host_vector;

    int next_value = 0;
    while (next_value < n_elements) {
        host_vector.push_back(next_value);
        ++next_value;
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::cuda::is_ordered_on(int_lt_relation(), mr, copy, stream,
                                            device_data));
}
67+
68+
TEST_F(CUDASanityOrderedOn, TrueConsecutiveRepeatsLeq) {
    // Input: for every i in [0, 5000) a run of i copies of i. The sequence
    // is non-decreasing, so the "<=" relation holds and the check must
    // report success.
    std::vector<int> host_vector;

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::cuda::is_ordered_on(int_leq_relation(), mr, copy,
                                            stream, device_data));
}
83+
84+
TEST_F(CUDASanityOrderedOn, FalseConsecutiveRepeatLt) {
    // Input: for every i in [0, 5000) a run of i copies of i. Repeated
    // neighbours violate the strict "<" relation, so the check must report
    // failure.
    std::vector<int> host_vector;

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::cuda::is_ordered_on(int_lt_relation(), mr, copy,
                                             stream, device_data));
}
99+
100+
TEST_F(CUDASanityOrderedOn, TrueConsecutivePathologicalFirstLeq) {
    // NOTE: despite the "True" prefix in the test name, this test expects
    // the check to FAIL.
    //
    // Input: one leading 4000, then for every i in [0, 5000) a run of i
    // copies of i. The leading 4000 is larger than the element right after
    // it, which violates the "<=" relation.
    std::vector<int> host_vector(1, 4000);

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::cuda::is_ordered_on(int_leq_relation(), mr, copy,
                                             stream, device_data));
}
117+
118+
TEST_F(CUDASanityOrderedOn, TrueConsecutivePathologicalLastLeq) {
    // NOTE: despite the "True" prefix in the test name (kept so existing
    // test filters continue to match), this test expects the check to FAIL.
    //
    // Input: for every i in [0, 5000) a run of i copies of i, followed by a
    // single trailing 2000. The out-of-order element is deliberately placed
    // at the very END of the input so that the final pair (4999, 2000) is
    // the only one violating the "<=" relation; this exercises the last
    // comparison performed by the check, complementing the
    // "PathologicalFirst" test which covers the first one.
    std::vector<int> host_vector;

    for (int i = 0; i < 5000; ++i) {
        for (int j = 0; j < i; ++j) {
            host_vector.push_back(i);
        }
    }

    // Out-of-order element at the tail of the sequence.
    host_vector.push_back(2000);

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::cuda::is_ordered_on(int_leq_relation(), mr, copy,
                                             stream, device_data));
}

‎tests/sycl/CMakeLists.txt

+2
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ traccc_add_test(
1919
test_mutex.sycl
2020
test_unique_lock.sycl
2121
test_cca.sycl
22+
test_sanity_contiguous_on.sycl
23+
test_sanity_ordered_on.sycl
2224

2325
LINK_LIBRARIES
2426
GTest::gtest_main
‎tests/sycl/test_sanity_contiguous_on.sycl

+159
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,159 @@
1+
/*
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
// vecmem includes
10+
#include <vecmem/memory/sycl/device_memory_resource.hpp>
11+
#include <vecmem/utils/sycl/async_copy.hpp>
12+
13+
// traccc includes
14+
#include <traccc/definitions/qualifiers.hpp>
15+
16+
#include "../../device/sycl/src/sanity/contiguous_on.hpp"
17+
18+
// GTest include(s).
19+
#include <gtest/gtest.h>
20+
21+
// System include
22+
#include <CL/sycl.hpp>
23+
24+
struct int_identity_projection {
25+
TRACCC_HOST_DEVICE
26+
int operator()(const int& v) const { return v; }
27+
};
28+
29+
class SYCLSanityContiguousOn : public testing::Test {
30+
protected:
31+
SYCLSanityContiguousOn() : queue_wrapper(&queue), copy(&queue) {}
32+
33+
vecmem::sycl::device_memory_resource mr;
34+
cl::sycl::queue queue;
35+
traccc::sycl::queue_wrapper queue_wrapper;
36+
vecmem::sycl::async_copy copy;
37+
};
38+
39+
TEST_F(SYCLSanityContiguousOn, TrueOrdered) {
    // Input: for every i in [0, 5000) a run of i copies of i. Each value
    // forms exactly one unbroken run, so the check must report success.
    std::vector<int> host_vector;

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::sycl::is_contiguous_on(
        int_identity_projection(), mr, copy, queue_wrapper, device_data));
}
54+
55+
TEST_F(SYCLSanityContiguousOn, TrueRandom) {
    // Input: unordered runs where each listed value v contributes v copies
    // of v. All listed values are distinct, so every value still forms one
    // unbroken run and the check must report success.
    std::vector<int> host_vector;

    for (int value : {603, 6432, 1, 3, 67, 2, 1111}) {
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::sycl::is_contiguous_on(
        int_identity_projection(), mr, copy, queue_wrapper, device_data));
}
70+
71+
TEST_F(SYCLSanityContiguousOn, FalseOrdered) {
    // Input: for every i in [0, 5000) a run of i copies of i, except that
    // the run for i == 105 is replaced by a single 5. The value 5 already
    // formed a run earlier, so its occurrences are split and the check must
    // report failure.
    std::vector<int> host_vector;

    for (int value = 0; value < 5000; ++value) {
        if (value != 105) {
            // Regular case: append `value` copies of `value`.
            host_vector.insert(host_vector.end(),
                               static_cast<std::vector<int>::size_type>(value),
                               value);
            continue;
        }

        // Pathological case: a stray, previously seen value.
        host_vector.push_back(5);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::sycl::is_contiguous_on(
        int_identity_projection(), mr, copy, queue_wrapper, device_data));
}
90+
91+
TEST_F(SYCLSanityContiguousOn, FalseOrderedPathologicalFirst) {
    // Input: one leading 4000, then for every i in [0, 5000) a run of i
    // copies of i. The value 4000 occurs again later in its own run, so its
    // occurrences are split and the check must report failure.
    std::vector<int> host_vector(1, 4000);

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::sycl::is_contiguous_on(
        int_identity_projection(), mr, copy, queue_wrapper, device_data));
}
108+
109+
TEST_F(SYCLSanityContiguousOn, TrueOrderedPathologicalFirst) {
    // Input: one leading 6000, then for every i in [0, 5000) a run of i
    // copies of i. The value 6000 never shows up again, so the leading
    // element does not break any run and the check must report success.
    std::vector<int> host_vector(1, 6000);

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::sycl::is_contiguous_on(
        int_identity_projection(), mr, copy, queue_wrapper, device_data));
}
126+
127+
TEST_F(SYCLSanityContiguousOn, FalseOrderedPathologicalLast) {
    // Input: for every i in [0, 5000) a run of i copies of i, followed by a
    // single trailing 2. The value 2 already formed a run near the start, so
    // the trailing element splits it and the check must report failure.
    std::vector<int> host_vector;

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    host_vector.push_back(2);

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::sycl::is_contiguous_on(
        int_identity_projection(), mr, copy, queue_wrapper, device_data));
}
144+
145+
TEST_F(SYCLSanityContiguousOn, FalseRandom) {
    // Input: unordered runs where each listed value v contributes v copies
    // of v. The value 1 is listed twice with other runs in between, so its
    // occurrences are split and the check must report failure.
    std::vector<int> host_vector;

    for (int value : {603, 6432, 1, 3, 67, 1, 1111}) {
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::sycl::is_contiguous_on(
        int_identity_projection(), mr, copy, queue_wrapper, device_data));
}
‎tests/sycl/test_sanity_ordered_on.sycl

+139
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,139 @@
1+
/*
2+
* traccc library, part of the ACTS project (R&D line)
3+
*
4+
* (c) 2024 CERN for the benefit of the ACTS project
5+
*
6+
* Mozilla Public License Version 2.0
7+
*/
8+
9+
// vecmem includes
10+
#include <vecmem/memory/sycl/device_memory_resource.hpp>
11+
#include <vecmem/utils/sycl/async_copy.hpp>
12+
13+
// traccc includes
14+
#include <traccc/definitions/qualifiers.hpp>
15+
16+
#include "../../device/sycl/src/sanity/ordered_on.hpp"
17+
#include "traccc/sycl/utils/queue_wrapper.hpp"
18+
19+
// GTest include(s).
20+
#include <gtest/gtest.h>
21+
22+
// System include
23+
#include <CL/sycl.hpp>
24+
25+
struct int_lt_relation {
26+
TRACCC_HOST_DEVICE
27+
bool operator()(const int& a, const int& b) const { return a < b; }
28+
};
29+
30+
struct int_leq_relation {
31+
TRACCC_HOST_DEVICE
32+
bool operator()(const int& a, const int& b) const { return a <= b; }
33+
};
34+
35+
class SYCLSanityOrderedOn : public testing::Test {
36+
protected:
37+
SYCLSanityOrderedOn() : queue_wrapper(&queue), copy(&queue) {}
38+
39+
vecmem::sycl::device_memory_resource mr;
40+
cl::sycl::queue queue;
41+
traccc::sycl::queue_wrapper queue_wrapper;
42+
vecmem::sycl::async_copy copy;
43+
};
44+
45+
TEST_F(SYCLSanityOrderedOn, TrueConsecutiveNoRepeatsLeq) {
    // Input: 0, 1, 2, ..., 499999. A strictly increasing sequence satisfies
    // the "<=" relation, so the check must report success.
    constexpr int n_elements = 500000;
    std::vector<int> host_vector;

    int next_value = 0;
    while (next_value < n_elements) {
        host_vector.push_back(next_value);
        ++next_value;
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::sycl::is_ordered_on(int_leq_relation(), mr, copy,
                                            queue_wrapper, device_data));
}
58+
59+
TEST_F(SYCLSanityOrderedOn, TrueConsecutiveNoRepeatsLt) {
    // Input: 0, 1, 2, ..., 499999. There are no repeated values, so even the
    // strict "<" relation holds and the check must report success.
    constexpr int n_elements = 500000;
    std::vector<int> host_vector;

    int next_value = 0;
    while (next_value < n_elements) {
        host_vector.push_back(next_value);
        ++next_value;
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::sycl::is_ordered_on(int_lt_relation(), mr, copy,
                                            queue_wrapper, device_data));
}
72+
73+
TEST_F(SYCLSanityOrderedOn, TrueConsecutiveRepeatsLeq) {
    // Input: for every i in [0, 5000) a run of i copies of i. The sequence
    // is non-decreasing, so the "<=" relation holds and the check must
    // report success.
    std::vector<int> host_vector;

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_TRUE(traccc::sycl::is_ordered_on(int_leq_relation(), mr, copy,
                                            queue_wrapper, device_data));
}
88+
89+
TEST_F(SYCLSanityOrderedOn, FalseConsecutiveRepeatLt) {
    // Input: for every i in [0, 5000) a run of i copies of i. Repeated
    // neighbours violate the strict "<" relation, so the check must report
    // failure.
    std::vector<int> host_vector;

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::sycl::is_ordered_on(int_lt_relation(), mr, copy,
                                             queue_wrapper, device_data));
}
104+
105+
TEST_F(SYCLSanityOrderedOn, TrueConsecutivePathologicalFirstLeq) {
    // NOTE: despite the "True" prefix in the test name, this test expects
    // the check to FAIL.
    //
    // Input: one leading 4000, then for every i in [0, 5000) a run of i
    // copies of i. The leading 4000 is larger than the element right after
    // it, which violates the "<=" relation.
    std::vector<int> host_vector(1, 4000);

    for (int value = 0; value < 5000; ++value) {
        // Append `value` copies of `value` in one go.
        host_vector.insert(host_vector.end(),
                           static_cast<std::vector<int>::size_type>(value),
                           value);
    }

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::sycl::is_ordered_on(int_leq_relation(), mr, copy,
                                             queue_wrapper, device_data));
}
122+
123+
TEST_F(SYCLSanityOrderedOn, TrueConsecutivePathologicalLastLeq) {
    // NOTE: despite the "True" prefix in the test name (kept so existing
    // test filters continue to match), this test expects the check to FAIL.
    //
    // Input: for every i in [0, 5000) a run of i copies of i, followed by a
    // single trailing 2000. The out-of-order element is deliberately placed
    // at the very END of the input so that the final pair (4999, 2000) is
    // the only one violating the "<=" relation; this exercises the last
    // comparison performed by the check, complementing the
    // "PathologicalFirst" test which covers the first one.
    std::vector<int> host_vector;

    for (int i = 0; i < 5000; ++i) {
        for (int j = 0; j < i; ++j) {
            host_vector.push_back(i);
        }
    }

    // Out-of-order element at the tail of the sequence.
    host_vector.push_back(2000);

    auto device_data = copy.to(vecmem::get_data(host_vector), mr,
                               vecmem::copy::type::host_to_device);

    ASSERT_FALSE(traccc::sycl::is_ordered_on(int_leq_relation(), mr, copy,
                                             queue_wrapper, device_data));
}

0 commit comments

Comments
 (0)
Please sign in to comment.