Skip to content

Commit f08d6a6

Browse files
elstehle and davebayer
authored and committed
Improves DeviceSegmentedSort test run time for large number of items and segments (NVIDIA#3246)
* fixes segment offset generation
* switches to analytical verification
* switches to analytical verification for pairs
* fixes spelling
* adds tests for large number of segments
* fixes narrowing conversion in tests
* addresses review comments
* fixes includes
1 parent ec2cd74 commit f08d6a6

File tree

4 files changed: 10 additions (+10) and 5 deletions (-5)

cub/test/catch2_radix_sort_helper.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ struct segment_iterator
6969

7070
__host__ __device__ OffsetT operator()(std::int64_t x) const
7171
{
72-
return (::cuda::std::min)(last, x * Step);
72+
return ::cuda::std::min(last, x * Step);
7373
}
7474
};
7575

cub/test/catch2_segmented_sort_helper.cuh

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -80,7 +80,7 @@ struct segment_index_to_offset_op
8080
OffsetT segment_size;
8181
OffsetT num_items;
8282

83-
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE OffsetT operator()(SegmentIndexT i)
83+
_CCCL_HOST_DEVICE __forceinline__ OffsetT operator()(SegmentIndexT i)
8484
{
8585
if (i < num_empty_segments)
8686
{
@@ -103,15 +103,16 @@ struct mod_n
103103
std::size_t mod;
104104

105105
template <typename IndexT>
106-
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE T operator()(IndexT x)
106+
_CCCL_HOST_DEVICE __forceinline__ T operator()(IndexT x)
107107
{
108108
return static_cast<T>(x % mod);
109109
}
110110
};
111111

112112
template <typename KeyT>
113-
struct short_key_verification_helper
113+
class short_key_verification_helper
114114
{
115+
private:
115116
using key_t = KeyT;
116117
// The histogram size of the keys being sorted for later verification
117118
const std::int64_t max_histo_size = std::int64_t{1} << ::cuda::std::numeric_limits<key_t>::digits;
@@ -250,7 +251,7 @@ public:
250251
auto const next_end =
251252
(uniques_index == count - 1) ? out_keys.size() : h_unique_indexes_out[uniques_index + 1];
252253
REQUIRE(h_unique_keys_out[uniques_index] == i);
253-
REQUIRE(next_end - h_unique_indexes_out[uniques_index] == static_cast<std::size_t>(segment_histogram[i]));
254+
REQUIRE(next_end - h_unique_indexes_out[uniques_index] == segment_histogram[i]);
254255
current_offset += segment_histogram[i];
255256
uniques_index++;
256257
}

cub/test/catch2_test_device_segmented_sort_keys.cu

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,8 @@ C2H_TEST("DeviceSegmentedSortKeys: Unspecified segments, random keys", "[keys][s
178178
test_unspecified_segments_random<KeyT>(C2H_SEED(4));
179179
}
180180

181+
#if defined(CCCL_TEST_ENABLE_LARGE_SEGMENTED_SORT)
182+
181183
C2H_TEST("DeviceSegmentedSortKeys: very large number of segments", "[keys][segmented][sort][device]", all_offset_types)
182184
try
183185
{

cub/test/catch2_test_device_segmented_sort_pairs.cu

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -199,6 +199,8 @@ C2H_TEST("DeviceSegmentedSortPairs: Unspecified segments, random key/values",
199199
test_unspecified_segments_random<KeyT, ValueT>(C2H_SEED(4));
200200
}
201201

202+
#if defined(CCCL_TEST_ENABLE_LARGE_SEGMENTED_SORT)
203+
202204
C2H_TEST("DeviceSegmentedSortPairs: very large num. items and num. segments",
203205
"[pairs][segmented][sort][device]",
204206
all_offset_types)

0 commit comments

Comments
 (0)