Skip to content

Commit cb110e6

Browse files
elstehle authored and davebayer committed
Adds support for large number of items and large number of segments to DeviceSegmentedSort (NVIDIA#3308)
* fixes segment offset generation
* switches to analytical verification
* switches to analytical verification for pairs
* addresses review comments
* introduces segment offset type
* adds tests for large number of segments
* adds support for large number of segments
* drops segment offset type
* fixes thrust namespace
* removes about-to-be-deprecated cub iterators
* no exec specifier on defaulted ctor
* fixes gcc7 linker error
* uses local_segment_index_t throughout
* determine offset type based on type returned by segment iterator begin/end iterators
* minor style improvements
1 parent 834ab8e commit cb110e6

File tree

4 files changed

+4
-11
lines changed

4 files changed

+4
-11
lines changed

cub/cub/device/dispatch/dispatch_segmented_sort.cuh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,6 @@
6060
#include <thrust/system/cuda/detail/core/triple_chevron_launch.h>
6161

6262
#include <cuda/cmath>
63-
#include <cuda/std/__algorithm/max.h>
64-
#include <cuda/std/__algorithm/min.h>
6563
#include <cuda/std/type_traits>
6664

6765
#include <type_traits>
@@ -960,7 +958,7 @@ struct DispatchSegmentedSort
960958
constexpr auto num_segments_per_invocation_limit =
961959
static_cast<global_segment_offset_t>(::cuda::std::numeric_limits<int>::max());
962960
auto const max_num_segments_per_invocation = static_cast<global_segment_offset_t>(
963-
(::cuda::std::min)(static_cast<global_segment_offset_t>(num_segments), num_segments_per_invocation_limit));
961+
::cuda::std::min(static_cast<global_segment_offset_t>(num_segments), num_segments_per_invocation_limit));
964962

965963
large_and_medium_segments_indices.grow(max_num_segments_per_invocation);
966964
small_segments_indices.grow(max_num_segments_per_invocation);

cub/test/catch2_segmented_sort_helper.cuh

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ struct segment_index_to_offset_op
8080
OffsetT segment_size;
8181
OffsetT num_items;
8282

83-
_CCCL_HOST_DEVICE __forceinline__ OffsetT operator()(SegmentIndexT i)
83+
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE OffsetT operator()(SegmentIndexT i)
8484
{
8585
if (i < num_empty_segments)
8686
{
@@ -103,16 +103,15 @@ struct mod_n
103103
std::size_t mod;
104104

105105
template <typename IndexT>
106-
_CCCL_HOST_DEVICE __forceinline__ T operator()(IndexT x)
106+
_CCCL_HOST_DEVICE _CCCL_FORCEINLINE T operator()(IndexT x)
107107
{
108108
return static_cast<T>(x % mod);
109109
}
110110
};
111111

112112
template <typename KeyT>
113-
class short_key_verification_helper
113+
struct short_key_verification_helper
114114
{
115-
private:
116115
using key_t = KeyT;
117116
// The histogram size of the keys being sorted for later verification
118117
const std::int64_t max_histo_size = std::int64_t{1} << ::cuda::std::numeric_limits<key_t>::digits;

cub/test/catch2_test_device_segmented_sort_keys.cu

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,6 @@ C2H_TEST("DeviceSegmentedSortKeys: Unspecified segments, random keys", "[keys][s
178178
test_unspecified_segments_random<KeyT>(C2H_SEED(4));
179179
}
180180

181-
#if defined(CCCL_TEST_ENABLE_LARGE_SEGMENTED_SORT)
182-
183181
C2H_TEST("DeviceSegmentedSortKeys: very large number of segments", "[keys][segmented][sort][device]", all_offset_types)
184182
try
185183
{

cub/test/catch2_test_device_segmented_sort_pairs.cu

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,6 @@ C2H_TEST("DeviceSegmentedSortPairs: Unspecified segments, random key/values",
199199
test_unspecified_segments_random<KeyT, ValueT>(C2H_SEED(4));
200200
}
201201

202-
#if defined(CCCL_TEST_ENABLE_LARGE_SEGMENTED_SORT)
203-
204202
C2H_TEST("DeviceSegmentedSortPairs: very large num. items and num. segments",
205203
"[pairs][segmented][sort][device]",
206204
all_offset_types)

0 commit comments

Comments
 (0)