4141# pragma system_header
4242#endif // no system header
4343
44+ #include < cub/detail/choose_offset.cuh>
4445#include < cub/detail/nvtx.cuh>
4546#include < cub/device/dispatch/dispatch_segmented_sort.cuh>
4647#include < cub/util_namespace.cuh>
4748
49+ #include < cuda/std/cstdint>
50+
4851CUB_NAMESPACE_BEGIN
4952
5053// ! @rst
@@ -140,16 +143,19 @@ private:
140143 std::size_t & temp_storage_bytes,
141144 const KeyT* d_keys_in,
142145 KeyT* d_keys_out,
143- int num_items,
144- int num_segments,
146+ ::cuda::std:: int64_t num_items,
147+ ::cuda::std:: int64_t num_segments,
145148 BeginOffsetIteratorT d_begin_offsets,
146149 EndOffsetIteratorT d_end_offsets,
147150 cudaStream_t stream = 0 )
148151 {
149152 constexpr bool is_descending = false ;
150153 constexpr bool is_overwrite_okay = false ;
154+
155+ using OffsetT =
156+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
151157 using DispatchT =
152- DispatchSegmentedSort<is_descending, KeyT, cub::NullType, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
158+ DispatchSegmentedSort<is_descending, KeyT, cub::NullType, OffsetT , BeginOffsetIteratorT, EndOffsetIteratorT>;
153159
154160 DoubleBuffer<KeyT> d_keys (const_cast <KeyT*>(d_keys_in), d_keys_out);
155161 DoubleBuffer<NullType> d_values;
@@ -286,8 +292,8 @@ public:
286292 std::size_t & temp_storage_bytes,
287293 const KeyT* d_keys_in,
288294 KeyT* d_keys_out,
289- int num_items,
290- int num_segments,
295+ ::cuda::std:: int64_t num_items,
296+ ::cuda::std:: int64_t num_segments,
291297 BeginOffsetIteratorT d_begin_offsets,
292298 EndOffsetIteratorT d_end_offsets,
293299 cudaStream_t stream = 0 )
@@ -313,16 +319,19 @@ private:
313319 std::size_t & temp_storage_bytes,
314320 const KeyT* d_keys_in,
315321 KeyT* d_keys_out,
316- int num_items,
317- int num_segments,
322+ ::cuda::std:: int64_t num_items,
323+ ::cuda::std:: int64_t num_segments,
318324 BeginOffsetIteratorT d_begin_offsets,
319325 EndOffsetIteratorT d_end_offsets,
320326 cudaStream_t stream = 0 )
321327 {
322328 constexpr bool is_descending = true ;
323329 constexpr bool is_overwrite_okay = false ;
330+
331+ using OffsetT =
332+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
324333 using DispatchT =
325- DispatchSegmentedSort<is_descending, KeyT, cub::NullType, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
334+ DispatchSegmentedSort<is_descending, KeyT, cub::NullType, OffsetT , BeginOffsetIteratorT, EndOffsetIteratorT>;
326335
327336 DoubleBuffer<KeyT> d_keys (const_cast <KeyT*>(d_keys_in), d_keys_out);
328337 DoubleBuffer<NullType> d_values;
@@ -454,8 +463,8 @@ public:
454463 std::size_t & temp_storage_bytes,
455464 const KeyT* d_keys_in,
456465 KeyT* d_keys_out,
457- int num_items,
458- int num_segments,
466+ ::cuda::std:: int64_t num_items,
467+ ::cuda::std:: int64_t num_segments,
459468 BeginOffsetIteratorT d_begin_offsets,
460469 EndOffsetIteratorT d_end_offsets,
461470 cudaStream_t stream = 0 )
@@ -480,17 +489,18 @@ private:
480489 void * d_temp_storage,
481490 std::size_t & temp_storage_bytes,
482491 DoubleBuffer<KeyT>& d_keys,
483- int num_items,
484- int num_segments,
492+ ::cuda::std:: int64_t num_items,
493+ ::cuda::std:: int64_t num_segments,
485494 BeginOffsetIteratorT d_begin_offsets,
486495 EndOffsetIteratorT d_end_offsets,
487496 cudaStream_t stream = 0 )
488497 {
489498 constexpr bool is_descending = false ;
490499 constexpr bool is_overwrite_okay = true ;
491-
500+ using OffsetT =
501+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
492502 using DispatchT =
493- DispatchSegmentedSort<is_descending, KeyT, cub::NullType, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
503+ DispatchSegmentedSort<is_descending, KeyT, cub::NullType, OffsetT , BeginOffsetIteratorT, EndOffsetIteratorT>;
494504
495505 DoubleBuffer<NullType> d_values;
496506
@@ -632,8 +642,8 @@ public:
632642 void * d_temp_storage,
633643 std::size_t & temp_storage_bytes,
634644 DoubleBuffer<KeyT>& d_keys,
635- int num_items,
636- int num_segments,
645+ ::cuda::std:: int64_t num_items,
646+ ::cuda::std:: int64_t num_segments,
637647 BeginOffsetIteratorT d_begin_offsets,
638648 EndOffsetIteratorT d_end_offsets,
639649 cudaStream_t stream = 0 )
@@ -650,17 +660,18 @@ private:
650660 void * d_temp_storage,
651661 std::size_t & temp_storage_bytes,
652662 DoubleBuffer<KeyT>& d_keys,
653- int num_items,
654- int num_segments,
663+ ::cuda::std:: int64_t num_items,
664+ ::cuda::std:: int64_t num_segments,
655665 BeginOffsetIteratorT d_begin_offsets,
656666 EndOffsetIteratorT d_end_offsets,
657667 cudaStream_t stream = 0 )
658668 {
659669 constexpr bool is_descending = true ;
660670 constexpr bool is_overwrite_okay = true ;
661-
671+ using OffsetT =
672+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
662673 using DispatchT =
663- DispatchSegmentedSort<is_descending, KeyT, cub::NullType, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
674+ DispatchSegmentedSort<is_descending, KeyT, cub::NullType, OffsetT , BeginOffsetIteratorT, EndOffsetIteratorT>;
664675
665676 DoubleBuffer<NullType> d_values;
666677
@@ -803,8 +814,8 @@ public:
803814 void * d_temp_storage,
804815 std::size_t & temp_storage_bytes,
805816 DoubleBuffer<KeyT>& d_keys,
806- int num_items,
807- int num_segments,
817+ ::cuda::std:: int64_t num_items,
818+ ::cuda::std:: int64_t num_segments,
808819 BeginOffsetIteratorT d_begin_offsets,
809820 EndOffsetIteratorT d_end_offsets,
810821 cudaStream_t stream = 0 )
@@ -931,8 +942,8 @@ public:
931942 std::size_t & temp_storage_bytes,
932943 const KeyT* d_keys_in,
933944 KeyT* d_keys_out,
934- int num_items,
935- int num_segments,
945+ ::cuda::std:: int64_t num_items,
946+ ::cuda::std:: int64_t num_segments,
936947 BeginOffsetIteratorT d_begin_offsets,
937948 EndOffsetIteratorT d_end_offsets,
938949 cudaStream_t stream = 0 )
@@ -1067,8 +1078,8 @@ public:
10671078 std::size_t & temp_storage_bytes,
10681079 const KeyT* d_keys_in,
10691080 KeyT* d_keys_out,
1070- int num_items,
1071- int num_segments,
1081+ ::cuda::std:: int64_t num_items,
1082+ ::cuda::std:: int64_t num_segments,
10721083 BeginOffsetIteratorT d_begin_offsets,
10731084 EndOffsetIteratorT d_end_offsets,
10741085 cudaStream_t stream = 0 )
@@ -1213,8 +1224,8 @@ public:
12131224 void * d_temp_storage,
12141225 std::size_t & temp_storage_bytes,
12151226 DoubleBuffer<KeyT>& d_keys,
1216- int num_items,
1217- int num_segments,
1227+ ::cuda::std:: int64_t num_items,
1228+ ::cuda::std:: int64_t num_segments,
12181229 BeginOffsetIteratorT d_begin_offsets,
12191230 EndOffsetIteratorT d_end_offsets,
12201231 cudaStream_t stream = 0 )
@@ -1350,8 +1361,8 @@ public:
13501361 void * d_temp_storage,
13511362 std::size_t & temp_storage_bytes,
13521363 DoubleBuffer<KeyT>& d_keys,
1353- int num_items,
1354- int num_segments,
1364+ ::cuda::std:: int64_t num_items,
1365+ ::cuda::std:: int64_t num_segments,
13551366 BeginOffsetIteratorT d_begin_offsets,
13561367 EndOffsetIteratorT d_end_offsets,
13571368 cudaStream_t stream = 0 )
@@ -1371,15 +1382,19 @@ private:
13711382 KeyT* d_keys_out,
13721383 const ValueT* d_values_in,
13731384 ValueT* d_values_out,
1374- int num_items,
1375- int num_segments,
1385+ ::cuda::std:: int64_t num_items,
1386+ ::cuda::std:: int64_t num_segments,
13761387 BeginOffsetIteratorT d_begin_offsets,
13771388 EndOffsetIteratorT d_end_offsets,
13781389 cudaStream_t stream = 0 )
13791390 {
13801391 constexpr bool is_descending = false ;
13811392 constexpr bool is_overwrite_okay = false ;
1382- using DispatchT = DispatchSegmentedSort<is_descending, KeyT, ValueT, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
1393+
1394+ using OffsetT =
1395+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
1396+ using DispatchT =
1397+ DispatchSegmentedSort<is_descending, KeyT, ValueT, OffsetT, BeginOffsetIteratorT, EndOffsetIteratorT>;
13831398
13841399 DoubleBuffer<KeyT> d_keys (const_cast <KeyT*>(d_keys_in), d_keys_out);
13851400 DoubleBuffer<ValueT> d_values (const_cast <ValueT*>(d_values_in), d_values_out);
@@ -1539,8 +1554,8 @@ public:
15391554 KeyT* d_keys_out,
15401555 const ValueT* d_values_in,
15411556 ValueT* d_values_out,
1542- int num_items,
1543- int num_segments,
1557+ ::cuda::std:: int64_t num_items,
1558+ ::cuda::std:: int64_t num_segments,
15441559 BeginOffsetIteratorT d_begin_offsets,
15451560 EndOffsetIteratorT d_end_offsets,
15461561 cudaStream_t stream = 0 )
@@ -1570,15 +1585,19 @@ private:
15701585 KeyT* d_keys_out,
15711586 const ValueT* d_values_in,
15721587 ValueT* d_values_out,
1573- int num_items,
1574- int num_segments,
1588+ ::cuda::std:: int64_t num_items,
1589+ ::cuda::std:: int64_t num_segments,
15751590 BeginOffsetIteratorT d_begin_offsets,
15761591 EndOffsetIteratorT d_end_offsets,
15771592 cudaStream_t stream = 0 )
15781593 {
15791594 constexpr bool is_descending = true ;
15801595 constexpr bool is_overwrite_okay = false ;
1581- using DispatchT = DispatchSegmentedSort<is_descending, KeyT, ValueT, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
1596+
1597+ using OffsetT =
1598+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
1599+ using DispatchT =
1600+ DispatchSegmentedSort<is_descending, KeyT, ValueT, OffsetT, BeginOffsetIteratorT, EndOffsetIteratorT>;
15821601
15831602 DoubleBuffer<KeyT> d_keys (const_cast <KeyT*>(d_keys_in), d_keys_out);
15841603 DoubleBuffer<ValueT> d_values (const_cast <ValueT*>(d_values_in), d_values_out);
@@ -1734,8 +1753,8 @@ public:
17341753 KeyT* d_keys_out,
17351754 const ValueT* d_values_in,
17361755 ValueT* d_values_out,
1737- int num_items,
1738- int num_segments,
1756+ ::cuda::std:: int64_t num_items,
1757+ ::cuda::std:: int64_t num_segments,
17391758 BeginOffsetIteratorT d_begin_offsets,
17401759 EndOffsetIteratorT d_end_offsets,
17411760 cudaStream_t stream = 0 )
@@ -1763,15 +1782,19 @@ private:
17631782 std::size_t & temp_storage_bytes,
17641783 DoubleBuffer<KeyT>& d_keys,
17651784 DoubleBuffer<ValueT>& d_values,
1766- int num_items,
1767- int num_segments,
1785+ ::cuda::std:: int64_t num_items,
1786+ ::cuda::std:: int64_t num_segments,
17681787 BeginOffsetIteratorT d_begin_offsets,
17691788 EndOffsetIteratorT d_end_offsets,
17701789 cudaStream_t stream = 0 )
17711790 {
17721791 constexpr bool is_descending = false ;
17731792 constexpr bool is_overwrite_okay = true ;
1774- using DispatchT = DispatchSegmentedSort<is_descending, KeyT, ValueT, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
1793+
1794+ using OffsetT =
1795+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
1796+ using DispatchT =
1797+ DispatchSegmentedSort<is_descending, KeyT, ValueT, OffsetT, BeginOffsetIteratorT, EndOffsetIteratorT>;
17751798
17761799 return DispatchT::Dispatch (
17771800 d_temp_storage,
@@ -1931,8 +1954,8 @@ public:
19311954 std::size_t & temp_storage_bytes,
19321955 DoubleBuffer<KeyT>& d_keys,
19331956 DoubleBuffer<ValueT>& d_values,
1934- int num_items,
1935- int num_segments,
1957+ ::cuda::std:: int64_t num_items,
1958+ ::cuda::std:: int64_t num_segments,
19361959 BeginOffsetIteratorT d_begin_offsets,
19371960 EndOffsetIteratorT d_end_offsets,
19381961 cudaStream_t stream = 0 )
@@ -1958,15 +1981,19 @@ private:
19581981 std::size_t & temp_storage_bytes,
19591982 DoubleBuffer<KeyT>& d_keys,
19601983 DoubleBuffer<ValueT>& d_values,
1961- int num_items,
1962- int num_segments,
1984+ ::cuda::std:: int64_t num_items,
1985+ ::cuda::std:: int64_t num_segments,
19631986 BeginOffsetIteratorT d_begin_offsets,
19641987 EndOffsetIteratorT d_end_offsets,
19651988 cudaStream_t stream = 0 )
19661989 {
19671990 constexpr bool is_descending = true ;
19681991 constexpr bool is_overwrite_okay = true ;
1969- using DispatchT = DispatchSegmentedSort<is_descending, KeyT, ValueT, int , BeginOffsetIteratorT, EndOffsetIteratorT>;
1992+
1993+ using OffsetT =
1994+ detail::choose_signed_offset_t <detail::common_iterator_value_t <BeginOffsetIteratorT, EndOffsetIteratorT>>;
1995+ using DispatchT =
1996+ DispatchSegmentedSort<is_descending, KeyT, ValueT, OffsetT, BeginOffsetIteratorT, EndOffsetIteratorT>;
19701997
19711998 return DispatchT::Dispatch (
19721999 d_temp_storage,
@@ -2125,8 +2152,8 @@ public:
21252152 std::size_t & temp_storage_bytes,
21262153 DoubleBuffer<KeyT>& d_keys,
21272154 DoubleBuffer<ValueT>& d_values,
2128- int num_items,
2129- int num_segments,
2155+ ::cuda::std:: int64_t num_items,
2156+ ::cuda::std:: int64_t num_segments,
21302157 BeginOffsetIteratorT d_begin_offsets,
21312158 EndOffsetIteratorT d_end_offsets,
21322159 cudaStream_t stream = 0 )
@@ -2281,8 +2308,8 @@ public:
22812308 KeyT* d_keys_out,
22822309 const ValueT* d_values_in,
22832310 ValueT* d_values_out,
2284- int num_items,
2285- int num_segments,
2311+ ::cuda::std:: int64_t num_items,
2312+ ::cuda::std:: int64_t num_segments,
22862313 BeginOffsetIteratorT d_begin_offsets,
22872314 EndOffsetIteratorT d_end_offsets,
22882315 cudaStream_t stream = 0 )
@@ -2439,8 +2466,8 @@ public:
24392466 KeyT* d_keys_out,
24402467 const ValueT* d_values_in,
24412468 ValueT* d_values_out,
2442- int num_items,
2443- int num_segments,
2469+ ::cuda::std:: int64_t num_items,
2470+ ::cuda::std:: int64_t num_segments,
24442471 BeginOffsetIteratorT d_begin_offsets,
24452472 EndOffsetIteratorT d_end_offsets,
24462473 cudaStream_t stream = 0 )
@@ -2605,8 +2632,8 @@ public:
26052632 std::size_t & temp_storage_bytes,
26062633 DoubleBuffer<KeyT>& d_keys,
26072634 DoubleBuffer<ValueT>& d_values,
2608- int num_items,
2609- int num_segments,
2635+ ::cuda::std:: int64_t num_items,
2636+ ::cuda::std:: int64_t num_segments,
26102637 BeginOffsetIteratorT d_begin_offsets,
26112638 EndOffsetIteratorT d_end_offsets,
26122639 cudaStream_t stream = 0 )
@@ -2768,8 +2795,8 @@ public:
27682795 std::size_t & temp_storage_bytes,
27692796 DoubleBuffer<KeyT>& d_keys,
27702797 DoubleBuffer<ValueT>& d_values,
2771- int num_items,
2772- int num_segments,
2798+ ::cuda::std:: int64_t num_items,
2799+ ::cuda::std:: int64_t num_segments,
27732800 BeginOffsetIteratorT d_begin_offsets,
27742801 EndOffsetIteratorT d_end_offsets,
27752802 cudaStream_t stream = 0 )
0 commit comments