5353
5454#include < thrust/system/cuda/detail/core/triple_chevron_launch.h>
5555
56+ #include < cuda/std/__algorithm_>
5657#include < cuda/std/type_traits>
5758
5859#include < iterator>
@@ -275,7 +276,7 @@ struct DispatchRadixSort
275276 cudaError error = cudaSuccess;
276277 do
277278 {
278- int pass_bits = CUB_MIN (pass_config.radix_bits , ( end_bit - current_bit) );
279+ int pass_bits = _CUDA_VSTD::min (pass_config.radix_bits , end_bit - current_bit);
279280
280281// Log upsweep_kernel configuration
281282#ifdef CUB_DEBUG_LOG
@@ -447,7 +448,7 @@ struct DispatchRadixSort
447448 max_downsweep_grid_size = (downsweep_config.sm_occupancy * sm_count) * CUB_SUBSCRIPTION_FACTOR (0 );
448449
449450 even_share.DispatchInit (
450- num_items, max_downsweep_grid_size, CUB_MAX (downsweep_config.tile_size , upsweep_config.tile_size ));
451+ num_items, max_downsweep_grid_size, _CUDA_VSTD::max (downsweep_config.tile_size , upsweep_config.tile_size ));
451452
452453 } while (0 );
453454 return error;
@@ -472,8 +473,8 @@ struct DispatchRadixSort
472473 constexpr PortionOffsetT PORTION_SIZE = ((1 << 28 ) - 1 ) / ONESWEEP_TILE_ITEMS * ONESWEEP_TILE_ITEMS;
473474 int num_passes = ::cuda::ceil_div (end_bit - begin_bit, RADIX_BITS);
474475 OffsetT num_portions = static_cast <OffsetT>(::cuda::ceil_div (num_items, PORTION_SIZE));
475- PortionOffsetT max_num_blocks =
476- ::cuda::ceil_div ( static_cast <int >(CUB_MIN (num_items, static_cast <OffsetT>(PORTION_SIZE))), ONESWEEP_TILE_ITEMS);
476+ PortionOffsetT max_num_blocks = :: cuda::ceil_div (
477+ static_cast <int >(_CUDA_VSTD::min (num_items, static_cast <OffsetT>(PORTION_SIZE))), ONESWEEP_TILE_ITEMS);
477478
478479 size_t value_size = KEYS_ONLY ? 0 : sizeof (ValueT);
479480 size_t allocation_sizes[] = {
@@ -611,11 +612,11 @@ struct DispatchRadixSort
611612
612613 for (int current_bit = begin_bit, pass = 0 ; current_bit < end_bit; current_bit += RADIX_BITS, ++pass)
613614 {
614- int num_bits = CUB_MIN (end_bit - current_bit, RADIX_BITS);
615+ int num_bits = _CUDA_VSTD::min (end_bit - current_bit, RADIX_BITS);
615616 for (OffsetT portion = 0 ; portion < num_portions; ++portion)
616617 {
617- PortionOffsetT portion_num_items = static_cast <PortionOffsetT>(
618- CUB_MIN ( num_items - portion * PORTION_SIZE, static_cast < OffsetT>( PORTION_SIZE) ));
618+ PortionOffsetT portion_num_items =
619+ static_cast <PortionOffsetT>( _CUDA_VSTD::min ( num_items - portion * PORTION_SIZE, OffsetT{ PORTION_SIZE} ));
619620
620621 PortionOffsetT num_blocks = ::cuda::ceil_div (portion_num_items, ONESWEEP_TILE_ITEMS);
621622
@@ -777,7 +778,7 @@ struct DispatchRadixSort
777778 }
778779
779780 // Get maximum spine length
780- int max_grid_size = CUB_MAX (pass_config.max_downsweep_grid_size , alt_pass_config.max_downsweep_grid_size );
781+ int max_grid_size = _CUDA_VSTD::max (pass_config.max_downsweep_grid_size , alt_pass_config.max_downsweep_grid_size );
781782 int spine_length = (max_grid_size * pass_config.radix_digits ) + pass_config.scan_config .tile_size ;
782783
783784 // Temporary storage allocation requirements
@@ -812,7 +813,7 @@ struct DispatchRadixSort
812813 int num_passes = ::cuda::ceil_div (num_bits, pass_config.radix_bits );
813814 bool is_num_passes_odd = num_passes & 1 ;
814815 int max_alt_passes = (num_passes * pass_config.radix_bits ) - num_bits;
815- int alt_end_bit = CUB_MIN (end_bit, begin_bit + (max_alt_passes * alt_pass_config.radix_bits ));
816+ int alt_end_bit = _CUDA_VSTD::min (end_bit, begin_bit + (max_alt_passes * alt_pass_config.radix_bits ));
816817
817818 // Alias the temporary storage allocations
818819 OffsetT* d_spine = static_cast <OffsetT*>(allocations[0 ]);
@@ -1241,7 +1242,7 @@ struct DispatchSegmentedRadixSort
12411242 cudaError error = cudaSuccess;
12421243 do
12431244 {
1244- int pass_bits = CUB_MIN (pass_config.radix_bits , (end_bit - current_bit));
1245+ int pass_bits = _CUDA_VSTD::min (pass_config.radix_bits , (end_bit - current_bit));
12451246
12461247// Log kernel configuration
12471248#ifdef CUB_DEBUG_LOG
@@ -1381,10 +1382,10 @@ struct DispatchSegmentedRadixSort
13811382 int radix_bits = ActivePolicyT::SegmentedPolicy::RADIX_BITS;
13821383 int alt_radix_bits = ActivePolicyT::AltSegmentedPolicy::RADIX_BITS;
13831384 int num_bits = end_bit - begin_bit;
1384- int num_passes = CUB_MAX (::cuda::ceil_div (num_bits, radix_bits), 1 );
1385+ int num_passes = _CUDA_VSTD::max (::cuda::ceil_div (num_bits, radix_bits), 1 ); // num_bits may be zero
13851386 bool is_num_passes_odd = num_passes & 1 ;
13861387 int max_alt_passes = (num_passes * radix_bits) - num_bits;
1387- int alt_end_bit = CUB_MIN (end_bit, begin_bit + (max_alt_passes * alt_radix_bits));
1388+ int alt_end_bit = _CUDA_VSTD::min (end_bit, begin_bit + (max_alt_passes * alt_radix_bits));
13881389
13891390 DoubleBuffer<KeyT> d_keys_remaining_passes (
13901391 (is_overwrite_okay || is_num_passes_odd) ? d_keys.Alternate () : static_cast <KeyT*>(allocations[0 ]),
0 commit comments