Skip to content

Commit 9ac1ebe

Browse files
miscco, davebayer
authored and committed
Deprecate cub::{min, max} and replace internal uses with those from libcu++ (NVIDIA#3419)
* Deprecate `cub::{min, max}` and replace internal uses with those from libcu++. Fixes NVIDIA#3404
1 parent 26ab94e commit 9ac1ebe

File tree

4 files changed

+33
-11
lines changed

4 files changed

+33
-11
lines changed

cub/cub/device/dispatch/dispatch_segmented_sort.cuh

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -60,6 +60,8 @@
6060
#include <thrust/system/cuda/detail/core/triple_chevron_launch.h>
6161

6262
#include <cuda/cmath>
63+
#include <cuda/std/__algorithm/max.h>
64+
#include <cuda/std/__algorithm/min.h>
6365
#include <cuda/std/type_traits>
6466

6567
#include <type_traits>
@@ -964,7 +966,7 @@ struct DispatchSegmentedSort
964966
constexpr auto num_segments_per_invocation_limit =
965967
static_cast<global_segment_offset_t>(::cuda::std::numeric_limits<int>::max());
966968
auto const max_num_segments_per_invocation = static_cast<global_segment_offset_t>(
967-
::cuda::std::min(static_cast<global_segment_offset_t>(num_segments), num_segments_per_invocation_limit));
969+
(::cuda::std::min)(static_cast<global_segment_offset_t>(num_segments), num_segments_per_invocation_limit));
968970

969971
large_and_medium_segments_indices.grow(max_num_segments_per_invocation);
970972
small_segments_indices.grow(max_num_segments_per_invocation);

cub/cub/util_arch.cuh

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -121,20 +121,20 @@ template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename
121121
struct RegBoundScaling
122122
{
123123
static constexpr int ITEMS_PER_THREAD =
124-
::cuda::std::max(1, Nominal4ByteItemsPerThread * 4 / ::cuda::std::max(4, int{sizeof(T)}));
125-
static constexpr int BLOCK_THREADS =
126-
::cuda::std::min(Nominal4ByteBlockThreads,
127-
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
124+
(::cuda::std::max)(1, Nominal4ByteItemsPerThread * 4 / (::cuda::std::max)(4, int{sizeof(T)}));
125+
static constexpr int BLOCK_THREADS = (::cuda::std::min)(
126+
Nominal4ByteBlockThreads,
127+
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
128128
};
129129

130130
template <int Nominal4ByteBlockThreads, int Nominal4ByteItemsPerThread, typename T>
131131
struct MemBoundScaling
132132
{
133-
static constexpr int ITEMS_PER_THREAD = ::cuda::std::max(
134-
1, ::cuda::std::min(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2));
135-
static constexpr int BLOCK_THREADS =
136-
::cuda::std::min(Nominal4ByteBlockThreads,
137-
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
133+
static constexpr int ITEMS_PER_THREAD = (::cuda::std::max)(
134+
1, (::cuda::std::min)(Nominal4ByteItemsPerThread * 4 / int{sizeof(T)}, Nominal4ByteItemsPerThread * 2));
135+
static constexpr int BLOCK_THREADS = (::cuda::std::min)(
136+
Nominal4ByteBlockThreads,
137+
::cuda::ceil_div(int{detail::max_smem_per_block} / (int{sizeof(T)} * ITEMS_PER_THREAD), 32) * 32);
138138
};
139139

140140
#endif // Do not document

cub/cub/util_macro.cuh

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,26 @@
4949

5050
CUB_NAMESPACE_BEGIN
5151

52+
#ifndef _CCCL_DOXYGEN_INVOKED // Do not document
53+
# define CUB_PREVENT_MACRO_SUBSTITUTION
54+
template <typename T, typename U>
55+
CCCL_DEPRECATED_BECAUSE("Use cuda::std::min from <cuda/std/functional> instead")
56+
constexpr _CCCL_HOST_DEVICE auto min CUB_PREVENT_MACRO_SUBSTITUTION(T&& t, U&& u)
57+
-> decltype(t < u ? ::cuda::std::forward<T>(t) : ::cuda::std::forward<U>(u))
58+
{
59+
return t < u ? ::cuda::std::forward<T>(t) : ::cuda::std::forward<U>(u);
60+
}
61+
62+
template <typename T, typename U>
63+
CCCL_DEPRECATED_BECAUSE("Use cuda::std::max from <cuda/std/functional> instead")
64+
constexpr _CCCL_HOST_DEVICE auto max CUB_PREVENT_MACRO_SUBSTITUTION(T&& t, U&& u)
65+
-> decltype(t < u ? ::cuda::std::forward<U>(u) : ::cuda::std::forward<T>(t))
66+
{
67+
return t < u ? ::cuda::std::forward<U>(u) : ::cuda::std::forward<T>(t);
68+
}
69+
# undef CUB_PREVENT_MACRO_SUBSTITUTION
70+
#endif
71+
5272
#ifndef CUB_MAX
5373
/// Select maximum(a, b)
5474
/// Deprecated since [2.8]

cub/test/catch2_radix_sort_helper.cuh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -69,7 +69,7 @@ struct segment_iterator
6969

7070
__host__ __device__ OffsetT operator()(std::int64_t x) const
7171
{
72-
return ::cuda::std::min(last, x * Step);
72+
return (::cuda::std::min)(last, x * Step);
7373
}
7474
};
7575

0 commit comments

Comments
 (0)