Skip to content

Commit 74faa1d

Browse files
authored
Merge branch 'dev' into bf-tests
2 parents 923ad26 + 80f2ad7 commit 74faa1d

16 files changed

+101
-70
lines changed

benchmarks/static_multimap/count_bench.cu

+3-3
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -57,12 +57,12 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_multimap_count(
5757

5858
state.add_element_count(num_keys);
5959

60-
cuco::static_multimap<Key, Value> map{
60+
cuco::experimental::static_multimap<Key, Value> map{
6161
size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}};
6262
map.insert(pairs.begin(), pairs.end());
6363

6464
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) {
65-
auto count = map.count(keys.begin(), keys.end(), launch.get_stream());
65+
auto count = map.count(keys.begin(), keys.end(), {launch.get_stream()});
6666
});
6767
}
6868

benchmarks/static_multimap/insert_bench.cu

+11-4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2021-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2021-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -56,11 +56,18 @@ std::enable_if_t<(sizeof(Key) == sizeof(Value)), void> static_multimap_insert(
5656

5757
state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
5858
[&](nvbench::launch& launch, auto& timer) {
59-
cuco::static_multimap<Key, Value> map{
60-
size, cuco::empty_key<Key>{-1}, cuco::empty_value<Value>{-1}, launch.get_stream()};
59+
cuco::experimental::static_multimap<Key, Value> map{size,
60+
cuco::empty_key<Key>{-1},
61+
cuco::empty_value<Value>{-1},
62+
{},
63+
{},
64+
{},
65+
{},
66+
{},
67+
{launch.get_stream()}};
6168

6269
timer.start();
63-
map.insert(pairs.begin(), pairs.end(), launch.get_stream());
70+
map.insert(pairs.begin(), pairs.end(), {launch.get_stream()});
6471
timer.stop();
6572
});
6673
}

include/cuco/detail/open_addressing/open_addressing_impl.cuh

+4-4
Original file line numberDiff line numberDiff line change
@@ -578,8 +578,8 @@ class open_addressing_impl {
578578
* a match with its key equivalent to the query key.
579579
*
580580
* @note If `pred( *(stencil + i) )` is true, stores the payload of the
581-
* matched key or the `empty_value_sentienl` to `(output_begin + i)`. If `pred( *(stencil + i) )`
582-
* is false, stores `empty_value_sentienl` to `(output_begin + i)`.
581+
* matched key or the `empty_value_sentinel` to `(output_begin + i)`. If `pred( *(stencil + i) )`
582+
* is false, stores `empty_value_sentinel` to `(output_begin + i)`.
583583
*
584584
* @tparam InputIt Device accessible input iterator
585585
* @tparam StencilIt Device accessible random access iterator whose value_type is
@@ -953,7 +953,7 @@ class open_addressing_impl {
953953
* @note Behavior is undefined if the desired `extent` is insufficient to store all of the
954954
* contained elements.
955955
*
956-
* @note This function is not available if the conatiner's `extent_type` is static.
956+
* @note This function is not available if the container's `extent_type` is static.
957957
*
958958
* @tparam Container The container type this function operates on
959959
*
@@ -994,7 +994,7 @@ class open_addressing_impl {
994994
* @note Behavior is undefined if the desired `extent` is insufficient to store all of the
995995
* contained elements.
996996
*
997-
* @note This function is not available if the conatiner's `extent_type` is static.
997+
* @note This function is not available if the container's `extent_type` is static.
998998
*
999999
* @tparam Container The container type this function operates on
10001000
*

include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

+15-7
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ class open_addressing_ref_impl {
309309
*
310310
* @tparam CG The type of the cooperative thread group
311311
*
312-
* @param g The ooperative thread group used to copy the data structure
312+
* @param g The cooperative thread group used to copy the data structure
313313
* @param memory_to_use Array large enough to support `capacity` elements. Object does not take
314314
* the ownership of the memory
315315
*/
@@ -426,7 +426,7 @@ class open_addressing_ref_impl {
426426
*
427427
* @return True if the given element is successfully inserted
428428
*/
429-
template <typename Value>
429+
template <bool SupportsErase, typename Value>
430430
__device__ bool insert(cooperative_groups::thread_block_tile<cg_size> const& group,
431431
Value const& value) noexcept
432432
{
@@ -466,12 +466,20 @@ class open_addressing_ref_impl {
466466
auto const group_contains_available = group.ballot(state == detail::equal_result::AVAILABLE);
467467
if (group_contains_available) {
468468
auto const src_lane = __ffs(group_contains_available) - 1;
469-
auto const status =
470-
(group.thread_rank() == src_lane)
471-
? attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_bucket_index,
469+
auto status = insert_result::CONTINUE;
470+
if (group.thread_rank() == src_lane) {
471+
if constexpr (SupportsErase) {
472+
status =
473+
attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_bucket_index,
472474
bucket_slots[intra_bucket_index],
473-
val)
474-
: insert_result::CONTINUE;
475+
val);
476+
} else {
477+
status =
478+
attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_bucket_index,
479+
this->empty_slot_sentinel(),
480+
val);
481+
}
482+
}
475483

476484
switch (group.shfl(status, src_lane)) {
477485
case insert_result::SUCCESS: return true;

include/cuco/detail/operator.inl

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -48,7 +48,7 @@ class operator_impl {
4848
* @return `true` if `Operator` is contained in `Operators`, `false` otherwise.
4949
*/
5050
template <typename Operator, typename... Operators>
51-
static constexpr bool has_operator()
51+
__host__ __device__ static constexpr bool has_operator()
5252
{
5353
return ((std::is_same_v<Operators, Operator>) || ...);
5454
}

include/cuco/detail/static_map/static_map_ref.inl

+5-1
Original file line numberDiff line numberDiff line change
@@ -449,7 +449,11 @@ class operator_impl<
449449
Value const& value) noexcept
450450
{
451451
auto& ref_ = static_cast<ref_type&>(*this);
452-
return ref_.impl_.insert(group, value);
452+
if (ref_.erased_key_sentinel() != ref_.empty_key_sentinel()) {
453+
return ref_.impl_.insert<true>(group, value);
454+
} else {
455+
return ref_.impl_.insert<false>(group, value);
456+
}
453457
}
454458
};
455459

include/cuco/detail/static_multimap/static_multimap_ref.inl

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -455,7 +455,11 @@ class operator_impl<
455455
Value const& value) noexcept
456456
{
457457
auto& ref_ = static_cast<ref_type&>(*this);
458-
return ref_.impl_.insert(group, value);
458+
if (ref_.erased_key_sentinel() != ref_.empty_key_sentinel()) {
459+
return ref_.impl_.insert<true>(group, value);
460+
} else {
461+
return ref_.impl_.insert<false>(group, value);
462+
}
459463
}
460464
};
461465

include/cuco/detail/static_multiset/static_multiset_ref.inl

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2024-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -358,7 +358,11 @@ class operator_impl<
358358
Value const& value) noexcept
359359
{
360360
auto& ref_ = static_cast<ref_type&>(*this);
361-
return ref_.impl_.insert(group, value);
361+
if (ref_.erased_key_sentinel() != ref_.empty_key_sentinel()) {
362+
return ref_.impl_.insert<true>(group, value);
363+
} else {
364+
return ref_.impl_.insert<false>(group, value);
365+
}
362366
}
363367
};
364368

include/cuco/detail/static_set/static_set_ref.inl

+6-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
2+
* Copyright (c) 2022-2025, NVIDIA CORPORATION.
33
*
44
* Licensed under the Apache License, Version 2.0 (the "License");
55
* you may not use this file except in compliance with the License.
@@ -391,7 +391,11 @@ class operator_impl<op::insert_tag,
391391
Value const& value) noexcept
392392
{
393393
auto& ref_ = static_cast<ref_type&>(*this);
394-
return ref_.impl_.insert(group, value);
394+
if (ref_.erased_key_sentinel() != ref_.empty_key_sentinel()) {
395+
return ref_.impl_.insert<true>(group, value);
396+
} else {
397+
return ref_.impl_.insert<false>(group, value);
398+
}
395399
}
396400
};
397401

include/cuco/static_map.cuh

+13-13
Original file line numberDiff line numberDiff line change
@@ -50,12 +50,12 @@ namespace cuco {
5050
*
5151
* The host-side bulk operations include `insert`, `contains`, etc. These APIs should be used when
5252
* there are a large number of keys to modify or lookup. For example, given a range of keys
53-
* specified by device-accessible iterators, the bulk `insert` function will insert all keys into
53+
* specified by device-accessible iterators, the bulk `insert` function inserts all keys into
5454
* the map.
5555
*
5656
* The singular device-side operations allow individual threads (or cooperative groups) to perform
5757
* independent modify or lookup operations from device code. These operations are accessed through
58-
* non-owning, trivially copyable reference types (or "ref"). User can combine any arbitrary
58+
* non-owning, trivially copyable reference types (or "ref"). Users can combine any arbitrary
5959
* operators (see options in `include/cuco/operator.hpp`) when creating the ref. Concurrent modify
6060
* and lookup will be supported if both kinds of operators are specified during the ref
6161
* construction.
@@ -799,8 +799,8 @@ class static_map {
799799
* query key.
800800
*
801801
* @note If `pred( *(stencil + i) )` is true, stores the payload of the
802-
* matched key or the `empty_value_sentienl` to `(output_begin + i)`. If `pred( *(stencil + i) )`
803-
* is false, always stores the `empty_value_sentienl` to `(output_begin + i)`.
802+
* matched key or the `empty_value_sentinel` to `(output_begin + i)`. If `pred( *(stencil + i) )`
803+
* is false, always stores the `empty_value_sentinel` to `(output_begin + i)`.
804804
* @note This function synchronizes the given stream. For asynchronous execution use
805805
* `find_if_async`.
806806
*
@@ -832,8 +832,8 @@ class static_map {
832832
* a match with its key equivalent to the query key.
833833
*
834834
* @note If `pred( *(stencil + i) )` is true, stores the payload of the
835-
* matched key or the `empty_value_sentienl` to `(output_begin + i)`. If `pred( *(stencil + i) )`
836-
* is false, always stores the `empty_value_sentienl` to `(output_begin + i)`.
835+
* matched key or the `empty_value_sentinel` to `(output_begin + i)`. If `pred( *(stencil + i) )`
836+
* is false, always stores the `empty_value_sentinel` to `(output_begin + i)`.
837837
*
838838
* @tparam InputIt Device accessible input iterator
839839
* @tparam StencilIt Device accessible random access iterator whose `value_type` is convertible to
@@ -863,8 +863,8 @@ class static_map {
863863
* a match with its key equivalent to the query key.
864864
*
865865
* @note If `pred( *(stencil + i) )` is true, stores the payload of the
866-
* matched key or the `empty_value_sentienl` to `(output_begin + i)`. If `pred( *(stencil + i) )`
867-
* is false, always stores the `empty_value_sentienl` to `(output_begin + i)`.
866+
* matched key or the `empty_value_sentinel` to `(output_begin + i)`. If `pred( *(stencil + i) )`
867+
* is false, always stores the `empty_value_sentinel` to `(output_begin + i)`.
868868
*
869869
* @tparam InputIt Device accessible input iterator
870870
* @tparam StencilIt Device accessible random access iterator whose `value_type` is convertible to
@@ -1029,7 +1029,7 @@ class static_map {
10291029
*
10301030
* @tparam KeyOut Device accessible random access output iterator whose `value_type` is
10311031
* convertible from `key_type`.
1032-
* @tparam ValueOut Device accesible random access output iterator whose `value_type` is
1032+
* @tparam ValueOut Device accessible random access output iterator whose `value_type` is
10331033
* convertible from `mapped_type`.
10341034
*
10351035
* @param keys_out Beginning output iterator for keys
@@ -1066,7 +1066,7 @@ class static_map {
10661066
* @note Behavior is undefined if the desired `capacity` is insufficient to store all of the
10671067
* contained elements.
10681068
*
1069-
* @note This function is not available if the conatiner's `extent_type` is static.
1069+
* @note This function is not available if the container's `extent_type` is static.
10701070
*
10711071
* @param capacity New capacity of the container
10721072
* @param stream CUDA stream used for this operation
@@ -1091,7 +1091,7 @@ class static_map {
10911091
* @note Behavior is undefined if the desired `capacity` is insufficient to store all of the
10921092
* contained elements.
10931093
*
1094-
* @note This function is not available if the conatiner's `extent_type` is static.
1094+
* @note This function is not available if the container's `extent_type` is static.
10951095
*
10961096
* @param capacity New capacity of the container
10971097
* @param stream CUDA stream used for this operation
@@ -1512,7 +1512,7 @@ class static_map {
15121512
*
15131513
* @tparam KeyOut Device accessible random access output iterator whose `value_type` is
15141514
* convertible from `key_type`.
1515-
* @tparam ValueOut Device accesible random access output iterator whose `value_type` is
1515+
* @tparam ValueOut Device accessible random access output iterator whose `value_type` is
15161516
* convertible from `mapped_type`.
15171517
* @param keys_out Beginning output iterator for keys
15181518
* @param values_out Beginning output iterator for values
@@ -2280,7 +2280,7 @@ class static_map {
22802280
* @endcode
22812281
*
22822282
* @tparam CG The type of the cooperative thread group
2283-
* @param g The ooperative thread group used to copy the slots
2283+
* @param g The cooperative thread group used to copy the slots
22842284
* @param source_device_view `device_view` to copy from
22852285
* @param memory_to_use Array large enough to support `capacity` elements. Object does not take
22862286
* the ownership of the memory

include/cuco/static_map_ref.cuh

+1-1
Original file line numberDiff line numberDiff line change
@@ -276,7 +276,7 @@ class static_map_ref
276276
* @tparam CG The type of the cooperative thread group
277277
* @tparam NewScope The thread scope of the newly created device ref
278278
*
279-
* @param tile The ooperative thread group used to copy the data structure
279+
* @param tile The cooperative thread group used to copy the data structure
280280
* @param memory_to_use Array large enough to support `capacity` elements. Object does not take
281281
* the ownership of the memory
282282
* @param scope The thread scope of the newly created device ref

0 commit comments

Comments
 (0)