Skip to content

Commit 5ea1654

Browse files
committedFeb 28, 2025··
Add flat storage
1 parent ceb1452 commit 5ea1654

File tree

7 files changed

+77
-112
lines changed

7 files changed

+77
-112
lines changed
 

‎benchmarks/CMakeLists.txt

-56
Original file line numberDiff line numberDiff line change
@@ -46,63 +46,7 @@ endfunction(ConfigureBench)
4646
### benchmark sources #############################################################################
4747
###################################################################################################
4848

49-
###################################################################################################
50-
# - static_set benchmarks -------------------------------------------------------------------------
51-
ConfigureBench(STATIC_SET_BENCH
52-
static_set/contains_bench.cu
53-
static_set/find_bench.cu
54-
static_set/insert_bench.cu
55-
static_set/retrieve_bench.cu
56-
static_set/retrieve_all_bench.cu
57-
static_set/size_bench.cu
58-
static_set/rehash_bench.cu)
59-
60-
###################################################################################################
61-
# - static_map benchmarks -------------------------------------------------------------------------
62-
ConfigureBench(STATIC_MAP_BENCH
63-
static_map/insert_bench.cu
64-
static_map/find_bench.cu
65-
static_map/contains_bench.cu
66-
static_map/erase_bench.cu
67-
static_map/insert_or_apply_bench.cu)
68-
69-
###################################################################################################
70-
# - static_multiset benchmarks --------------------------------------------------------------------
71-
ConfigureBench(STATIC_MULTISET_BENCH
72-
static_multiset/contains_bench.cu
73-
static_multiset/retrieve_bench.cu
74-
static_multiset/count_bench.cu
75-
static_multiset/find_bench.cu
76-
static_multiset/insert_bench.cu)
77-
7849
###################################################################################################
7950
# - static_multimap benchmarks --------------------------------------------------------------------
8051
ConfigureBench(STATIC_MULTIMAP_BENCH
81-
static_multimap/insert_bench.cu
82-
static_multimap/retrieve_bench.cu
83-
static_multimap/query_bench.cu
8452
static_multimap/count_bench.cu)
85-
86-
###################################################################################################
87-
# - dynamic_map benchmarks ------------------------------------------------------------------------
88-
ConfigureBench(DYNAMIC_MAP_BENCH
89-
dynamic_map/insert_bench.cu
90-
dynamic_map/find_bench.cu
91-
dynamic_map/contains_bench.cu
92-
dynamic_map/erase_bench.cu)
93-
94-
###################################################################################################
95-
# - hash function benchmarks ----------------------------------------------------------------------
96-
ConfigureBench(HASH_FUNCTION_BENCH
97-
hash_function/hash_function_bench.cu)
98-
99-
###################################################################################################
100-
# - hyperloglog benchmarks -----------------------------------------------------------
101-
ConfigureBench(HYPERLOGLOG_BENCH
102-
hyperloglog/hyperloglog_bench.cu)
103-
104-
###################################################################################################
105-
# - bloom_filter benchmarks -----------------------------------------------------------------------
106-
ConfigureBench(BLOOM_FILTER_BENCH
107-
bloom_filter/add_bench.cu
108-
bloom_filter/contains_bench.cu)

‎include/cuco/detail/extent/extent.inl

+1-1
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ template <int32_t CGSize, int32_t BucketSize, typename SizeType, std::size_t N>
9999
return bucket_extent<SizeType>{static_cast<SizeType>(
100100
*cuco::detail::lower_bound(
101101
cuco::detail::primes.begin(), cuco::detail::primes.end(), static_cast<uint64_t>(size)) *
102-
CGSize)};
102+
CGSize * BucketSize)};
103103
}
104104
if constexpr (N != dynamic_extent) {
105105
return bucket_extent<SizeType,

‎include/cuco/detail/open_addressing/open_addressing_ref_impl.cuh

+17-54
Original file line numberDiff line numberDiff line change
@@ -430,71 +430,34 @@ class open_addressing_ref_impl {
430430
__device__ bool insert(cooperative_groups::thread_block_tile<cg_size> const& group,
431431
Value const& value) noexcept
432432
{
433-
auto const val = this->heterogeneous_value(value);
434-
auto const key = this->extract_key(val);
435-
auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent());
436-
auto const init_idx = *probing_iter;
433+
auto const val = this->heterogeneous_value(value);
434+
auto const key = this->extract_key(val);
435+
auto probing_iter = probing_scheme_(group, key, storage_ref_.bucket_extent());
436+
auto* data = reinterpret_cast<char*>(storage_ref_.data());
437437

438438
while (true) {
439-
auto const bucket_slots = storage_ref_[*probing_iter];
439+
value_type bucket_slots[2];
440+
auto const tmp =
441+
*reinterpret_cast<uint4 const*>(data + *probing_iter * sizeof(value_type) * 2);
442+
memcpy(&bucket_slots[0], &tmp, 2 * sizeof(value_type));
440443

441-
auto const [state, intra_bucket_index] = [&]() {
442-
for (auto i = 0; i < bucket_size; ++i) {
443-
switch (
444-
this->predicate_.operator()<is_insert::YES>(key, this->extract_key(bucket_slots[i]))) {
445-
case detail::equal_result::AVAILABLE:
446-
return bucket_probing_results{detail::equal_result::AVAILABLE, i};
447-
case detail::equal_result::EQUAL: {
448-
if constexpr (allows_duplicates) {
449-
continue;
450-
} else {
451-
return bucket_probing_results{detail::equal_result::EQUAL, i};
452-
}
453-
}
454-
default: continue;
455-
}
456-
}
457-
// returns dummy index `-1` for UNEQUAL
458-
return bucket_probing_results{detail::equal_result::UNEQUAL, -1};
459-
}();
444+
auto const first_slot_is_empty =
445+
detail::bitwise_compare(bucket_slots[0].first, this->empty_key_sentinel());
446+
auto const second_slot_is_empty =
447+
detail::bitwise_compare(bucket_slots[1].first, this->empty_key_sentinel());
460448

461-
if constexpr (not allows_duplicates) {
462-
// If the key is already in the container, return false
463-
if (group.any(state == detail::equal_result::EQUAL)) { return false; }
464-
}
449+
auto const bucket_contains_empty = group.ballot(first_slot_is_empty or second_slot_is_empty);
465450

466-
auto const group_contains_available = group.ballot(state == detail::equal_result::AVAILABLE);
467-
if (group_contains_available) {
468-
auto const src_lane = __ffs(group_contains_available) - 1;
451+
if (bucket_contains_empty) {
452+
auto const src_lane = __ffs(bucket_contains_empty) - 1;
469453
auto status = insert_result::CONTINUE;
470454
if (group.thread_rank() == src_lane) {
471-
if constexpr (SupportsErase) {
472-
status =
473-
attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_bucket_index,
474-
bucket_slots[intra_bucket_index],
475-
val);
476-
} else {
477-
status =
478-
attempt_insert((storage_ref_.data() + *probing_iter)->data() + intra_bucket_index,
479-
this->empty_slot_sentinel(),
480-
val);
481-
}
455+
status = attempt_insert(bucket_slots, this->empty_slot_sentinel(), val);
482456
}
483457

484-
switch (group.shfl(status, src_lane)) {
485-
case insert_result::SUCCESS: return true;
486-
case insert_result::DUPLICATE: {
487-
if constexpr (allows_duplicates) {
488-
[[fallthrough]];
489-
} else {
490-
return false;
491-
}
492-
}
493-
default: continue;
494-
}
458+
if (group.any(status == insert_result::SUCCESS)) { return true; }
495459
} else {
496460
++probing_iter;
497-
if (*probing_iter == init_idx) { return false; }
498461
}
499462
}
500463
}

‎include/cuco/detail/storage/kernels.cuh

+13
Original file line numberDiff line numberDiff line change
@@ -51,5 +51,18 @@ CUCO_KERNEL void initialize(BucketT* buckets,
5151
}
5252
}
5353

54+
template <typename BucketT>
55+
CUCO_KERNEL void initialize(BucketT* buckets, cuco::detail::index_type n, BucketT value)
56+
{
57+
auto const loop_stride = cuco::detail::grid_stride();
58+
auto idx = cuco::detail::global_thread_id();
59+
60+
while (idx < n) {
61+
auto& slot = *(buckets + idx);
62+
slot = value;
63+
idx += loop_stride;
64+
}
65+
}
66+
5467
} // namespace detail
5568
} // namespace cuco

‎include/cuco/detail/storage/storage.cuh

+34
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#pragma once
1818

1919
#include <cuco/bucket_storage.cuh>
20+
#include <cuco/flat_storage.cuh>
2021

2122
namespace cuco {
2223
namespace detail {
@@ -60,5 +61,38 @@ class storage : StorageImpl::template impl<T, Extent, Allocator> {
6061
}
6162
};
6263

64+
template <class StorageImpl, class T, class Extent, class Allocator>
65+
class slot_storage : StorageImpl::template impl<T, Extent, Allocator> {
66+
public:
67+
/// Storage implementation type
68+
using impl_type = typename StorageImpl::template impl<T, Extent, Allocator>;
69+
using ref_type = typename impl_type::ref_type; ///< Storage ref type
70+
using value_type = typename impl_type::value_type; ///< Storage value type
71+
using allocator_type = typename impl_type::allocator_type; ///< Storage value type
72+
73+
/// Number of elements per bucket
74+
static constexpr int bucket_size = impl_type::bucket_size;
75+
76+
using impl_type::allocator;
77+
using impl_type::bucket_extent;
78+
using impl_type::capacity;
79+
using impl_type::data;
80+
using impl_type::initialize;
81+
using impl_type::initialize_async;
82+
using impl_type::num_buckets;
83+
using impl_type::ref;
84+
85+
/**
86+
* @brief Constructs storage.
87+
*
88+
* @param size Number of slots to (de)allocate
89+
* @param allocator Allocator used for (de)allocating device storage
90+
*/
91+
explicit constexpr slot_storage(Extent size, Allocator const& allocator)
92+
: impl_type{size, allocator}
93+
{
94+
}
95+
};
96+
6397
} // namespace detail
6498
} // namespace cuco

‎include/cuco/static_multimap.cuh

+1-1
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,7 @@ template <class Key,
9696
class ProbingScheme = cuco::double_hashing<8, // CG size
9797
cuco::default_hash_function<Key>>,
9898
class Allocator = cuco::cuda_allocator<cuco::pair<Key, T>>,
99-
class Storage = cuco::storage<2>>
99+
class Storage = cuco::slot_storage<2>>
100100
class static_multimap {
101101
static_assert(sizeof(Key) <= 8, "Container does not support key types larger than 8 bytes.");
102102

‎include/cuco/storage.cuh

+11
Original file line numberDiff line numberDiff line change
@@ -45,4 +45,15 @@ class storage {
4545
using impl = bucket_storage<T, bucket_size, Extent, Allocator>;
4646
};
4747

48+
template <int32_t BucketSize>
49+
class slot_storage {
50+
public:
51+
/// Number of slots per bucket storage
52+
static constexpr int32_t bucket_size = BucketSize;
53+
54+
/// Type of implementation details
55+
template <class T, class Extent, class Allocator>
56+
using impl = flat_storage<T, bucket_size, Extent, Allocator>;
57+
};
58+
4859
} // namespace cuco

0 commit comments

Comments
 (0)
Please sign in to comment.