Skip to content

Commit

Permalink
Plenty of fixes when capacity is reached
Browse files Browse the repository at this point in the history
* Fix capacity and rehash issues when the bucket array gets full
* Make sure uint16_t types work in bucket
* Maximum capacity is now 2^63 for bucket_type::big
* Fix bad hash for 32bit builds
  • Loading branch information
martinus committed Aug 15, 2022
1 parent 1f50c22 commit f4850bd
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 43 deletions.
82 changes: 57 additions & 25 deletions include/ankerl/unordered_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,14 +401,16 @@ class table {
value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
typename std::allocator_traits<BucketAlloc>::pointer m_buckets{};
size_t m_num_buckets = 0;
value_idx_type m_max_bucket_capacity = 0;
size_t m_max_bucket_capacity = 0;
float m_max_load_factor = DEFAULT_MAX_LOAD_FACTOR;
Hash m_hash{};
KeyEqual m_equal{};
uint8_t m_shifts = INITIAL_SHIFTS;

[[nodiscard]] auto next(size_t bucket_idx) const -> size_t {
return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1 == m_num_buckets) ? 0 : bucket_idx + 1;
// Step to the following bucket slot, wrapping back to slot 0 when the end
// of the bucket array is reached. The 1U addend keeps integral promotion
// explicit so narrow value_idx_type types don't warn.
[[nodiscard]] auto next(value_idx_type bucket_idx) const -> value_idx_type {
    auto const advanced = bucket_idx + 1U;
    if (ANKERL_UNORDERED_DENSE_UNLIKELY(advanced == m_num_buckets)) {
        return 0;
    }
    return static_cast<value_idx_type>(advanced);
}

// Helper to access bucket through pointer types
Expand All @@ -417,10 +419,24 @@ class table {
return *(bucket_ptr + static_cast<typename std::allocator_traits<BucketAlloc>::difference_type>(offset));
}

// dist_inc/dist_dec wrap the +/- Bucket::DIST_INC arithmetic with an explicit
// cast so narrow dist_and_fingerprint_type (e.g. uint16_t) compiles warning-free.
[[nodiscard]] static constexpr auto dist_inc(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
    auto const promoted = x + Bucket::DIST_INC; // integral promotion happens here
    return static_cast<dist_and_fingerprint_type>(promoted);
}

// Inverse of dist_inc: subtract one distance step, cast back to the narrow type.
[[nodiscard]] static constexpr auto dist_dec(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
    auto const demoted = x - Bucket::DIST_INC; // integral promotion happens here
    return static_cast<dist_and_fingerprint_type>(demoted);
}

template <typename K>
[[nodiscard]] constexpr auto mixed_hash(K const& key) const -> uint64_t {
if constexpr (is_detected_v<detect_avalanching, Hash>) {
# if SIZE_MAX == UINT32_MAX
// On 32bit systems we still want 64bit hashes
return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
# else
return m_hash(key);
# endif
} else {
return wyhash::hash(m_hash(key));
}
Expand All @@ -430,8 +446,8 @@ class table {
return Bucket::DIST_INC | (static_cast<dist_and_fingerprint_type>(hash) & Bucket::FINGERPRINT_MASK);
}

[[nodiscard]] constexpr auto bucket_idx_from_hash(uint64_t hash) const -> size_t {
return static_cast<size_t>(hash >> m_shifts);
// Select a bucket from the top bits of the 64-bit hash; m_shifts encodes the
// current table size (num_buckets == 2^(64 - m_shifts), capped elsewhere).
[[nodiscard]] constexpr auto bucket_idx_from_hash(uint64_t hash) const -> value_idx_type {
    auto const top_bits = hash >> m_shifts;
    return static_cast<value_idx_type>(top_bits);
}

[[nodiscard]] static constexpr auto get_key(value_type const& vt) -> key_type const& {
Expand All @@ -443,29 +459,29 @@ class table {
}

template <typename K>
[[nodiscard]] auto next_while_less(K const& key) const -> std::pair<dist_and_fingerprint_type, size_t> {
[[nodiscard]] auto next_while_less(K const& key) const -> Bucket {
auto hash = mixed_hash(key);
auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
auto bucket_idx = bucket_idx_from_hash(hash);

while (dist_and_fingerprint < at(m_buckets, bucket_idx).dist_and_fingerprint) {
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}
return {dist_and_fingerprint, bucket_idx};
}

void place_and_shift_up(Bucket bucket, size_t place) {
void place_and_shift_up(Bucket bucket, value_idx_type place) {
while (0 != at(m_buckets, place).dist_and_fingerprint) {
bucket = std::exchange(at(m_buckets, place), bucket);
bucket.dist_and_fingerprint += Bucket::DIST_INC;
bucket.dist_and_fingerprint = dist_inc(bucket.dist_and_fingerprint);
place = next(place);
}
at(m_buckets, place) = bucket;
}

[[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> size_t {
return size_t{1} << (64U - shifts);
return std::min(max_bucket_count(), size_t{1} << (64U - shifts));
}

[[nodiscard]] constexpr auto calc_shifts_for_size(size_t s) const -> uint8_t {
Expand Down Expand Up @@ -504,7 +520,12 @@ class table {
auto bucket_alloc = BucketAlloc(m_values.get_allocator());
m_num_buckets = calc_num_buckets(m_shifts);
m_buckets = BucketAllocTraits::allocate(bucket_alloc, m_num_buckets);
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
if (m_num_buckets == max_bucket_count()) {
// reached the maximum, make sure we can use each bucket
m_max_bucket_capacity = max_bucket_count();
} else {
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
}
}

void clear_buckets() {
Expand All @@ -526,19 +547,22 @@ class table {
}

void increase_size() {
    // Once m_max_bucket_capacity has hit the absolute maximum there is no
    // larger table to grow into; fail loudly instead of overflowing m_shifts.
    if (ANKERL_UNORDERED_DENSE_UNLIKELY(m_max_bucket_capacity == max_bucket_count())) {
        throw std::overflow_error("ankerl::unordered_dense: reached max bucket size, cannot increase size");
    }
    // One fewer shift doubles the bucket count (buckets == 2^(64 - m_shifts)).
    --m_shifts;
    // Rebuild the bucket array at the new size, then re-insert every value.
    // Order matters: free old buckets, allocate new ones, then refill from
    // m_values (which is untouched by the rehash).
    deallocate_buckets();
    allocate_buckets_from_shift();
    clear_and_fill_buckets_from_values();
}

void do_erase(size_t bucket_idx) {
void do_erase(value_idx_type bucket_idx) {
auto const value_idx_to_remove = at(m_buckets, bucket_idx).value_idx;

// shift down until either empty or an element with correct spot is found
auto next_bucket_idx = next(bucket_idx);
while (at(m_buckets, next_bucket_idx).dist_and_fingerprint >= Bucket::DIST_INC * 2) {
at(m_buckets, bucket_idx) = {at(m_buckets, next_bucket_idx).dist_and_fingerprint - Bucket::DIST_INC,
at(m_buckets, bucket_idx) = {dist_dec(at(m_buckets, next_bucket_idx).dist_and_fingerprint),
at(m_buckets, next_bucket_idx).value_idx};
bucket_idx = std::exchange(next_bucket_idx, next(next_bucket_idx));
}
Expand Down Expand Up @@ -573,7 +597,7 @@ class table {

while (dist_and_fingerprint == at(m_buckets, bucket_idx).dist_and_fingerprint &&
!m_equal(key, get_key(m_values[at(m_buckets, bucket_idx).value_idx]))) {
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}

Expand All @@ -594,7 +618,7 @@ class table {
}

template <typename K, typename... Args>
auto do_place_element(dist_and_fingerprint_type dist_and_fingerprint, size_t bucket_idx, K&& key, Args&&... args)
auto do_place_element(dist_and_fingerprint_type dist_and_fingerprint, value_idx_type bucket_idx, K&& key, Args&&... args)
-> std::pair<iterator, bool> {

// emplace the new value. If that throws an exception, no harm done; index is still in a valid state
Expand All @@ -603,7 +627,7 @@ class table {
std::forward_as_tuple(std::forward<Args>(args)...));

// place element and shift up until we find an empty spot
auto value_idx = static_cast<value_idx_type>(m_values.size()) - 1;
auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);
return {begin() + static_cast<difference_type>(value_idx), true};
}
Expand All @@ -627,7 +651,7 @@ class table {
} else if (dist_and_fingerprint > bucket->dist_and_fingerprint) {
return do_place_element(dist_and_fingerprint, bucket_idx, std::forward<K>(key), std::forward<Args>(args)...);
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}
}
Expand All @@ -647,14 +671,14 @@ class table {
if (dist_and_fingerprint == bucket->dist_and_fingerprint && m_equal(key, get_key(m_values[bucket->value_idx]))) {
return begin() + static_cast<difference_type>(bucket->value_idx);
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
bucket = &at(m_buckets, bucket_idx);

if (dist_and_fingerprint == bucket->dist_and_fingerprint && m_equal(key, get_key(m_values[bucket->value_idx]))) {
return begin() + static_cast<difference_type>(bucket->value_idx);
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
bucket = &at(m_buckets, bucket_idx);

Expand All @@ -666,7 +690,7 @@ class table {
} else if (dist_and_fingerprint > bucket->dist_and_fingerprint) {
return end();
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
bucket = &at(m_buckets, bucket_idx);
}
Expand Down Expand Up @@ -845,7 +869,11 @@ class table {
}

[[nodiscard]] static constexpr auto max_size() noexcept -> size_t {
return std::numeric_limits<value_idx_type>::max();
if constexpr (std::numeric_limits<value_idx_type>::max() == std::numeric_limits<size_t>::max()) {
return size_t{1} << (sizeof(value_idx_type) * 8 - 1);
} else {
return size_t{1} << (sizeof(value_idx_type) * 8);
}
}

// modifiers //////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -937,12 +965,12 @@ class table {
m_values.pop_back(); // value was already there, so get rid of it
return {begin() + static_cast<difference_type>(at(m_buckets, bucket_idx).value_idx), false};
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}

// value is new, place the bucket and shift up until we find an empty spot
value_idx_type value_idx = static_cast<value_idx_type>(m_values.size()) - 1;
auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);

return {begin() + static_cast<difference_type>(value_idx), true};
Expand Down Expand Up @@ -1119,7 +1147,7 @@ class table {
}

static constexpr auto max_bucket_count() noexcept -> size_t { // NOLINT(modernize-use-nodiscard)
return std::numeric_limits<value_idx_type>::max();
return max_size();
}

// hash policy ////////////////////////////////////////////////////////////
Expand All @@ -1134,10 +1162,13 @@ class table {

void max_load_factor(float ml) {
m_max_load_factor = ml;
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(bucket_count()) * max_load_factor());
if (m_num_buckets != max_bucket_count()) {
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(bucket_count()) * max_load_factor());
}
}

void rehash(size_t count) {
count = std::min(count, max_size());
auto shifts = calc_shifts_for_size(std::max(count, size()));
if (shifts != m_shifts) {
m_shifts = shifts;
Expand All @@ -1149,6 +1180,7 @@ class table {
}

void reserve(size_t capa) {
capa = std::min(capa, max_size());
m_values.reserve(capa);
auto shifts = calc_shifts_for_size(std::max(capa, size()));
if (shifts < m_shifts) {
Expand Down
41 changes: 41 additions & 0 deletions test/app/stacktrace.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#if __GNUC__

# include <fmt/format.h>

# include <array>
# include <cstdio>
# include <cstdlib>
# include <execinfo.h>
# include <signal.h>
# include <unistd.h>

namespace {

// Signal handler: dumps raw stack addresses to stderr so they can be resolved
// offline with addr2line. Fixes from review: the original printed the
// "Error: signal" banner twice, and the address line had no trailing newline.
// NOTE(review): backtrace(), fmt::print() and exit() are not async-signal-safe;
// this is acceptable only because it is a best-effort test crash handler.
void handler(int sig) {
    auto ary = std::array<void*, 50>();

    // get void*'s for all entries on the stack
    auto size = backtrace(ary.data(), static_cast<int>(ary.size()));

    // print out all the frames to stderr as one addr2line invocation
    fmt::print(stderr, "Error: signal {}. See stacktrace with\n", sig);
    fmt::print(stderr, "addr2line -Cafpie ./test/udm");
    for (size_t i = 0; i < static_cast<size_t>(size); ++i) {
        fmt::print(stderr, " {}", ary[i]);
    }
    fmt::print(stderr, "\n"); // terminate the command line for clean shell output
    exit(1); // NOLINT(concurrency-mt-unsafe)
}

// RAII-style registrar: constructing an instance installs `handler` as the
// SIGTERM handler. The return value of signal() is deliberately discarded.
class Handler {
public:
    Handler() {
        (void)signal(SIGTERM, handler);
    }
};

auto const h = Handler();

} // namespace

#endif
1 change: 1 addition & 0 deletions test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ test_sources = [
'app/Counter.cpp',
'app/doctest.cpp',
'app/nanobench.cpp',
'app/stacktrace.cpp',
'app/ui/Periodic.cpp',
'app/ui/ProgressBar.cpp',
'app/unordered_dense.cpp',
Expand Down
58 changes: 50 additions & 8 deletions test/unit/bucket.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#include <ankerl/unordered_dense.h>

#include <app/Counter.h>

#include <doctest.h>
#include <fmt/format.h>

#include <limits>
#include <stdexcept> // for out_of_range

using Map = ankerl::unordered_dense::map<std::string, size_t>;
using MapDefault = ankerl::unordered_dense::map<std::string, size_t>;

// big bucket type allows 2^64 elements, but has more memory & CPU overhead.
using MapBig = ankerl::unordered_dense::map<std::string,
Expand All @@ -15,15 +18,54 @@ using MapBig = ankerl::unordered_dense::map<std::string,
std::allocator<std::pair<std::string, size_t>>,
ankerl::unordered_dense::bucket_type::big>;

static_assert(sizeof(Map::bucket_type) == 8U);
static_assert(sizeof(MapDefault::bucket_type) == 8U);
static_assert(sizeof(MapBig::bucket_type) == sizeof(size_t) + 4U);
static_assert(MapDefault::max_size() == MapDefault::max_bucket_count());

#if SIZE_MAX == UINT32_MAX
static_assert(MapDefault::max_size() == uint64_t{1} << 31U);
static_assert(MapBig::max_size() == uint64_t{1} << 31U);
#else
static_assert(MapDefault::max_size() == uint64_t{1} << 32U);
static_assert(MapBig::max_size() == uint64_t{1} << 63U);
#endif

// Tiny bucket type used to exercise the capacity-limit paths cheaply:
// uint8_t fields mean only 2^8 value slots and very few distance steps.
struct bucket_micro {
    static constexpr uint8_t DIST_INC = 1U << 1U; // 1 bit for fingerprint
    static constexpr uint8_t FINGERPRINT_MASK = DIST_INC - 1; // 7 bits = 128 positions for distance

    uint8_t dist_and_fingerprint;
    uint8_t value_idx;
};

TEST_CASE("bucket_micro") {
using Map = ankerl::unordered_dense::map<Counter::Obj,
Counter::Obj,
ankerl::unordered_dense::hash<Counter::Obj>,
std::equal_to<Counter::Obj>,
std::allocator<std::pair<Counter::Obj, Counter::Obj>>,
bucket_micro>;

Counter counts;
INFO(counts);

static_assert(Map::max_size() == std::numeric_limits<uint32_t>::max());
static_assert(MapBig::max_size() == std::numeric_limits<size_t>::max());
auto map = Map();
for (size_t i = 0; i < Map::max_size(); ++i) {
auto const r = map.try_emplace({i, counts}, i, counts);
REQUIRE(r.second);

static_assert(Map::max_bucket_count() == std::numeric_limits<uint32_t>::max());
static_assert(MapBig::max_bucket_count() == std::numeric_limits<size_t>::max());
auto it = map.find({0, counts});
REQUIRE(it != map.end());
}
REQUIRE_THROWS_AS(map.try_emplace({Map::max_size(), counts}, Map::max_size(), counts), std::overflow_error);

TEST_CASE("bucket") {
// TODO nothing here yet
// check that all elements are there
REQUIRE(map.size() == Map::max_size());
for (size_t i = 0; i < Map::max_size(); ++i) {
INFO(i);
auto it = map.find({i, counts});
REQUIRE(it != map.end());
REQUIRE(it->first.get() == i);
REQUIRE(it->second.get() == i);
}
}
10 changes: 0 additions & 10 deletions test/unit/max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,6 @@
#include <functional> // for equal_to
#include <limits> // for numeric_limits

TEST_CASE("max_size") {
auto const map = ankerl::unordered_dense::map<int, int>();
REQUIRE(map.max_size() == std::numeric_limits<decltype(decltype(map)::bucket_type::value_idx)>::max());
}

TEST_CASE("max_bucket_count") {
auto const map = ankerl::unordered_dense::map<int, int>();
REQUIRE(map.max_bucket_count() == std::numeric_limits<decltype(decltype(map)::bucket_type::value_idx)>::max());
}

TEST_CASE("max_load_factor") {
auto map_60 = ankerl::unordered_dense::map<int, int>();
auto map_90 = ankerl::unordered_dense::map<int, int>();
Expand Down

0 comments on commit f4850bd

Please sign in to comment.