Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Plenty of fixes when capacity is reached #21

Merged
merged 1 commit into from
Aug 15, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 57 additions & 25 deletions include/ankerl/unordered_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -401,14 +401,16 @@ class table {
value_container_type m_values{}; // Contains all the key-value pairs in one densely stored container. No holes.
typename std::allocator_traits<BucketAlloc>::pointer m_buckets{};
size_t m_num_buckets = 0;
value_idx_type m_max_bucket_capacity = 0;
size_t m_max_bucket_capacity = 0;
float m_max_load_factor = DEFAULT_MAX_LOAD_FACTOR;
Hash m_hash{};
KeyEqual m_equal{};
uint8_t m_shifts = INITIAL_SHIFTS;

[[nodiscard]] auto next(size_t bucket_idx) const -> size_t {
return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1 == m_num_buckets) ? 0 : bucket_idx + 1;
[[nodiscard]] auto next(value_idx_type bucket_idx) const -> value_idx_type {
return ANKERL_UNORDERED_DENSE_UNLIKELY(bucket_idx + 1U == m_num_buckets)
? 0
: static_cast<value_idx_type>(bucket_idx + 1U);
}

// Helper to access bucket through pointer types
Expand All @@ -417,10 +419,24 @@ class table {
return *(bucket_ptr + static_cast<typename std::allocator_traits<BucketAlloc>::difference_type>(offset));
}

// use the dist_inc and dist_dec functions so that uint16_t types work without warning
// Returns x advanced by one distance step (Bucket::DIST_INC). The addition may be carried out in a wider
// type than dist_and_fingerprint_type, so the result is converted back explicitly.
[[nodiscard]] static constexpr auto dist_inc(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
    auto const stepped = x + Bucket::DIST_INC;
    return static_cast<dist_and_fingerprint_type>(stepped);
}

// Returns x moved back by one distance step (Bucket::DIST_INC). The subtraction may be carried out in a
// wider type than dist_and_fingerprint_type, so the result is converted back explicitly.
[[nodiscard]] static constexpr auto dist_dec(dist_and_fingerprint_type x) -> dist_and_fingerprint_type {
    auto const stepped = x - Bucket::DIST_INC;
    return static_cast<dist_and_fingerprint_type>(stepped);
}

template <typename K>
[[nodiscard]] constexpr auto mixed_hash(K const& key) const -> uint64_t {
if constexpr (is_detected_v<detect_avalanching, Hash>) {
# if SIZE_MAX == UINT32_MAX
// On 32bit systems we still want 64bit hashes
return m_hash(key) * UINT64_C(0x9ddfea08eb382d69);
# else
return m_hash(key);
# endif
} else {
return wyhash::hash(m_hash(key));
}
Expand All @@ -430,8 +446,8 @@ class table {
return Bucket::DIST_INC | (static_cast<dist_and_fingerprint_type>(hash) & Bucket::FINGERPRINT_MASK);
}

[[nodiscard]] constexpr auto bucket_idx_from_hash(uint64_t hash) const -> size_t {
return static_cast<size_t>(hash >> m_shifts);
[[nodiscard]] constexpr auto bucket_idx_from_hash(uint64_t hash) const -> value_idx_type {
return static_cast<value_idx_type>(hash >> m_shifts);
}

[[nodiscard]] static constexpr auto get_key(value_type const& vt) -> key_type const& {
Expand All @@ -443,29 +459,29 @@ class table {
}

template <typename K>
[[nodiscard]] auto next_while_less(K const& key) const -> std::pair<dist_and_fingerprint_type, size_t> {
[[nodiscard]] auto next_while_less(K const& key) const -> Bucket {
auto hash = mixed_hash(key);
auto dist_and_fingerprint = dist_and_fingerprint_from_hash(hash);
auto bucket_idx = bucket_idx_from_hash(hash);

while (dist_and_fingerprint < at(m_buckets, bucket_idx).dist_and_fingerprint) {
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}
return {dist_and_fingerprint, bucket_idx};
}

void place_and_shift_up(Bucket bucket, size_t place) {
void place_and_shift_up(Bucket bucket, value_idx_type place) {
while (0 != at(m_buckets, place).dist_and_fingerprint) {
bucket = std::exchange(at(m_buckets, place), bucket);
bucket.dist_and_fingerprint += Bucket::DIST_INC;
bucket.dist_and_fingerprint = dist_inc(bucket.dist_and_fingerprint);
place = next(place);
}
at(m_buckets, place) = bucket;
}

[[nodiscard]] static constexpr auto calc_num_buckets(uint8_t shifts) -> size_t {
return size_t{1} << (64U - shifts);
return std::min(max_bucket_count(), size_t{1} << (64U - shifts));
}

[[nodiscard]] constexpr auto calc_shifts_for_size(size_t s) const -> uint8_t {
Expand Down Expand Up @@ -504,7 +520,12 @@ class table {
auto bucket_alloc = BucketAlloc(m_values.get_allocator());
m_num_buckets = calc_num_buckets(m_shifts);
m_buckets = BucketAllocTraits::allocate(bucket_alloc, m_num_buckets);
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
if (m_num_buckets == max_bucket_count()) {
// reached the maximum, make sure we can use each bucket
m_max_bucket_capacity = max_bucket_count();
} else {
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(m_num_buckets) * max_load_factor());
}
}

void clear_buckets() {
Expand All @@ -526,19 +547,22 @@ class table {
}

void increase_size() {
if (ANKERL_UNORDERED_DENSE_UNLIKELY(m_max_bucket_capacity == max_bucket_count())) {
throw std::overflow_error("ankerl::unordered_dense: reached max bucket size, cannot increase size");
}
--m_shifts;
deallocate_buckets();
allocate_buckets_from_shift();
clear_and_fill_buckets_from_values();
}

void do_erase(size_t bucket_idx) {
void do_erase(value_idx_type bucket_idx) {
auto const value_idx_to_remove = at(m_buckets, bucket_idx).value_idx;

// shift down until either empty or an element with correct spot is found
auto next_bucket_idx = next(bucket_idx);
while (at(m_buckets, next_bucket_idx).dist_and_fingerprint >= Bucket::DIST_INC * 2) {
at(m_buckets, bucket_idx) = {at(m_buckets, next_bucket_idx).dist_and_fingerprint - Bucket::DIST_INC,
at(m_buckets, bucket_idx) = {dist_dec(at(m_buckets, next_bucket_idx).dist_and_fingerprint),
at(m_buckets, next_bucket_idx).value_idx};
bucket_idx = std::exchange(next_bucket_idx, next(next_bucket_idx));
}
Expand Down Expand Up @@ -573,7 +597,7 @@ class table {

while (dist_and_fingerprint == at(m_buckets, bucket_idx).dist_and_fingerprint &&
!m_equal(key, get_key(m_values[at(m_buckets, bucket_idx).value_idx]))) {
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}

Expand All @@ -594,7 +618,7 @@ class table {
}

template <typename K, typename... Args>
auto do_place_element(dist_and_fingerprint_type dist_and_fingerprint, size_t bucket_idx, K&& key, Args&&... args)
auto do_place_element(dist_and_fingerprint_type dist_and_fingerprint, value_idx_type bucket_idx, K&& key, Args&&... args)
-> std::pair<iterator, bool> {

// emplace the new value. If that throws an exception, no harm done; index is still in a valid state
Expand All @@ -603,7 +627,7 @@ class table {
std::forward_as_tuple(std::forward<Args>(args)...));

// place element and shift up until we find an empty spot
auto value_idx = static_cast<value_idx_type>(m_values.size()) - 1;
auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);
return {begin() + static_cast<difference_type>(value_idx), true};
}
Expand All @@ -627,7 +651,7 @@ class table {
} else if (dist_and_fingerprint > bucket->dist_and_fingerprint) {
return do_place_element(dist_and_fingerprint, bucket_idx, std::forward<K>(key), std::forward<Args>(args)...);
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}
}
Expand All @@ -647,14 +671,14 @@ class table {
if (dist_and_fingerprint == bucket->dist_and_fingerprint && m_equal(key, get_key(m_values[bucket->value_idx]))) {
return begin() + static_cast<difference_type>(bucket->value_idx);
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
bucket = &at(m_buckets, bucket_idx);

if (dist_and_fingerprint == bucket->dist_and_fingerprint && m_equal(key, get_key(m_values[bucket->value_idx]))) {
return begin() + static_cast<difference_type>(bucket->value_idx);
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
bucket = &at(m_buckets, bucket_idx);

Expand All @@ -666,7 +690,7 @@ class table {
} else if (dist_and_fingerprint > bucket->dist_and_fingerprint) {
return end();
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
bucket = &at(m_buckets, bucket_idx);
}
Expand Down Expand Up @@ -845,7 +869,11 @@ class table {
}

[[nodiscard]] static constexpr auto max_size() noexcept -> size_t {
return std::numeric_limits<value_idx_type>::max();
if constexpr (std::numeric_limits<value_idx_type>::max() == std::numeric_limits<size_t>::max()) {
return size_t{1} << (sizeof(value_idx_type) * 8 - 1);
} else {
return size_t{1} << (sizeof(value_idx_type) * 8);
}
}

// modifiers //////////////////////////////////////////////////////////////
Expand Down Expand Up @@ -937,12 +965,12 @@ class table {
m_values.pop_back(); // value was already there, so get rid of it
return {begin() + static_cast<difference_type>(at(m_buckets, bucket_idx).value_idx), false};
}
dist_and_fingerprint += Bucket::DIST_INC;
dist_and_fingerprint = dist_inc(dist_and_fingerprint);
bucket_idx = next(bucket_idx);
}

// value is new, place the bucket and shift up until we find an empty spot
value_idx_type value_idx = static_cast<value_idx_type>(m_values.size()) - 1;
auto value_idx = static_cast<value_idx_type>(m_values.size() - 1);
place_and_shift_up({dist_and_fingerprint, value_idx}, bucket_idx);

return {begin() + static_cast<difference_type>(value_idx), true};
Expand Down Expand Up @@ -1119,7 +1147,7 @@ class table {
}

static constexpr auto max_bucket_count() noexcept -> size_t { // NOLINT(modernize-use-nodiscard)
return std::numeric_limits<value_idx_type>::max();
return max_size();
}

// hash policy ////////////////////////////////////////////////////////////
Expand All @@ -1134,10 +1162,13 @@ class table {

void max_load_factor(float ml) {
m_max_load_factor = ml;
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(bucket_count()) * max_load_factor());
if (m_num_buckets != max_bucket_count()) {
m_max_bucket_capacity = static_cast<value_idx_type>(static_cast<float>(bucket_count()) * max_load_factor());
}
}

void rehash(size_t count) {
count = std::min(count, max_size());
auto shifts = calc_shifts_for_size(std::max(count, size()));
if (shifts != m_shifts) {
m_shifts = shifts;
Expand All @@ -1149,6 +1180,7 @@ class table {
}

void reserve(size_t capa) {
capa = std::min(capa, max_size());
m_values.reserve(capa);
auto shifts = calc_shifts_for_size(std::max(capa, size()));
if (shifts < m_shifts) {
Expand Down
41 changes: 41 additions & 0 deletions test/app/stacktrace.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#if __GNUC__

# include <fmt/format.h>

# include <array>
# include <cstdio>
# include <cstdlib>
# include <execinfo.h>
# include <signal.h>
# include <unistd.h>

namespace {

// Signal handler: announces the received signal on stderr, collects up to 50 return addresses of the
// current call stack and prints them as arguments of an addr2line command line, then terminates the
// process with exit code 1.
//
// NOTE(review): fmt::print and exit() are not async-signal-safe; presumably acceptable for a test-only
// diagnostic aid - confirm before reusing this elsewhere.
void handler(int sig) {
    // announce the signal before walking the stack
    fmt::print(stderr, "Error: signal {}:\n", sig);
    auto ary = std::array<void*, 50>();

    // get void*'s for all entries on the stack
    auto size = backtrace(ary.data(), static_cast<int>(ary.size()));

    // print out all the frames to stderr
    fmt::print(stderr, "Error: signal {}. See stacktrace with\n", sig);
    fmt::print(stderr, "addr2line -Cafpie ./test/udm");
    for (size_t i = 0; i < static_cast<size_t>(size); ++i) {
        fmt::print(stderr, " {}", ary[i]);
    }
    // terminate the command line so it is not glued to whatever is written to the terminal next
    fmt::print(stderr, "\n");
    exit(1); // NOLINT(concurrency-mt-unsafe)
}

// Helper whose only job is to register `handler` for SIGTERM when an instance is constructed.
class Handler {
public:
Handler() {
// the value returned by signal() is deliberately discarded
(void)signal(SIGTERM, handler);
}
};

// File-local instance: constructing it is what actually installs the SIGTERM handler.
auto const h = Handler();

} // namespace

#endif
1 change: 1 addition & 0 deletions test/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ test_sources = [
'app/Counter.cpp',
'app/doctest.cpp',
'app/nanobench.cpp',
'app/stacktrace.cpp',
'app/ui/Periodic.cpp',
'app/ui/ProgressBar.cpp',
'app/unordered_dense.cpp',
Expand Down
58 changes: 50 additions & 8 deletions test/unit/bucket.cpp
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
#include <ankerl/unordered_dense.h>

#include <app/Counter.h>

#include <doctest.h>
#include <fmt/format.h>

#include <limits>
#include <stdexcept> // for out_of_range

using Map = ankerl::unordered_dense::map<std::string, size_t>;
using MapDefault = ankerl::unordered_dense::map<std::string, size_t>;

// big bucket type allows up to 2^63 elements on 64-bit systems (2^31 on 32-bit, see the max_size() static_asserts below), but has more memory & CPU overhead.
using MapBig = ankerl::unordered_dense::map<std::string,
Expand All @@ -15,15 +18,54 @@ using MapBig = ankerl::unordered_dense::map<std::string,
std::allocator<std::pair<std::string, size_t>>,
ankerl::unordered_dense::bucket_type::big>;

static_assert(sizeof(Map::bucket_type) == 8U);
static_assert(sizeof(MapDefault::bucket_type) == 8U);
static_assert(sizeof(MapBig::bucket_type) == sizeof(size_t) + 4U);
static_assert(MapDefault::max_size() == MapDefault::max_bucket_count());

#if SIZE_MAX == UINT32_MAX
static_assert(MapDefault::max_size() == uint64_t{1} << 31U);
static_assert(MapBig::max_size() == uint64_t{1} << 31U);
#else
static_assert(MapDefault::max_size() == uint64_t{1} << 32U);
static_assert(MapBig::max_size() == uint64_t{1} << 63U);
#endif

// Deliberately tiny custom bucket type: both fields are uint8_t, so the distance counter and the value
// index have only a handful of bits each. Used as the bucket template argument in the "bucket_micro" test.
struct bucket_micro {
static constexpr uint8_t DIST_INC = 1U << 1U; // 1 bit for fingerprint; distance is counted in steps of 2
static constexpr uint8_t FINGERPRINT_MASK = DIST_INC - 1; // selects the single fingerprint bit; the other 7 bits = 128 positions for distance

// upper 7 bits: distance, lowest bit: fingerprint
uint8_t dist_and_fingerprint;
// 8 bit index of the referenced value
uint8_t value_idx;
};

TEST_CASE("bucket_micro") {
using Map = ankerl::unordered_dense::map<Counter::Obj,
Counter::Obj,
ankerl::unordered_dense::hash<Counter::Obj>,
std::equal_to<Counter::Obj>,
std::allocator<std::pair<Counter::Obj, Counter::Obj>>,
bucket_micro>;

Counter counts;
INFO(counts);

static_assert(Map::max_size() == std::numeric_limits<uint32_t>::max());
static_assert(MapBig::max_size() == std::numeric_limits<size_t>::max());
auto map = Map();
for (size_t i = 0; i < Map::max_size(); ++i) {
auto const r = map.try_emplace({i, counts}, i, counts);
REQUIRE(r.second);

static_assert(Map::max_bucket_count() == std::numeric_limits<uint32_t>::max());
static_assert(MapBig::max_bucket_count() == std::numeric_limits<size_t>::max());
auto it = map.find({0, counts});
REQUIRE(it != map.end());
}
REQUIRE_THROWS_AS(map.try_emplace({Map::max_size(), counts}, Map::max_size(), counts), std::overflow_error);

TEST_CASE("bucket") {
// TODO nothing here yet
// check that all elements are there
REQUIRE(map.size() == Map::max_size());
for (size_t i = 0; i < Map::max_size(); ++i) {
INFO(i);
auto it = map.find({i, counts});
REQUIRE(it != map.end());
REQUIRE(it->first.get() == i);
REQUIRE(it->second.get() == i);
}
}
10 changes: 0 additions & 10 deletions test/unit/max.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,6 @@
#include <functional> // for equal_to
#include <limits> // for numeric_limits

TEST_CASE("max_size") {
auto const map = ankerl::unordered_dense::map<int, int>();
REQUIRE(map.max_size() == std::numeric_limits<decltype(decltype(map)::bucket_type::value_idx)>::max());
}

TEST_CASE("max_bucket_count") {
auto const map = ankerl::unordered_dense::map<int, int>();
REQUIRE(map.max_bucket_count() == std::numeric_limits<decltype(decltype(map)::bucket_type::value_idx)>::max());
}

TEST_CASE("max_load_factor") {
auto map_60 = ankerl::unordered_dense::map<int, int>();
auto map_90 = ankerl::unordered_dense::map<int, int>();
Expand Down