diff --git a/CMakeLists.txt b/CMakeLists.txt index 1aeb87f9..928b4ef5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.12) project("unordered_dense" - VERSION 1.3.1 + VERSION 1.3.2 DESCRIPTION "A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion" HOMEPAGE_URL "https://github.com/martinus/unordered_dense") diff --git a/README.md b/README.md index 0678d127..119b21c2 100644 --- a/README.md +++ b/README.md @@ -16,14 +16,20 @@ The classes `ankerl::unordered_dense::map` and `ankerl::unordered_dense::set` ar - [2. Installation](#2-installation) - [2.1. Installing using cmake](#21-installing-using-cmake) - [3. Extensions](#3-extensions) - - [3.1. Container API](#31-container-api) - - [3.1.1. `auto extract() && -> value_container_type`](#311-auto-extract----value_container_type) - - [3.1.2. `[[nodiscard]] auto values() const noexcept -> value_container_type const&`](#312-nodiscard-auto-values-const-noexcept---value_container_type-const) - - [3.1.3. `auto replace(value_container_type&& container)`](#313-auto-replacevalue_container_type-container) - - [3.2. Custom Container Types](#32-custom-container-types) - - [3.3. Custom Bucket Tyeps](#33-custom-bucket-tyeps) - - [3.3.1. `ankerl::unordered_dense::bucket_type::standard`](#331-ankerlunordered_densebucket_typestandard) - - [3.3.2. `ankerl::unordered_dense::bucket_type::big`](#332-ankerlunordered_densebucket_typebig) + - [3.1. Hash](#31-hash) + - [A Simple Hash](#a-simple-hash) + - [A High Quality Hash](#a-high-quality-hash) + - [Specialize `ankerl::unordered_dense::hash`](#specialize-ankerlunordered_densehash) + - [Automatic Fallback to `std::hash`](#automatic-fallback-to-stdhash) + - [Hash the Whole Memory](#hash-the-whole-memory) + - [3.2. Container API](#32-container-api) + - [3.2.1. `auto extract() && -> value_container_type`](#321-auto-extract----value_container_type) + - [3.2.2. 
`[[nodiscard]] auto values() const noexcept -> value_container_type const&`](#322-nodiscard-auto-values-const-noexcept---value_container_type-const) + - [3.2.3. `auto replace(value_container_type&& container)`](#323-auto-replacevalue_container_type-container) + - [3.3. Custom Container Types](#33-custom-container-types) + - [3.4. Custom Bucket Tyeps](#34-custom-bucket-tyeps) + - [3.4.1. `ankerl::unordered_dense::bucket_type::standard`](#341-ankerlunordered_densebucket_typestandard) + - [3.4.2. `ankerl::unordered_dense::bucket_type::big`](#342-ankerlunordered_densebucket_typebig) - [4. Design](#4-design) - [4.1. Inserts](#41-inserts) - [4.2. Lookups](#42-lookups) @@ -79,37 +85,145 @@ target_link_libraries(your_project_name unordered_dense::unordered_dense) ## 3. Extensions -### 3.1. Container API +### 3.1. Hash + +`ankerl::unordered_dense::hash` is a fast and high quality hash, based on [wyhash](https://github.com/wangyi-fudan/wyhash). The `ankerl::unordered_dense` map/set differentiates between hashes of high quality (good [avalanching effect](https://en.wikipedia.org/wiki/Avalanche_effect)) and bad quality. Hashes with good quality contain a special marker: + +```cpp +using is_avalanching = void; +``` + +This is the case for the specializations `bool`, `char`, `signed char`, `unsigned char`, `char8_t`, `char16_t`, `char32_t`, `wchar_t`, `short`, `unsigned short`, `int`, `unsigned int`, `long`, `long long`, `unsigned long`, `unsigned long long`, `T*`, `std::unique_ptr`, `std::shared_ptr`, `enum`, `std::basic_string`, and `std::basic_string_view`. + +Hashes that do not contain such a marker are assumed to be of bad quality and receive an additional mixing step inside the map/set implementation. 
+ +#### A Simple Hash + +Consider a simple custom key type: + +```cpp +struct id { + uint64_t value{}; + + auto operator==(id const& other) const -> bool { + return value == other.value; + } +}; +``` + +The simplest implementation of a hash is this: + +```cpp +struct custom_hash_simple { + auto operator()(id const& x) const noexcept -> uint64_t { + return x.value; + } +}; +``` +This can be used e.g. with + +```cpp +auto ids = ankerl::unordered_dense::set<id, custom_hash_simple>(); +``` + +Since `custom_hash_simple` doesn't have a `using is_avalanching = void;` marker it is considered to be of bad quality and additional mixing of `x.value` is automatically provided inside the set. + +#### A High Quality Hash + +Back to the `id` example, we can easily implement a higher quality hash: + +```cpp +struct custom_hash_avalanching { + using is_avalanching = void; + + auto operator()(id const& x) const noexcept -> uint64_t { + return ankerl::unordered_dense::detail::wyhash::hash(x.value); + } +}; +``` + +We know `wyhash::hash` is of high quality, so we can add `using is_avalanching = void;` which makes the map/set directly use the returned value. + + +#### Specialize `ankerl::unordered_dense::hash` + +Instead of creating a new class you can also specialize `ankerl::unordered_dense::hash`: + +```cpp +template <> +struct ankerl::unordered_dense::hash<id> { + using is_avalanching = void; + + [[nodiscard]] auto operator()(id const& x) const noexcept -> uint64_t { + return detail::wyhash::hash(x.value); + } +}; +``` + +#### Automatic Fallback to `std::hash` + +When an implementation for `std::hash` of a custom type is available, this is automatically used and assumed to be of bad quality (thus `std::hash` is used, but an additional mixing step is performed). + + +#### Hash the Whole Memory + +When the type [has a unique object representation](https://en.cppreference.com/w/cpp/types/has_unique_object_representations) (no padding, trivially copyable), one can just hash the object's memory. 
Consider a simple class + +```cpp +struct point { + int x{}; + int y{}; + + auto operator==(point const& other) const -> bool { + return x == other.x && y == other.y; + } +}; +``` + +A fast and high quality hash can be easily provided like so: + +```cpp +struct custom_hash_unique_object_representation { + using is_avalanching = void; + + [[nodiscard]] auto operator()(point const& f) const noexcept -> uint64_t { + static_assert(std::has_unique_object_representations_v<point>); + return ankerl::unordered_dense::detail::wyhash::hash(&f, sizeof(f)); + } +}; +``` + +### 3.2. Container API In addition to the standard `std::unordered_map` API (see https://en.cppreference.com/w/cpp/container/unordered_map) we have additional API leveraging the fact that we're using a random access container internally: -#### 3.1.1. `auto extract() && -> value_container_type` +#### 3.2.1. `auto extract() && -> value_container_type` Extracts the internally used container. `*this` is emptied. -#### 3.1.2. `[[nodiscard]] auto values() const noexcept -> value_container_type const&` +#### 3.2.2. `[[nodiscard]] auto values() const noexcept -> value_container_type const&` Exposes the underlying values container. -#### 3.1.3. `auto replace(value_container_type&& container)` +#### 3.2.3. `auto replace(value_container_type&& container)` Discards the internally held container and replaces it with the one passed. Non-unique elements are removed, and the container will be partly reordered when non-unique elements are found. -### 3.2. Custom Container Types +### 3.3. Custom Container Types `unordered_dense` accepts a custom allocator, but you can also specify a custom container for that template argument. That way it is possible to replace the internally used `std::vector` with e.g. `std::deque` or any other container like `boost::interprocess::vector`. This supports fancy pointers (e.g. 
[offset_ptr](https://www.boost.org/doc/libs/1_80_0/doc/html/interprocess/offset_ptr.html)), so the container can be used with e.g. shared memory provided by `boost::interprocess`. -### 3.3. Custom Bucket Tyeps +### 3.4. Custom Bucket Tyeps The map/set supports two different bucket types. The default should be good for pretty much everyone. -#### 3.3.1. `ankerl::unordered_dense::bucket_type::standard` +#### 3.4.1. `ankerl::unordered_dense::bucket_type::standard` * Up to 2^32 = 4.29 billion elements. * 8 bytes overhead per bucket. -#### 3.3.2. `ankerl::unordered_dense::bucket_type::big` +#### 3.4.2. `ankerl::unordered_dense::bucket_type::big` * up to 2^63 = 9223372036854775808 elements. * 12 bytes overhead per bucket. diff --git a/include/ankerl/unordered_dense.h b/include/ankerl/unordered_dense.h index 220c65b8..840c6e1f 100644 --- a/include/ankerl/unordered_dense.h +++ b/include/ankerl/unordered_dense.h @@ -1,7 +1,7 @@ ///////////////////////// ankerl::unordered_dense::{map, set} ///////////////////////// // A fast & densely stored hashmap and hashset based on robin-hood backward shift deletion. -// Version 1.3.1 +// Version 1.3.2 // https://github.com/martinus/unordered_dense // // Licensed under the MIT License . 
@@ -32,7 +32,7 @@ // see https://semver.org/spec/v2.0.0.html #define ANKERL_UNORDERED_DENSE_VERSION_MAJOR 1 // NOLINT(cppcoreguidelines-macro-usage) incompatible API changes #define ANKERL_UNORDERED_DENSE_VERSION_MINOR 3 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible functionality -#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 1 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes +#define ANKERL_UNORDERED_DENSE_VERSION_PATCH 2 // NOLINT(cppcoreguidelines-macro-usage) backwards compatible bug fixes // API versioning with inline namespace, see https://www.foonathan.net/2018/11/inline-namespaces/ #define ANKERL_UNORDERED_DENSE_VERSION_CONCAT1(major, minor, patch) v##major##_##minor##_##patch @@ -214,10 +214,9 @@ static inline void mum(uint64_t* a, uint64_t* b) { template struct hash { - using is_avalanching = void; auto operator()(T const& obj) const noexcept(noexcept(std::declval>().operator()(std::declval()))) -> uint64_t { - return detail::wyhash::hash(std::hash{}(obj)); + return std::hash{}(obj); } }; diff --git a/meson.build b/meson.build index 65ca1daa..eeef2672 100644 --- a/meson.build +++ b/meson.build @@ -18,7 +18,7 @@ # project('unordered_dense', 'cpp', - version: '1.3.1', + version: '1.3.2', license: 'MIT', default_options : ['cpp_std=c++17', 'warning_level=3', 'werror=true']) diff --git a/test/meson.build b/test/meson.build index d9230b58..9528cffb 100644 --- a/test/meson.build +++ b/test/meson.build @@ -28,6 +28,7 @@ test_sources = [ 'unit/ctors.cpp', 'unit/custom_container_boost.cpp', 'unit/custom_container.cpp', + 'unit/custom_hash.cpp', 'unit/deduction_guides.cpp', 'unit/diamond.cpp', 'unit/empty.cpp', diff --git a/test/unit/custom_hash.cpp b/test/unit/custom_hash.cpp new file mode 100644 index 00000000..0d3c7f06 --- /dev/null +++ b/test/unit/custom_hash.cpp @@ -0,0 +1,87 @@ +#include + +#include +#include + +namespace { + +struct id { + uint64_t value{}; + + auto operator==(id const& other) const -> bool { + return 
value == other.value; + } +}; + +struct custom_hash_simple { + [[nodiscard]] auto operator()(id const& x) const noexcept -> uint64_t { + return x.value; + } +}; + +struct custom_hash_avalanching { + using is_avalanching = void; + + auto operator()(id const& x) const noexcept -> uint64_t { + return ankerl::unordered_dense::detail::wyhash::hash(x.value); + } +}; + +struct point { + int x{}; + int y{}; + + auto operator==(point const& other) const -> bool { + return x == other.x && y == other.y; + } +}; + +struct custom_hash_unique_object_representation { + using is_avalanching = void; + + [[nodiscard]] auto operator()(point const& f) const noexcept -> uint64_t { + static_assert(std::has_unique_object_representations_v); + return ankerl::unordered_dense::detail::wyhash::hash(&f, sizeof(f)); + } +}; + +} // namespace + +template <> +struct ankerl::unordered_dense::hash { + using is_avalanching = void; + + [[nodiscard]] auto operator()(id const& x) const noexcept -> uint64_t { + return detail::wyhash::hash(x.value); + } +}; + +TEST_CASE("custom_hash") { + { + auto set = ankerl::unordered_dense::set(); + set.insert(id{124}); + } + { + auto set = ankerl::unordered_dense::set(); + set.insert(id{124}); + } + { + auto set = ankerl::unordered_dense::set(); + set.insert(point{123, 321}); + } + { + auto set = ankerl::unordered_dense::set(); + set.insert(id{124}); + } +} + +static_assert( + !ankerl::unordered_dense::detail::is_detected_v); + +static_assert(ankerl::unordered_dense::detail::is_detected_v); +static_assert(ankerl::unordered_dense::detail::is_detected_v); + +static_assert(!ankerl::unordered_dense::detail::is_detected_v>); diff --git a/test/unit/namespace.cpp b/test/unit/namespace.cpp index e9f1a158..75122f3e 100644 --- a/test/unit/namespace.cpp +++ b/test/unit/namespace.cpp @@ -2,10 +2,10 @@ #include -static_assert(std::is_same_v, ankerl::unordered_dense::map>); -static_assert(std::is_same_v, ankerl::unordered_dense::hash>); +static_assert(std::is_same_v, 
ankerl::unordered_dense::map>); +static_assert(std::is_same_v, ankerl::unordered_dense::hash>); TEST_CASE("version_namespace") { - auto map = ankerl::unordered_dense::v1_3_1::map{}; + auto map = ankerl::unordered_dense::v1_3_2::map{}; REQUIRE(map.empty()); } diff --git a/test/unit/std_hash.cpp b/test/unit/std_hash.cpp index d24ee9e8..f9df6fdb 100644 --- a/test/unit/std_hash.cpp +++ b/test/unit/std_hash.cpp @@ -26,6 +26,8 @@ TEST_CASE("std_hash") { auto f = foo{12345}; REQUIRE(std::hash{}(f) == 12346U); // unordered_dense::hash blows that up to 64bit! - REQUIRE(ankerl::unordered_dense::hash{}(f) == UINT64_C(0x3F645BE4CE24110C)); + + // Just wraps std::hash + REQUIRE(ankerl::unordered_dense::hash{}(f) == UINT64_C(12346)); REQUIRE(ankerl::unordered_dense::hash{}(12346U) == UINT64_C(0x3F645BE4CE24110C)); }