From 6819bb720fed4dd829373ceb9c650e015c528131 Mon Sep 17 00:00:00 2001
From: Bright Chen
Date: Sun, 23 Jun 2024 23:19:57 +0800
Subject: [PATCH] Fix multi FlatMap scale and size

---
 src/butil/bit_array.h               |  26 +++---
 src/butil/containers/flat_map.h     |  32 ++++++-
 src/butil/containers/flat_map_inl.h | 127 +++++++++++++++++++---------
 test/flat_map_unittest.cpp          |  29 ++++++-
 4 files changed, 157 insertions(+), 57 deletions(-)

diff --git a/src/butil/bit_array.h b/src/butil/bit_array.h
index 54beb39862..3bcc694844 100644
--- a/src/butil/bit_array.h
+++ b/src/butil/bit_array.h
@@ -28,18 +28,22 @@
 namespace butil {
 
+// Bytes needed to hold |nbit| bits, rounded up to whole uint64_t words.
+#define BIT_ARRAY_LEN(nbit) (((nbit) + 63) / 64 * 8)
+
 // Create an array with at least |nbit| bits. The array is not cleared.
-inline uint64_t* bit_array_malloc(size_t nbit)
-{
+inline uint64_t* bit_array_malloc(size_t nbit) {
     if (!nbit) {
         return NULL;
     }
-    return (uint64_t*)malloc((nbit + 63 ) / 64 * 8/*different from /8*/);
+    return (uint64_t*)malloc(BIT_ARRAY_LEN(nbit)/*different from nbit/8*/);
+}
+
+// Free an array created by bit_array_malloc().
+inline void bit_array_free(uint64_t* array) {
+    free(array);
 }
 
 // Set bit 0 ~ nbit-1 of |array| to be 0
-inline void bit_array_clear(uint64_t* array, size_t nbit)
-{
+inline void bit_array_clear(uint64_t* array, size_t nbit) {
     const size_t off = (nbit >> 6);
     memset(array, 0, off * 8);
     const size_t last = (off << 6);
@@ -49,22 +53,19 @@ inline void bit_array_clear(uint64_t* array, size_t nbit)
 }
 
 // Set i-th bit (from left, counting from 0) of |array| to be 1
-inline void bit_array_set(uint64_t* array, size_t i)
-{
+inline void bit_array_set(uint64_t* array, size_t i) {
     const size_t off = (i >> 6);
     array[off] |= (((uint64_t)1) << (i - (off << 6)));
 }
 
 // Set i-th bit (from left, counting from 0) of |array| to be 0
-inline void bit_array_unset(uint64_t* array, size_t i)
-{
+inline void bit_array_unset(uint64_t* array, size_t i) {
     const size_t off = (i >> 6);
     array[off] &= ~(((uint64_t)1) << (i - (off << 6)));
 }
 
 // Get i-th bit (from left, counting from 0) of |array|
-inline uint64_t bit_array_get(const uint64_t* array, size_t i)
-{
+inline uint64_t bit_array_get(const uint64_t* array, size_t i) {
     const size_t off = (i >> 6);
     return (array[off] & (((uint64_t)1) << (i - (off << 6))));
 }
@@ -72,8 +73,7 @@ inline uint64_t bit_array_get(const uint64_t* array, size_t i)
 // Find index of first 1-bit from bit |begin| to |end| in |array|.
 // Returns |end| if all bits are 0.
 // This function is of O(nbit) complexity.
-inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end)
-{
+inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end) {
     size_t off1 = (begin >> 6);
     const size_t first = (off1 << 6);
     if (first != begin) {
diff --git a/src/butil/containers/flat_map.h b/src/butil/containers/flat_map.h
index f0d1d1ca2f..40c44c5988 100644
--- a/src/butil/containers/flat_map.h
+++ b/src/butil/containers/flat_map.h
@@ -104,6 +104,7 @@
 #include "butil/containers/hash_tables.h"  // hash<>
 #include "butil/bit_array.h"               // bit_array_*
 #include "butil/strings/string_piece.h"    // StringPiece
+#include "butil/memory/scope_guard.h"
 
 namespace butil {
 
@@ -290,6 +291,18 @@ class FlatMap {
 template <typename _Map, typename _Element> friend class FlatMapIterator;
 template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
 
+    struct NewBucketsInfo {
+        NewBucketsInfo()
+            : buckets(NULL), thumbnail(NULL), nbucket(0) {}
+        NewBucketsInfo(Bucket* b, uint64_t* t, size_t n)
+            : buckets(b), thumbnail(t), nbucket(n) {}
+        Bucket* buckets;
+        uint64_t* thumbnail;
+        size_t nbucket;
+    };
+
+    // Allocate and initialize buckets (and the thumbnail for sparse maps)
+    // able to hold at least `size' elements in no less than `new_nbucket'
+    // buckets. Returns a zeroed NewBucketsInfo on failure.
+    NewBucketsInfo new_buckets_and_thumbnail(size_t size, size_t new_nbucket);
+
     // For `_Multi=true'.
     // Insert a new default-constructed value associated with |key| always.
     // If size()*100/bucket_count() is more than load_factor,
@@ -299,8 +312,23 @@ template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
     typename std::enable_if<_Multi2, _T&>::type operator[](const key_type& key);
 
     // True if buckets need to be resized before holding `size' elements.
-    inline bool is_too_crowded(size_t size) const
-    { return size * 100 >= _nbucket * _load_factor; }
+    bool is_too_crowded(size_t size) const {
+        return is_too_crowded(size, _nbucket, _load_factor);
+    }
+    static bool is_too_crowded(size_t size, size_t nbucket, u_int load_factor) {
+        return size * 100 >= nbucket * load_factor;
+    }
+
+    static void init_buckets_and_thumbnail(
+        Bucket* buckets, uint64_t* thumbnail, size_t nbucket) {
+        for (size_t i = 0; i < nbucket; ++i) {
+            buckets[i].set_invalid();
+        }
+        buckets[nbucket].next = NULL;
+        if (_Sparse) {
+            bit_array_clear(thumbnail, nbucket);
+        }
+    }
 
     size_t _size;
     size_t _nbucket;
diff --git a/src/butil/containers/flat_map_inl.h b/src/butil/containers/flat_map_inl.h
index 5218fab9b7..551ffe81b5 100644
--- a/src/butil/containers/flat_map_inl.h
+++ b/src/butil/containers/flat_map_inl.h
@@ -272,35 +272,32 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
     if (!rhs.initialized()) {
         return *this;
     }
-    bool need_copy = !rhs.empty();
     _load_factor = rhs._load_factor;
     if (_buckets == NULL || is_too_crowded(rhs._size)) {
-        get_allocator().Free(_buckets);
-        _nbucket = rhs._nbucket;
-        // note: need an extra bucket to let iterator know where buckets end
-        _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1/*note*/));
-        if (NULL == _buckets) {
-            LOG(ERROR) << "Fail to new _buckets";
+        // Allocate for rhs's elements; our own `_size' is stale at this point.
+        NewBucketsInfo info = new_buckets_and_thumbnail(rhs._size, rhs._nbucket);
+        if (0 == info.nbucket) {
+            LOG(ERROR) << "Invalid nbucket=0";
             return *this;
         }
-        // If no need to copy, set buckets invalid.
-        if (!need_copy) {
-            for (size_t i = 0; i < _nbucket; ++i) {
-                _buckets[i].set_invalid();
-            }
-            _buckets[_nbucket].next = NULL;
+        if (NULL == info.buckets) {
+            LOG(ERROR) << "Fail to new buckets";
+            return *this;
         }
+        if (_S && NULL == info.thumbnail) {
+            LOG(ERROR) << "Fail to new thumbnail";
+            return *this;
+        }
+
+        _nbucket = info.nbucket;
+        get_allocator().Free(_buckets);
+        _buckets = info.buckets;
         if (_S) {
-            free(_thumbnail);
-            _thumbnail = bit_array_malloc(_nbucket);
-            if (NULL == _thumbnail) {
-                LOG(ERROR) << "Fail to new _thumbnail";
-                return *this;
-            }
-            bit_array_clear(_thumbnail, _nbucket);
+            bit_array_free(_thumbnail);
+            _thumbnail = info.thumbnail;
         }
     }
-    if (!need_copy) {
+    if (rhs.empty()) {
+        // Nothing to copy, return directly.
         return *this;
     }
     if (_nbucket == rhs._nbucket) {
@@ -327,7 +324,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
         _size = rhs._size;
     } else {
         for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
-            operator[](Element::first_ref_from_value(*it)) = 
+            operator[](Element::first_ref_from_value(*it)) =
                 Element::second_ref_from_value(*it);
         }
     }
@@ -341,7 +338,7 @@ int FlatMap<_K, _T, _H, _E, _S, _A, _M>::init(size_t nbucket, u_int load_factor)
         return -1;
     }
     if (nbucket == 0) {
-        LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket; 
+        LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket;
         return -1;
     }
     if (load_factor < 10 || load_factor > 100) {
@@ -349,25 +346,25 @@ int FlatMap<_K, _T, _H, _E, _S, _A, _M>::init(size_t nbucket, u_int load_factor)
         return -1;
     }
     _size = 0;
-    _nbucket = flatmap_round(nbucket);
     _load_factor = load_factor;
-
-    _buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1));
-    if (NULL == _buckets) {
-        LOG(ERROR) << "Fail to new _buckets";
+
+    NewBucketsInfo info = new_buckets_and_thumbnail(_size, nbucket);
+    if (0 == info.nbucket) {
+        LOG(ERROR) << "Invalid nbucket=0";
         return -1;
     }
-    for (size_t i = 0; i < _nbucket; ++i) {
-        _buckets[i].set_invalid();
+    if (NULL == info.buckets) {
+        LOG(ERROR) << "Fail to new buckets";
+        return -1;
     }
-    _buckets[_nbucket].next = NULL;
-
+    if (_S && NULL == info.thumbnail) {
+        LOG(ERROR) << "Fail to new thumbnail";
+        return -1;
+    }
+    _nbucket = info.nbucket;
+    _buckets = info.buckets;
     if (_S) {
-        _thumbnail = bit_array_malloc(_nbucket);
-        if (NULL == _thumbnail) {
-            LOG(ERROR) << "Fail to new _thumbnail";
-            return -1;
-        }
+        _thumbnail = info.thumbnail;
         bit_array_clear(_thumbnail, _nbucket);
     }
     return 0;
@@ -625,7 +622,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
         return first_node.element().second_ref();
     }
     Bucket *p = &first_node;
-    while (1) {
+    while (true) {
         if (_eql(p->element().first_ref(), key)) {
             return p->element().second_ref();
         }
@@ -659,6 +656,24 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
         new (&first_node) Bucket(key);
         return first_node.element().second_ref();
     }
+    if (is_too_crowded(_size)) {
+        Bucket *p = &first_node;
+        bool need_scale = false;
+        while (NULL != p) {
+            // Increase the capacity of buckets only when a hash collision
+            // occurs (a different key lives in this bucket) and the map is
+            // crowded; appending duplicates of one key gains nothing from
+            // rehashing.
+            if (!_eql(p->element().first_ref(), key)) {
+                need_scale = true;
+                break;
+            }
+            p = p->next;
+        }
+        if (need_scale && resize(_nbucket + 1)) {
+            return operator[](key);
+        }
+        // Failing to resize is OK: the bucket chain just stays longer.
+    }
+    ++_size;
     Bucket* newp = new (_pool.get()) Bucket(key);
     newp->next = first_node.next;
     first_node.next = newp;
@@ -720,7 +735,7 @@ bool FlatMap<_K, _T, _H, _E, _S, _A, _M>::resize(size_t nbucket2) {
         return false;
     }
 
-    // NOTE: following functors must be kept after resizing otherwise the 
+    // NOTE: following functors must be kept after resizing otherwise the
     // internal state is lost.
     FlatMap new_map(_hashfn, _eql, get_allocator());
     if (new_map.init(nbucket2, _load_factor) != 0) {
@@ -728,7 +743,7 @@ bool FlatMap<_K, _T, _H, _E, _S, _A, _M>::resize(size_t nbucket2) {
         return false;
     }
     for (iterator it = begin(); it != end(); ++it) {
-        new_map[Element::first_ref_from_value(*it)] = 
+        new_map[Element::first_ref_from_value(*it)] =
             Element::second_movable_ref_from_value(*it);
     }
     new_map.swap(*this);
@@ -751,6 +766,38 @@ BucketInfo FlatMap<_K, _T, _H, _E, _S, _A, _M>::bucket_info() const {
     return info;
 }
 
+template <typename _K, typename _T, typename _H, typename _E, bool _S,
+          typename _A, bool _M>
+typename FlatMap<_K, _T, _H, _E, _S, _A, _M>::NewBucketsInfo
+FlatMap<_K, _T, _H, _E, _S, _A, _M>::new_buckets_and_thumbnail(
+    size_t size, size_t new_nbucket) {
+    new_nbucket = flatmap_round(new_nbucket);
+    while (is_too_crowded(size, new_nbucket, _load_factor)) {
+        // flatmap_round() is idempotent on powers of two, so bump the count
+        // before rounding again, otherwise this loop would never terminate.
+        new_nbucket = flatmap_round(new_nbucket + 1);
+    }
+    // Note: need an extra bucket to let iterator know where buckets end.
+    auto buckets = (Bucket*)get_allocator().Alloc(
+        sizeof(Bucket) * (new_nbucket + 1/*note*/));
+    if (NULL == buckets) {
+        LOG(FATAL) << "Fail to new buckets";
+        return {};
+    }
+    // Free `buckets' automatically if the thumbnail allocation below fails.
+    auto guard = MakeScopeGuard([buckets, this]() {
+        get_allocator().Free(buckets);
+    });
+
+    uint64_t* thumbnail = NULL;
+    if (_S) {
+        thumbnail = bit_array_malloc(new_nbucket);
+        if (NULL == thumbnail) {
+            LOG(FATAL) << "Fail to new thumbnail";
+            return {};
+        }
+    }
+
+    guard.dismiss();
+    init_buckets_and_thumbnail(buckets, thumbnail, new_nbucket);
+    return { buckets, thumbnail, new_nbucket };
+}
+
 inline std::ostream& operator<<(std::ostream& os, const BucketInfo& info) {
     return os << "{maxb=" << info.longest_length
               << " avgb=" << info.average_length << '}';
diff --git a/test/flat_map_unittest.cpp b/test/flat_map_unittest.cpp
index 5d9850d718..c3368d365a 100644
--- a/test/flat_map_unittest.cpp
+++ b/test/flat_map_unittest.cpp
@@ -1437,6 +1437,10 @@ TEST_F(FlatMapTest, copy) {
     m3 = m2;
     ASSERT_TRUE(m3.initialized());
+    ASSERT_TRUE(m3.seek(1));
+    ASSERT_TRUE(m3.seek(2));
+    ASSERT_FALSE(m3.seek(3));
+
     m3.clear();
     ASSERT_TRUE(m3.initialized());
     ASSERT_TRUE(m3.empty());
@@ -1451,26 +1455,30 @@ TEST_F(FlatMapTest, multi) {
     g_foo_copy_ctor = 0;
     g_foo_assign = 0;
     butil::MultiFlatMap<int, Foo> map;
-    int bucket_count = 32;
+    size_t bucket_count = 32;
     ASSERT_EQ(0, map.init(bucket_count));
     ASSERT_EQ(0, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(0, g_foo_assign);
     Foo& f1 = map[1];
+    ASSERT_EQ(1UL, map.size());
     ASSERT_EQ(1, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(0, g_foo_assign);
     Foo& f2 = map[1];
+    ASSERT_EQ(2UL, map.size());
     ASSERT_EQ(2, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(0, g_foo_assign);
     Foo f3;
     Foo& f4 = *map.insert(1, f3);
+    ASSERT_EQ(3UL, map.size());
     ASSERT_EQ(4, g_foo_ctor);
     ASSERT_EQ(0, g_foo_copy_ctor);
     ASSERT_EQ(1, g_foo_assign);
     ASSERT_EQ(&f1, map.seek(1));
     std::vector<Foo*> f_vec = map.seek_all(1);
+    ASSERT_EQ(3UL, f_vec.size());
     ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f1));
     ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f2));
     ASSERT_NE(f_vec.end(), std::find(f_vec.begin(), f_vec.end(), &f4));
@@ -1479,9 +1487,10 @@ TEST_F(FlatMapTest, multi) {
     int same_bucket_key = 1 + bucket_count;
     butil::DefaultHasher<int> hasher;
     ASSERT_EQ(butil::flatmap_mod(hasher(1), bucket_count),
-        butil::flatmap_mod(hasher(same_bucket_key), bucket_count));
+              butil::flatmap_mod(hasher(same_bucket_key), bucket_count));
     ASSERT_EQ(0, map.erase(same_bucket_key));
     Foo& f5 = map[same_bucket_key];
+    ASSERT_EQ(4UL, map.size());
     ASSERT_EQ(&f5, map.seek(same_bucket_key));
     ASSERT_EQ(1UL, map.seek_all(same_bucket_key).size());
     ASSERT_EQ(5, g_foo_ctor);
@@ -1489,15 +1498,31 @@ TEST_F(FlatMapTest, multi) {
     ASSERT_EQ(1, g_foo_assign);
     ASSERT_EQ(&f5, map.seek(same_bucket_key));
     ASSERT_EQ(3u, map.erase(1));
+    ASSERT_EQ(1UL, map.size());
     ASSERT_EQ(nullptr, map.seek(1));
     ASSERT_TRUE(map.seek_all(1).empty());
     // Value node of same_bucket_key is the last one in the bucket,
     // so it has been moved to the first node.
     ASSERT_EQ(&f1, map.seek(same_bucket_key));
     ASSERT_EQ(1UL, map.erase(same_bucket_key));
+    ASSERT_EQ(0UL, map.size());
     ASSERT_EQ(nullptr, map.seek(same_bucket_key));
     ASSERT_TRUE(map.seek_all(same_bucket_key).empty());
 
+    // Buckets are scaled up when a hash collision occurs and the map is
+    // crowded: 33 distinct keys no longer fit 32 buckets, so the bucket
+    // count doubles.
+    for (size_t i = 0; i < bucket_count + 1; ++i) {
+        map[i] = Foo();
+    }
+    ASSERT_EQ(bucket_count + 1, map.size());
+    ASSERT_EQ(butil::flatmap_round(bucket_count + 1), map.bucket_count());
+
+    // No need to increase the capacity when the key is already in the map:
+    // duplicates of one key do not slow down lookups of other keys.
+    for (size_t i = 0; i < bucket_count + 1; ++i) {
+        map[1] = Foo();
+    }
+    ASSERT_EQ((bucket_count + 1) * 2, map.size());
+    ASSERT_EQ(butil::flatmap_round(bucket_count + 1), map.bucket_count());
+
     // Zeroize POD values.
     butil::MultiFlatMap<int, int> map2;
    ASSERT_EQ(0, map2.init(32));
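
---

Notes for reviewers (illustrative sketches, not part of the patch):

The bit_array helpers touched above are self-contained. A minimal sketch of
how they compose, assuming only butil/bit_array.h from this tree; main() and
the sample bit positions are made up for illustration:

    #include <cstdio>
    #include "butil/bit_array.h"

    int main() {
        const size_t nbit = 100;
        // BIT_ARRAY_LEN rounds up to whole uint64_t words: 100 bits -> 16 bytes.
        uint64_t* ba = butil::bit_array_malloc(nbit);  // not cleared yet
        if (ba == NULL) {
            return 1;
        }
        butil::bit_array_clear(ba, nbit);              // zero bits [0, nbit)
        butil::bit_array_set(ba, 5);
        butil::bit_array_set(ba, 64);                  // lands in the 2nd word
        // bit_array_first1() scans [begin, end) and returns |end| if all 0.
        printf("%zu %zu\n",
               butil::bit_array_first1(ba, 0, nbit),   // 5
               butil::bit_array_first1(ba, 6, nbit));  // 64
        butil::bit_array_unset(ba, 5);
        printf("%zu\n", butil::bit_array_first1(ba, 0, nbit));  // 64
        butil::bit_array_free(ba);                     // added by this patch
        return 0;
    }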
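new_buckets_and_thumbnail() leans on butil's scope guard so the bucket array
cannot leak when the thumbnail allocation fails. The same pattern in
isolation, assuming the MakeScopeGuard()/dismiss() API used in the patch;
alloc_pair and its parameters are hypothetical:

    #include <cstdlib>
    #include "butil/memory/scope_guard.h"

    // Returns both blocks or neither; |a| and |b| are coupled allocations.
    bool alloc_pair(char** a, char** b, size_t n) {
        *a = (char*)malloc(n);
        if (*a == NULL) {
            return false;
        }
        // Runs at scope exit unless dismissed, freeing *a on early return.
        auto guard = butil::MakeScopeGuard([a]() { free(*a); });
        *b = (char*)malloc(n);
        if (*b == NULL) {
            return false;  // guard frees *a here
        }
        guard.dismiss();   // success: ownership passes to the caller
        return true;
    }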
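The unittest additions pin down the two user-visible fixes: duplicate keys now
count toward size(), and a crowded map only rehashes on a real collision. The
same behavior in a minimal sketch, assuming this tree's headers and the
default load factor of 80; the printed numbers follow from the test
expectations above:

    #include <cstdio>
    #include "butil/containers/flat_map.h"

    int main() {
        butil::MultiFlatMap<int, int> m;
        if (m.init(32) != 0) {
            return 1;
        }
        // A MultiFlatMap inserts a node per operator[] call, and after this
        // patch every duplicate key also bumps size().
        m[1] = 10;
        m[1] = 20;
        printf("size=%zu nbucket=%zu\n", m.size(), m.bucket_count());  // 2, 32
        // 33 distinct keys: once the map is crowded and a bucket holds a
        // different key (a real collision), the bucket array doubles to 64.
        for (int i = 0; i < 33; ++i) {
            m[i] = i;
        }
        printf("size=%zu nbucket=%zu\n", m.size(), m.bucket_count());  // 35, 64
        return 0;
    }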