Skip to content

Commit

Permalink
Fix multi FlatMap scale and size
Browse files Browse the repository at this point in the history
  • Loading branch information
chenBright committed Jun 23, 2024
1 parent 2e18318 commit 6819bb7
Show file tree
Hide file tree
Showing 4 changed files with 157 additions and 57 deletions.
26 changes: 13 additions & 13 deletions src/butil/bit_array.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,22 @@

namespace butil {

#define BIT_ARRAY_LEN(nbit) (((nbit) + 63 ) / 64 * 8)

// Create an array with at least |nbit| bits. The array is not cleared.
inline uint64_t* bit_array_malloc(size_t nbit)
{
inline uint64_t* bit_array_malloc(size_t nbit) {
if (!nbit) {
return NULL;
}
return (uint64_t*)malloc((nbit + 63 ) / 64 * 8/*different from /8*/);
return (uint64_t*)malloc(BIT_ARRAY_LEN(nbit)/*different from /8*/);
}

inline void bit_array_free(uint64_t* array) {
free(array);
}

// Set bit 0 ~ nbit-1 of |array| to be 0
inline void bit_array_clear(uint64_t* array, size_t nbit)
{
inline void bit_array_clear(uint64_t* array, size_t nbit) {
const size_t off = (nbit >> 6);
memset(array, 0, off * 8);
const size_t last = (off << 6);
Expand All @@ -49,31 +53,27 @@ inline void bit_array_clear(uint64_t* array, size_t nbit)
}

// Set i-th bit (from left, counting from 0) of |array| to be 1
inline void bit_array_set(uint64_t* array, size_t i)
{
inline void bit_array_set(uint64_t* array, size_t i) {
const size_t off = (i >> 6);
array[off] |= (((uint64_t)1) << (i - (off << 6)));
}

// Set i-th bit (from left, counting from 0) of |array| to be 0
inline void bit_array_unset(uint64_t* array, size_t i)
{
inline void bit_array_unset(uint64_t* array, size_t i) {
const size_t off = (i >> 6);
array[off] &= ~(((uint64_t)1) << (i - (off << 6)));
}

// Get i-th bit (from left, counting from 0) of |array|
inline uint64_t bit_array_get(const uint64_t* array, size_t i)
{
inline uint64_t bit_array_get(const uint64_t* array, size_t i) {
const size_t off = (i >> 6);
return (array[off] & (((uint64_t)1) << (i - (off << 6))));
}

// Find index of first 1-bit from bit |begin| to |end| in |array|.
// Returns |end| if all bits are 0.
// This function is of O(nbit) complexity.
inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end)
{
inline size_t bit_array_first1(const uint64_t* array, size_t begin, size_t end) {
size_t off1 = (begin >> 6);
const size_t first = (off1 << 6);
if (first != begin) {
Expand Down
32 changes: 30 additions & 2 deletions src/butil/containers/flat_map.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@
#include "butil/containers/hash_tables.h" // hash<>
#include "butil/bit_array.h" // bit_array_*
#include "butil/strings/string_piece.h" // StringPiece
#include "butil/memory/scope_guard.h"

namespace butil {

Expand Down Expand Up @@ -290,6 +291,18 @@ class FlatMap {
template <typename _Map, typename _Element> friend class FlatMapIterator;
template <typename _Map, typename _Element> friend class SparseFlatMapIterator;

struct NewBucketsInfo {
NewBucketsInfo()
: buckets(NULL), thumbnail(NULL), nbucket(0) {}
NewBucketsInfo(Bucket* b, uint64_t* t, size_t n)
: buckets(b), thumbnail(t), nbucket(n) {}
Bucket* buckets;
uint64_t* thumbnail;
size_t nbucket;
};

NewBucketsInfo new_buckets_and_thumbnail(size_t size, size_t new_nbucket);

// For `_Multi=true'.
// Insert a new default-constructed associated with |key| always.
// If size()*100/bucket_count() is more than load_factor,
Expand All @@ -299,8 +312,23 @@ template <typename _Map, typename _Element> friend class SparseFlatMapIterator;
typename std::enable_if<Multi, mapped_type&>::type operator[](const key_type& key);

// True if buckets need to be resized before holding `size' elements.
inline bool is_too_crowded(size_t size) const
{ return size * 100 >= _nbucket * _load_factor; }
bool is_too_crowded(size_t size) const {
return is_too_crowded(size, _nbucket, _load_factor);
}
static bool is_too_crowded(size_t size, size_t nbucket, u_int load_factor) {
return size * 100 >= nbucket * load_factor;
}

static void init_buckets_and_thumbnail(
Bucket* buckets, uint64_t* thumbnail, size_t nbucket) {
for (size_t i = 0; i < nbucket; ++i) {
buckets[i].set_invalid();
}
buckets[nbucket].next = NULL;
if (_Sparse) {
bit_array_clear(thumbnail, nbucket);
}
}

size_t _size;
size_t _nbucket;
Expand Down
127 changes: 87 additions & 40 deletions src/butil/containers/flat_map_inl.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,35 +272,32 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
if (!rhs.initialized()) {
return *this;
}
bool need_copy = !rhs.empty();
_load_factor = rhs._load_factor;
if (_buckets == NULL || is_too_crowded(rhs._size)) {
get_allocator().Free(_buckets);
_nbucket = rhs._nbucket;
// note: need an extra bucket to let iterator know where buckets end
_buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1/*note*/));
if (NULL == _buckets) {
LOG(ERROR) << "Fail to new _buckets";
NewBucketsInfo info = new_buckets_and_thumbnail(_size, rhs._nbucket);
if (0 == info.nbucket) {
LOG(ERROR) << "Invalid nbucket=0";
return *this;
}
// If no need to copy, set buckets invalid.
if (!need_copy) {
for (size_t i = 0; i < _nbucket; ++i) {
_buckets[i].set_invalid();
}
_buckets[_nbucket].next = NULL;
if (NULL == info.buckets) {
LOG(ERROR) << "Fail to new buckets";
return *this;
}
if (_S && NULL == info.thumbnail) {
LOG(ERROR) << "Fail to new thumbnail";
return *this;
}

_nbucket = info.nbucket;
get_allocator().Free(_buckets);
_buckets = info.buckets;
if (_S) {
free(_thumbnail);
_thumbnail = bit_array_malloc(_nbucket);
if (NULL == _thumbnail) {
LOG(ERROR) << "Fail to new _thumbnail";
return *this;
}
bit_array_clear(_thumbnail, _nbucket);
bit_array_free(_thumbnail);
_thumbnail = info.thumbnail;
}
}
if (!need_copy) {
if (rhs.empty()) {
// No need to copy, returns directly.
return *this;
}
if (_nbucket == rhs._nbucket) {
Expand All @@ -327,7 +324,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator=(const FlatMap<_K, _T, _H, _E, _S,
_size = rhs._size;
} else {
for (const_iterator it = rhs.begin(); it != rhs.end(); ++it) {
operator[](Element::first_ref_from_value(*it)) =
operator[](Element::first_ref_from_value(*it)) =
Element::second_ref_from_value(*it);
}
}
Expand All @@ -341,33 +338,33 @@ int FlatMap<_K, _T, _H, _E, _S, _A, _M>::init(size_t nbucket, u_int load_factor)
return -1;
}
if (nbucket == 0) {
LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket;
LOG(WARNING) << "Fail to init FlatMap, nbucket=" << nbucket;
return -1;
}
if (load_factor < 10 || load_factor > 100) {
LOG(ERROR) << "Invalid load_factor=" << load_factor;
return -1;
}
_size = 0;
_nbucket = flatmap_round(nbucket);
_load_factor = load_factor;
_buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (_nbucket + 1));
if (NULL == _buckets) {
LOG(ERROR) << "Fail to new _buckets";

NewBucketsInfo info = new_buckets_and_thumbnail(_size, nbucket);
if (0 == info.nbucket) {
LOG(ERROR) << "Invalid nbucket=0";
return -1;
}
for (size_t i = 0; i < _nbucket; ++i) {
_buckets[i].set_invalid();
if (NULL == info.buckets) {
LOG(ERROR) << "Fail to new buckets";
return -1;
}
_buckets[_nbucket].next = NULL;

if (_S && NULL == info.thumbnail) {
LOG(ERROR) << "Fail to new thumbnail";
return -1;
}
_nbucket = info.nbucket;
_buckets = info.buckets;
if (_S) {
_thumbnail = bit_array_malloc(_nbucket);
if (NULL == _thumbnail) {
LOG(ERROR) << "Fail to new _thumbnail";
return -1;
}
_thumbnail = info.thumbnail;
bit_array_clear(_thumbnail, _nbucket);
}
return 0;
Expand Down Expand Up @@ -625,7 +622,7 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
return first_node.element().second_ref();
}
Bucket *p = &first_node;
while (1) {
while (true) {
if (_eql(p->element().first_ref(), key)) {
return p->element().second_ref();
}
Expand Down Expand Up @@ -659,6 +656,24 @@ FlatMap<_K, _T, _H, _E, _S, _A, _M>::operator[](const key_type& key) {
new (&first_node) Bucket(key);
return first_node.element().second_ref();
}
if (is_too_crowded(_size)) {
Bucket *p = &first_node;
bool need_scale = false;
while (NULL != p) {
// Increase the capacity of bucket when
// hash collision occur and map is crowded.
if (!_eql(p->element().first_ref(), key)) {
need_scale = true;
break;
}
p = p->next;
}
if (need_scale && resize(_nbucket + 1)) {
return operator[](key);
}
// fail to resize is OK
}
++_size;
Bucket* newp = new (_pool.get()) Bucket(key);
newp->next = first_node.next;
first_node.next = newp;
Expand Down Expand Up @@ -720,15 +735,15 @@ bool FlatMap<_K, _T, _H, _E, _S, _A, _M>::resize(size_t nbucket2) {
return false;
}

// NOTE: following functors must be kept after resizing otherwise the
// NOTE: following functors must be kept after resizing otherwise the
// internal state is lost.
FlatMap new_map(_hashfn, _eql, get_allocator());
if (new_map.init(nbucket2, _load_factor) != 0) {
LOG(ERROR) << "Fail to init new_map, nbucket=" << nbucket2;
return false;
}
for (iterator it = begin(); it != end(); ++it) {
new_map[Element::first_ref_from_value(*it)] =
new_map[Element::first_ref_from_value(*it)] =
Element::second_movable_ref_from_value(*it);
}
new_map.swap(*this);
Expand All @@ -751,6 +766,38 @@ BucketInfo FlatMap<_K, _T, _H, _E, _S, _A, _M>::bucket_info() const {
return info;
}

template <typename _K, typename _T, typename _H, typename _E, bool _S, typename _A, bool _M>
typename FlatMap<_K, _T, _H, _E, _S, _A, _M>::NewBucketsInfo
FlatMap<_K, _T, _H, _E, _S, _A, _M>::new_buckets_and_thumbnail(size_t size,
size_t new_nbucket) {
do {
new_nbucket = flatmap_round(new_nbucket);
} while (is_too_crowded(size, new_nbucket, _load_factor));
// Note: need an extra bucket to let iterator know where buckets end.
auto buckets = (Bucket*)get_allocator().Alloc(sizeof(Bucket) * (
new_nbucket + 1/*note*/));
auto guard = MakeScopeGuard([buckets, this]() {
get_allocator().Free(buckets);
});
if (NULL == buckets) {
LOG(FATAL) << "Fail to new Buckets";
return {};
}

uint64_t* thumbnail = NULL;
if (_S) {
thumbnail = bit_array_malloc(new_nbucket);
if (NULL == thumbnail) {
LOG(FATAL) << "Fail to new thumbnail";
return {};
}
}

guard.dismiss();
init_buckets_and_thumbnail(buckets, thumbnail, new_nbucket);
return { buckets, thumbnail, new_nbucket };
}

inline std::ostream& operator<<(std::ostream& os, const BucketInfo& info) {
return os << "{maxb=" << info.longest_length
<< " avgb=" << info.average_length << '}';
Expand Down
Loading

0 comments on commit 6819bb7

Please sign in to comment.