From dd42c43df32c291186398c5ebeaef4973227551f Mon Sep 17 00:00:00 2001 From: Dayue Gao Date: Thu, 19 May 2022 23:37:59 +0800 Subject: [PATCH] [improvement][performance] improve lru cache resize performance and memory usage (#9521) --- be/src/olap/lru_cache.cpp | 70 ++++++++++++++------------------- be/src/olap/lru_cache.h | 7 +--- be/test/olap/lru_cache_test.cpp | 66 ++++++++++++------------------- 3 files changed, 58 insertions(+), 85 deletions(-) diff --git a/be/src/olap/lru_cache.cpp b/be/src/olap/lru_cache.cpp index 1a045b24a3ba41..584d7dccb99e53 100644 --- a/be/src/olap/lru_cache.cpp +++ b/be/src/olap/lru_cache.cpp @@ -73,9 +73,6 @@ uint32_t CacheKey::hash(const char* data, size_t n, uint32_t seed) const { Cache::~Cache() {} HandleTable::~HandleTable() { - for (uint32_t i = 0; i < _length; i++) { - delete _list[i]; - } delete[] _list; } @@ -85,13 +82,13 @@ LRUHandle* HandleTable::lookup(const CacheKey& key, uint32_t hash) { } LRUHandle* HandleTable::insert(LRUHandle* h) { - LRUHandle* old = remove(h->key(), h->hash); - LRUHandle* head = _list[h->hash & (_length - 1)]; - - _head_insert(head, h); - ++_elems; + LRUHandle** ptr = _find_pointer(h->key(), h->hash); + LRUHandle* old = *ptr; + h->next_hash = old ? old->next_hash : nullptr; + *ptr = h; if (old == nullptr) { + ++_elems; if (_elems > _length) { // Since each cache entry is fairly large, we aim for a small // average linked list length (<= 1). @@ -106,24 +103,31 @@ LRUHandle* HandleTable::remove(const CacheKey& key, uint32_t hash) { LRUHandle** ptr = _find_pointer(key, hash); LRUHandle* result = *ptr; - remove(result); + if (result != nullptr) { + *ptr = result->next_hash; + _elems--; + } return result; } -void HandleTable::remove(const LRUHandle* h) { - if (h != nullptr) { - if (h->next_hash != nullptr) { - h->next_hash->prev_hash = h->prev_hash; - } - DCHECK(h->prev_hash != nullptr); - h->prev_hash->next_hash = h->next_hash; - --_elems; +bool HandleTable::remove(const LRUHandle* h) { + LRUHandle** ptr = &(_list[h->hash & (_length - 1)]); + while (*ptr != nullptr && *ptr != h) { + ptr = &(*ptr)->next_hash; } + + LRUHandle* result = *ptr; + if (result != nullptr) { + *ptr = result->next_hash; + _elems--; + return true; + } + return false; } LRUHandle** HandleTable::_find_pointer(const CacheKey& key, uint32_t hash) { - LRUHandle** ptr = &(_list[hash & (_length - 1)]->next_hash); + LRUHandle** ptr = &(_list[hash & (_length - 1)]); while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) { ptr = &(*ptr)->next_hash; } @@ -131,15 +135,6 @@ LRUHandle** HandleTable::_find_pointer(const CacheKey& key, uint32_t hash) { return ptr; } -void HandleTable::_head_insert(LRUHandle* head, LRUHandle* handle) { - handle->next_hash = head->next_hash; - if (handle->next_hash != nullptr) { - handle->next_hash->prev_hash = handle; - } - handle->prev_hash = head; - head->next_hash = handle; -} - void HandleTable::_resize() { uint32_t new_length = 16; while (new_length < _elems * 1.5) { @@ -148,29 +143,22 @@ void HandleTable::_resize() { LRUHandle** new_list = new (std::nothrow) LRUHandle*[new_length]; memset(new_list, 0, sizeof(new_list[0]) * new_length); - for (uint32_t i = 0; i < new_length; i++) { - // The first node in the linked-list is a dummy node used for - // inserting new node mainly. - new_list[i] = new LRUHandle(); - } uint32_t count = 0; for (uint32_t i = 0; i < _length; i++) { - LRUHandle* h = _list[i]->next_hash; + LRUHandle* h = _list[i]; while (h != nullptr) { LRUHandle* next = h->next_hash; uint32_t hash = h->hash; - LRUHandle* head = new_list[hash & (new_length - 1)]; - _head_insert(head, h); + LRUHandle** ptr = &new_list[hash & (new_length - 1)]; + h->next_hash = *ptr; + *ptr = h; h = next; count++; } } DCHECK_EQ(_elems, count); - for (uint32_t i = 0; i < _length; i++) { - delete _list[i]; - } delete[] _list; _list = new_list; _length = new_length; @@ -240,7 +228,8 @@ void LRUCache::release(Cache::Handle* handle) { // only exists in cache if (_usage > _capacity) { // take this opportunity and remove the item - _table.remove(e); + bool removed = _table.remove(e); + DCHECK(removed); e->in_cache = false; _unref(e); _usage -= e->total_size; @@ -285,7 +274,8 @@ void LRUCache::_evict_one_entry(LRUHandle* e) { DCHECK(e->in_cache); DCHECK(e->refs == 1); // LRU list contains elements which may be evicted _lru_remove(e); - _table.remove(e); + bool removed = _table.remove(e); + DCHECK(removed); e->in_cache = false; _unref(e); _usage -= e->total_size; diff --git a/be/src/olap/lru_cache.h b/be/src/olap/lru_cache.h index 0a4a6ed96724ef..b6b1f95754e97f 100644 --- a/be/src/olap/lru_cache.h +++ b/be/src/olap/lru_cache.h @@ -227,7 +227,6 @@ typedef struct LRUHandle { void* value; void (*deleter)(const CacheKey&, void* value); LRUHandle* next_hash = nullptr; // next entry in hash table - LRUHandle* prev_hash = nullptr; // previous entry in hash table LRUHandle* next = nullptr; // next entry in lru list LRUHandle* prev = nullptr; // previous entry in lru list size_t charge; @@ -277,7 +276,8 @@ class HandleTable { // Remove element from hash table by "h", it would be faster // than the function above. - void remove(const LRUHandle* h); + // Return whether h is found and removed. + bool remove(const LRUHandle* h); private: FRIEND_TEST(CacheTest, HandleTableTest); @@ -293,9 +293,6 @@ class HandleTable { // pointer to the trailing slot in the corresponding linked list. LRUHandle** _find_pointer(const CacheKey& key, uint32_t hash); - // Insert "handle" after "head". - void _head_insert(LRUHandle* head, LRUHandle* handle); - void _resize(); }; diff --git a/be/test/olap/lru_cache_test.cpp b/be/test/olap/lru_cache_test.cpp index d34723640c4959..a23a608f396c66 100644 --- a/be/test/olap/lru_cache_test.cpp +++ b/be/test/olap/lru_cache_test.cpp @@ -228,35 +228,35 @@ TEST_F(CacheTest, Usage) { LRUCache cache(LRUCacheType::SIZE); cache.set_capacity(1050); - // The lru usage is handle_size + charge = 96 - 1 = 95 - // 95 + 3 means handle_size + key size + // The lru usage is handle_size + charge. + // handle_size = sizeof(handle) - 1 + key size = 88 - 1 + 3 = 90 CacheKey key1("100"); insert_LRUCache(cache, key1, 100, CachePriority::NORMAL); - EXPECT_EQ(198, cache.get_usage()); // 100 + 95 + 3 + ASSERT_EQ(190, cache.get_usage()); // 100 + 90 CacheKey key2("200"); insert_LRUCache(cache, key2, 200, CachePriority::DURABLE); - EXPECT_EQ(496, cache.get_usage()); // 198 + 200 + 95 + 3 + ASSERT_EQ(480, cache.get_usage()); // 190 + 290(d) CacheKey key3("300"); insert_LRUCache(cache, key3, 300, CachePriority::NORMAL); - EXPECT_EQ(894, cache.get_usage()); // 496 + 300 + 95 + 3 + ASSERT_EQ(870, cache.get_usage()); // 190 + 290(d) + 390 CacheKey key4("400"); insert_LRUCache(cache, key4, 400, CachePriority::NORMAL); - EXPECT_EQ(796, cache.get_usage()); // 894 + 400 + 95 + 3 - (300 + 100 + (95 + 3) * 2) + ASSERT_EQ(780, cache.get_usage()); // 290(d) + 490 CacheKey key5("500"); insert_LRUCache(cache, key5, 500, CachePriority::NORMAL); - EXPECT_EQ(896, cache.get_usage()); // 796 + 500 + 95 + 3 - (400 + 95 +3) + ASSERT_EQ(880, cache.get_usage()); // 290(d) + 590 CacheKey key6("600"); insert_LRUCache(cache, key6, 600, CachePriority::NORMAL); - EXPECT_EQ(996, cache.get_usage()); // 896 + 600 + 95 +3 - (500 + 95 + 3) + ASSERT_EQ(980, cache.get_usage()); // 290(d) + 690 CacheKey key7("950"); insert_LRUCache(cache, key7, 950, CachePriority::DURABLE); - EXPECT_EQ(1048, cache.get_usage()); // 996 + 950 + 95 +3 - (200 + 600 + (95 + 3) * 2) + ASSERT_EQ(1040, cache.get_usage()); // 1040(d) } TEST_F(CacheTest, Prune) { @@ -360,9 +360,7 @@ TEST(CacheHandleTest, HandleTableTest) { HandleTable ht; for (uint32_t i = 0; i < ht._length; ++i) { - EXPECT_NE(ht._list[i], nullptr); - EXPECT_EQ(ht._list[i]->next_hash, nullptr); - EXPECT_EQ(ht._list[i]->prev_hash, nullptr); + EXPECT_EQ(ht._list[i], nullptr); } const int count = 10; @@ -380,7 +378,6 @@ TEST(CacheHandleTest, HandleTableTest) { h->hash = 1; // make them in a same hash table linked-list h->refs = 0; h->next = h->prev = nullptr; - h->prev_hash = nullptr; h->next_hash = nullptr; h->in_cache = false; h->priority = CachePriority::NORMAL; @@ -392,18 +389,15 @@ TEST(CacheHandleTest, HandleTableTest) { hs[i] = h; } EXPECT_EQ(ht._elems, count); - LRUHandle* h = ht.lookup(CacheKey(std::to_string(count - 1)), 1); - LRUHandle* head = ht._list[1 & (ht._length - 1)]; - EXPECT_EQ(head, h->prev_hash); - EXPECT_EQ(head->next_hash, h); - int index = count - 1; + LRUHandle* h = ht.lookup(keys[0], 1); + LRUHandle** head_ptr = &(ht._list[1 & (ht._length - 1)]); + LRUHandle* head = *head_ptr; + ASSERT_EQ(head, h); + int index = 0; while (h != nullptr) { EXPECT_EQ(hs[index], h) << index; h = h->next_hash; - if (h != nullptr) { - EXPECT_EQ(hs[index], h->prev_hash); - } - --index; + ++index; } for (int i = 0; i < count; ++i) { @@ -417,7 +411,6 @@ TEST(CacheHandleTest, HandleTableTest) { h->hash = 1; // make them in a same hash table linked-list h->refs = 0; h->next = h->prev = nullptr; - h->prev_hash = nullptr; h->next_hash = nullptr; h->in_cache = false; h->priority = CachePriority::NORMAL; @@ -434,28 +427,21 @@ TEST(CacheHandleTest, HandleTableTest) { EXPECT_EQ(ht.lookup(keys[i], 1), hs[i]); } - LRUHandle* old = ht.remove(CacheKey("9"), 1); // first in hash table linked-list - EXPECT_EQ(old, hs[9]); - EXPECT_EQ(old->prev_hash, head); - EXPECT_EQ(old->next_hash, hs[8]); // hs[8] is the new first node - EXPECT_EQ(head->next_hash, hs[8]); - EXPECT_EQ(hs[8]->prev_hash, head); + LRUHandle* old = ht.remove(CacheKey("0"), 1); // first in hash table linked-list + ASSERT_EQ(old, hs[0]); + ASSERT_EQ(old->next_hash, hs[1]); // hs[1] is the new first node + ASSERT_EQ(*head_ptr, hs[1]); - old = ht.remove(CacheKey("0"), 1); // last in hash table linked-list - EXPECT_EQ(old, hs[0]); - EXPECT_EQ(old->prev_hash, hs[1]); // hs[1] is the new last node - EXPECT_EQ(old->prev_hash->next_hash, nullptr); + old = ht.remove(CacheKey("9"), 1); // last in hash table linked-list + ASSERT_EQ(old, hs[9]); old = ht.remove(CacheKey("5"), 1); // middle in hash table linked-list - EXPECT_EQ(old, hs[5]); - EXPECT_EQ(old->prev_hash, hs[6]); - EXPECT_EQ(old->next_hash, hs[4]); - EXPECT_EQ(hs[6]->next_hash, hs[4]); - EXPECT_EQ(hs[4]->prev_hash, hs[6]); + ASSERT_EQ(old, hs[5]); + ASSERT_EQ(old->next_hash, hs[6]); + ASSERT_EQ(hs[4]->next_hash, hs[6]); ht.remove(hs[4]); // middle in hash table linked-list - EXPECT_EQ(hs[6]->next_hash, hs[3]); - EXPECT_EQ(hs[3]->prev_hash, hs[6]); + ASSERT_EQ(hs[3]->next_hash, hs[6]); EXPECT_EQ(ht._elems, count - 4);