From efe44b0c116135740e2fd4ac525d9e8935618adc Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 11 Jul 2023 14:12:44 +0000 Subject: [PATCH 01/10] replace element_neighbors_locks_ lockes with read locks where possible --- src/VecSim/algorithms/hnsw/hnsw.h | 62 ++++++++++--------- .../algorithms/hnsw/hnsw_batch_iterator.h | 4 +- src/VecSim/index_factories/hnsw_factory.cpp | 4 +- tests/unit/test_allocator.cpp | 2 +- 4 files changed, 38 insertions(+), 34 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index c4726ef66..8d5f20318 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -47,6 +47,9 @@ template using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue; using graphNodeType = pair; // represented as: (element_id, level) +using elem_mutex_t = std::shared_mutex; +using elem_write_mutex_t = std::unique_lock; +using elem_read_mutex_t = std::shared_lock; // Vectors flags (for marking a specific vector) typedef enum { DELETE_MARK = 0x1, // element is logically deleted, but still exists in the graph @@ -113,7 +116,7 @@ class HNSWIndex : public VecSimIndexAbstract, mutable VisitedNodesHandlerPool visited_nodes_handler_pool; mutable std::shared_mutex index_data_guard_; - mutable vecsim_stl::vector element_neighbors_locks_; + mutable vecsim_stl::vector element_neighbors_locks_; #ifdef BUILD_TESTS #include "VecSim/algorithms/hnsw/hnsw_base_tests_friends.h" @@ -169,8 +172,8 @@ class HNSWIndex : public VecSimIndexAbstract, const std::pair &neighbor_data, idType *new_node_neighbors_list, idType *neighbor_neighbors_list, - std::unique_lock &node_lock, - std::unique_lock &neighbor_lock); + elem_write_mutex_t &node_lock, + elem_write_mutex_t &neighbor_lock); inline idType mutuallyConnectNewElement(idType new_node_id, candidatesMaxHeap &top_candidates, size_t level); @@ -234,8 +237,8 @@ class HNSWIndex : public VecSimIndexAbstract, inline auto safeGetEntryPointState() const; inline void lockIndexDataGuard() const; inline void unlockIndexDataGuard() const; - inline void lockNodeLinks(idType node_id) const; - inline void unlockNodeLinks(idType node_id) const; + inline void lockForReadNodeLinks(idType node_id) const; + inline void unlockForReadNodeLinks(idType node_id) const; inline VisitedNodesHandler *getVisitedList() const; inline void returnVisitedList(VisitedNodesHandler *visited_nodes_handler) const; VecSimIndexInfo info() const override; @@ -515,13 +518,13 @@ void HNSWIndex::unlockIndexDataGuard() const { } template -void HNSWIndex::lockNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].lock(); +void HNSWIndex::lockForReadNodeLinks(idType node_id) const { + element_neighbors_locks_[node_id].lock_shared(); } template -void HNSWIndex::unlockNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].unlock(); +void HNSWIndex::unlockForReadNodeLinks(idType node_id) const { + element_neighbors_locks_[node_id].unlock_shared(); } template @@ -585,7 +588,7 @@ DistType HNSWIndex::processCandidate( tag_t *elements_tags, vecsim_stl::abstract_priority_queue &top_candidates, candidatesMaxHeap &candidate_set, DistType lowerBound) const { - std::unique_lock lock(element_neighbors_locks_[curNodeId]); + elem_read_mutex_t lock(element_neighbors_locks_[curNodeId]); idType *node_links = getNodeNeighborsAtLevel(curNodeId, layer); linkListSize links_num = getNodeNeighborsCount(node_links); @@ -637,7 +640,7 @@ void HNSWIndex::processCandidate_RangeSearch( tag_t *elements_tags, std::unique_ptr &results, candidatesMaxHeap &candidate_set, DistType dyn_range, double radius) const { - std::unique_lock lock(element_neighbors_locks_[curNodeId]); + elem_read_mutex_t lock(element_neighbors_locks_[curNodeId]); idType *node_links = getNodeNeighborsAtLevel(curNodeId, layer); linkListSize links_num = getNodeNeighborsCount(node_links); @@ -767,7 +770,7 @@ template void HNSWIndex::revisitNeighborConnections( size_t level, idType new_node_id, const std::pair &neighbor_data, idType *new_node_neighbors_list, idType *neighbor_neighbors_list, - std::unique_lock &node_lock, std::unique_lock &neighbor_lock) { + elem_write_mutex_t &node_lock, elem_write_mutex_t &neighbor_lock) { // Note - expect that node_lock and neighbor_lock are locked at that point. // Collect the existing neighbors and the new node as the neighbor's neighbors candidates. @@ -824,9 +827,9 @@ void HNSWIndex::revisitNeighborConnections( std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); - std::unique_lock locks[nodes_to_update_count]; + elem_write_mutex_t locks[nodes_to_update_count]; for (size_t i = 0; i < nodes_to_update_count; i++) { - locks[i] = std::unique_lock(element_neighbors_locks_[nodes_to_update[i]]); + locks[i] = elem_write_mutex_t(element_neighbors_locks_[nodes_to_update[i]]); } auto *neighbour_incoming_edges = getIncomingEdgesPtr(selected_neighbor, level); @@ -915,17 +918,17 @@ idType HNSWIndex::mutuallyConnectNewElement( for (auto &neighbor_data : selected_neighbors) { idType selected_neighbor = neighbor_data.second; // neighbor's id - std::unique_lock node_lock; - std::unique_lock neighbor_lock; + elem_write_mutex_t node_lock; + elem_write_mutex_t neighbor_lock; idType lower_id = (new_node_id < selected_neighbor) ? new_node_id : selected_neighbor; if (lower_id == new_node_id) { - node_lock = std::unique_lock(element_neighbors_locks_[new_node_id]); + node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); neighbor_lock = - std::unique_lock(element_neighbors_locks_[selected_neighbor]); + elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); } else { neighbor_lock = - std::unique_lock(element_neighbors_locks_[selected_neighbor]); - node_lock = std::unique_lock(element_neighbors_locks_[new_node_id]); + elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); + node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); } // get the updated count - this may change between iterations due to releasing the lock. @@ -1068,7 +1071,8 @@ void HNSWIndex::replaceEntryPoint() { volatile idType candidate_in_process = INVALID_ID; { // Go over the entry point's neighbors at the top level. - std::unique_lock lock(this->element_neighbors_locks_[entrypoint_node_]); + // Assuming the lock only protects the entrypoint's neighbours and not the entrypoint itself, we can use a shred lock here, + elem_read_mutex_t lock(this->element_neighbors_locks_[entrypoint_node_]); idType *top_level_list = getNodeNeighborsAtLevel(old_entry, max_level_); auto neighbors_count = getNodeNeighborsCount(top_level_list); // Tries to set the (arbitrary) first neighbor as the entry point which is not deleted, @@ -1227,7 +1231,7 @@ void HNSWIndex::greedySearchLevel(const void *vector_data, s } changed = false; - std::unique_lock lock(element_neighbors_locks_[bestCand]); + elem_read_mutex_t lock(element_neighbors_locks_[bestCand]); idType *node_links = getNodeNeighborsAtNonBaseLevel(bestCand, level); linkListSize links_count = getNodeNeighborsCount(node_links); @@ -1265,7 +1269,7 @@ HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_i for (size_t level = 0; level <= node_top_level; level++) { // Save the node neighbor's in the current level while holding its neighbors lock. std::vector neighbors_copy; - std::unique_lock element_lock(element_neighbors_locks_[node_id]); + elem_read_mutex_t element_lock(element_neighbors_locks_[node_id]); auto *neighbours = getNodeNeighborsAtLevel(node_id, level); unsigned short neighbours_count = getNodeNeighborsCount(neighbours); // Store the deleted element's neighbours. @@ -1275,7 +1279,7 @@ HNSWIndex::safeCollectAllNodeIncomingNeighbors(idType node_i // Go over the neighbours and collect tho ones that also points back to the removed node. for (auto neighbour_id : neighbors_copy) { // Hold the neighbor's lock while we are going over its neighbors. - std::unique_lock neighbor_lock(element_neighbors_locks_[neighbour_id]); + elem_read_mutex_t neighbor_lock(element_neighbors_locks_[neighbour_id]); auto *neighbour_neighbours = getNodeNeighborsAtLevel(neighbour_id, level); unsigned short neighbour_neighbours_count = getNodeNeighborsCount(neighbour_neighbours); for (size_t j = 0; j < neighbour_neighbours_count; j++) { @@ -1312,7 +1316,7 @@ void HNSWIndex::resizeIndexInternal(size_t new_max_elements) element_levels_.shrink_to_fit(); resizeLabelLookup(new_max_elements); visited_nodes_handler_pool.resize(new_max_elements); - vecsim_stl::vector(new_max_elements, this->allocator) + vecsim_stl::vector(new_max_elements, this->allocator) .swap(element_neighbors_locks_); // Reallocate base layer char *data_level0_memory_new = (char *)this->allocator->reallocate( @@ -1344,9 +1348,9 @@ void HNSWIndex::mutuallyUpdateForRepairedNode( nodes_to_update.push_back(node_id); std::sort(nodes_to_update.begin(), nodes_to_update.end()); size_t nodes_to_update_count = nodes_to_update.size(); - std::unique_lock locks[nodes_to_update_count]; + elem_write_mutex_t locks[nodes_to_update_count]; for (size_t i = 0; i < nodes_to_update_count; i++) { - locks[i] = std::unique_lock(element_neighbors_locks_[nodes_to_update[i]]); + locks[i] = elem_write_mutex_t(element_neighbors_locks_[nodes_to_update[i]]); } idType *node_neighbors = getNodeNeighborsAtLevel(node_id, level); @@ -1442,7 +1446,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t // Go over the repaired node neighbors, collect the non-deleted ones to be neighbors candidates // after the repair as well. { - std::unique_lock node_lock(element_neighbors_locks_[node_id]); + elem_read_mutex_t node_lock(element_neighbors_locks_[node_id]); idType *node_neighbors = getNodeNeighborsAtLevel(node_id, level); linkListSize node_neighbors_count = getNodeNeighborsCount(node_neighbors); for (size_t j = 0; j < node_neighbors_count; j++) { @@ -1477,7 +1481,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t nodes_to_update.push_back(deleted_neighbor_id); neighbors_to_remove.push_back(deleted_neighbor_id); - std::unique_lock neighbor_lock( + elem_write_mutex_t neighbor_lock( this->element_neighbors_locks_[deleted_neighbor_id]); idType *neighbor_neighbours = getNodeNeighborsAtLevel(deleted_neighbor_id, level); linkListSize neighbor_neighbours_count = getNodeNeighborsCount(neighbor_neighbours); diff --git a/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h b/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h index 1523d9835..dc14d1be4 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h +++ b/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h @@ -114,7 +114,7 @@ VecSimQueryResult_Code HNSW_BatchIterator::scanGraphInternal // Take the current node out of the candidates queue and go over his neighbours. candidates.pop(); - this->index->lockNodeLinks(curr_node_id); + this->index->lockForReadNodeLinks(curr_node_id); idType *node_links = this->index->getNodeNeighborsAtLevel(curr_node_id, 0); linkListSize links_num = this->index->getNodeNeighborsCount(node_links); @@ -138,7 +138,7 @@ VecSimQueryResult_Code HNSW_BatchIterator::scanGraphInternal candidates.emplace(candidate_dist, candidate_id); __builtin_prefetch(index->getNodeNeighborsAtLevel(candidates.top().second, 0)); } - this->index->unlockNodeLinks(curr_node_id); + this->index->unlockForReadNodeLinks(curr_node_id); } return VecSim_QueryResult_OK; } diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp index d6e22adf2..3072a49cd 100644 --- a/src/VecSim/index_factories/hnsw_factory.cpp +++ b/src/VecSim/index_factories/hnsw_factory.cpp @@ -88,7 +88,7 @@ size_t EstimateInitialSize(const HNSWParams *params) { est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // Labels lookup hash table buckets. est += - sizeof(std::mutex) * params->initialCapacity + allocations_overhead; // lock per vector + sizeof(std::shared_mutex) * params->initialCapacity + allocations_overhead; // lock per vector } // Explicit allocation calls - always allocate a header. @@ -152,7 +152,7 @@ size_t EstimateElementSize(const HNSWParams *params) { // lookup hash map. size_t size_meta_data = sizeof(tag_t) + sizeof(size_t) + sizeof(size_t) + size_label_lookup_node; - size_t size_lock = sizeof(std::mutex); + size_t size_lock = sizeof(std::shared_mutex); /* Disclaimer: we are neglecting two additional factors that consume memory: * 1. The overall bucket size in labels_lookup hash table is usually higher than the number of diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 7fe109762..fee3081b0 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -415,7 +415,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { // except for the bucket count of the labels_lookup hash table that is calculated separately. size_t size_total_data_per_element = hnswIndex->size_data_per_element_; expected_mem_delta += (sizeof(tag_t) + sizeof(void *) + sizeof(size_t) + - size_total_data_per_element + sizeof(std::mutex)) * + size_total_data_per_element + sizeof(std::shared_mutex)) * block_size; expected_mem_delta += (hnswIndex->label_lookup_.bucket_count() - prev_bucket_count) * sizeof(size_t); From 2ec2951ecf9e7eee13e7e5ecd9a15dfbd98e7f15 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 11 Jul 2023 14:42:09 +0000 Subject: [PATCH 02/10] format --- src/VecSim/algorithms/hnsw/hnsw.h | 19 ++++++++----------- src/VecSim/index_factories/hnsw_factory.cpp | 4 ++-- 2 files changed, 10 insertions(+), 13 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 8d5f20318..40355ee04 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -171,8 +171,7 @@ class HNSWIndex : public VecSimIndexAbstract, void revisitNeighborConnections(size_t level, idType new_node_id, const std::pair &neighbor_data, idType *new_node_neighbors_list, - idType *neighbor_neighbors_list, - elem_write_mutex_t &node_lock, + idType *neighbor_neighbors_list, elem_write_mutex_t &node_lock, elem_write_mutex_t &neighbor_lock); inline idType mutuallyConnectNewElement(idType new_node_id, candidatesMaxHeap &top_candidates, @@ -769,8 +768,8 @@ void HNSWIndex::getNeighborsByHeuristic2( template void HNSWIndex::revisitNeighborConnections( size_t level, idType new_node_id, const std::pair &neighbor_data, - idType *new_node_neighbors_list, idType *neighbor_neighbors_list, - elem_write_mutex_t &node_lock, elem_write_mutex_t &neighbor_lock) { + idType *new_node_neighbors_list, idType *neighbor_neighbors_list, elem_write_mutex_t &node_lock, + elem_write_mutex_t &neighbor_lock) { // Note - expect that node_lock and neighbor_lock are locked at that point. // Collect the existing neighbors and the new node as the neighbor's neighbors candidates. @@ -923,11 +922,9 @@ idType HNSWIndex::mutuallyConnectNewElement( idType lower_id = (new_node_id < selected_neighbor) ? new_node_id : selected_neighbor; if (lower_id == new_node_id) { node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); - neighbor_lock = - elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); + neighbor_lock = elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); } else { - neighbor_lock = - elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); + neighbor_lock = elem_write_mutex_t(element_neighbors_locks_[selected_neighbor]); node_lock = elem_write_mutex_t(element_neighbors_locks_[new_node_id]); } @@ -1071,7 +1068,8 @@ void HNSWIndex::replaceEntryPoint() { volatile idType candidate_in_process = INVALID_ID; { // Go over the entry point's neighbors at the top level. - // Assuming the lock only protects the entrypoint's neighbours and not the entrypoint itself, we can use a shred lock here, + // Assuming the lock only protects the entrypoint's neighbours and not the entrypoint + // itself, we can use a shred lock here, elem_read_mutex_t lock(this->element_neighbors_locks_[entrypoint_node_]); idType *top_level_list = getNodeNeighborsAtLevel(old_entry, max_level_); auto neighbors_count = getNodeNeighborsCount(top_level_list); @@ -1481,8 +1479,7 @@ void HNSWIndex::repairNodeConnections(idType node_id, size_t nodes_to_update.push_back(deleted_neighbor_id); neighbors_to_remove.push_back(deleted_neighbor_id); - elem_write_mutex_t neighbor_lock( - this->element_neighbors_locks_[deleted_neighbor_id]); + elem_write_mutex_t neighbor_lock(this->element_neighbors_locks_[deleted_neighbor_id]); idType *neighbor_neighbours = getNodeNeighborsAtLevel(deleted_neighbor_id, level); linkListSize neighbor_neighbours_count = getNodeNeighborsCount(neighbor_neighbours); diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp index 3072a49cd..833c8290f 100644 --- a/src/VecSim/index_factories/hnsw_factory.cpp +++ b/src/VecSim/index_factories/hnsw_factory.cpp @@ -87,8 +87,8 @@ size_t EstimateInitialSize(const HNSWParams *params) { est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // element level est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // Labels lookup hash table buckets. - est += - sizeof(std::shared_mutex) * params->initialCapacity + allocations_overhead; // lock per vector + est += sizeof(std::shared_mutex) * params->initialCapacity + + allocations_overhead; // lock per vector } // Explicit allocation calls - always allocate a header. From 2da0380be1c35f5a6c60df7bf8e3ea30c0b6274c Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 11 Jul 2023 15:15:12 +0000 Subject: [PATCH 03/10] run benchmarks on one byte mutex --- src/VecSim/algorithms/hnsw/hnsw.h | 16 ++++++++-------- .../algorithms/hnsw/hnsw_batch_iterator.h | 4 ++-- src/VecSim/index_factories/hnsw_factory.cpp | 4 ++-- src/VecSim/utils/vecsim_stl.h | 19 +++++++++++++++++++ tests/unit/test_allocator.cpp | 2 +- 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 40355ee04..7d77ec56f 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -47,9 +47,9 @@ template using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue; using graphNodeType = pair; // represented as: (element_id, level) -using elem_mutex_t = std::shared_mutex; +using elem_mutex_t = vecsim_stl::one_byte_mutex; using elem_write_mutex_t = std::unique_lock; -using elem_read_mutex_t = std::shared_lock; +using elem_read_mutex_t = elem_write_mutex_t; // Vectors flags (for marking a specific vector) typedef enum { DELETE_MARK = 0x1, // element is logically deleted, but still exists in the graph @@ -236,8 +236,8 @@ class HNSWIndex : public VecSimIndexAbstract, inline auto safeGetEntryPointState() const; inline void lockIndexDataGuard() const; inline void unlockIndexDataGuard() const; - inline void lockForReadNodeLinks(idType node_id) const; - inline void unlockForReadNodeLinks(idType node_id) const; + inline void lockNodeLinks(idType node_id) const; + inline void unlockNodeLinks(idType node_id) const; inline VisitedNodesHandler *getVisitedList() const; inline void returnVisitedList(VisitedNodesHandler *visited_nodes_handler) const; VecSimIndexInfo info() const override; @@ -517,13 +517,13 @@ void HNSWIndex::unlockIndexDataGuard() const { } template -void HNSWIndex::lockForReadNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].lock_shared(); +void HNSWIndex::lockNodeLinks(idType node_id) const { + element_neighbors_locks_[node_id].lock(); } template -void HNSWIndex::unlockForReadNodeLinks(idType node_id) const { - element_neighbors_locks_[node_id].unlock_shared(); +void HNSWIndex::unlockNodeLinks(idType node_id) const { + element_neighbors_locks_[node_id].unlock(); } template diff --git a/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h b/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h index dc14d1be4..1523d9835 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h +++ b/src/VecSim/algorithms/hnsw/hnsw_batch_iterator.h @@ -114,7 +114,7 @@ VecSimQueryResult_Code HNSW_BatchIterator::scanGraphInternal // Take the current node out of the candidates queue and go over his neighbours. candidates.pop(); - this->index->lockForReadNodeLinks(curr_node_id); + this->index->lockNodeLinks(curr_node_id); idType *node_links = this->index->getNodeNeighborsAtLevel(curr_node_id, 0); linkListSize links_num = this->index->getNodeNeighborsCount(node_links); @@ -138,7 +138,7 @@ VecSimQueryResult_Code HNSW_BatchIterator::scanGraphInternal candidates.emplace(candidate_dist, candidate_id); __builtin_prefetch(index->getNodeNeighborsAtLevel(candidates.top().second, 0)); } - this->index->unlockForReadNodeLinks(curr_node_id); + this->index->unlockNodeLinks(curr_node_id); } return VecSim_QueryResult_OK; } diff --git a/src/VecSim/index_factories/hnsw_factory.cpp b/src/VecSim/index_factories/hnsw_factory.cpp index 833c8290f..800c68bbb 100644 --- a/src/VecSim/index_factories/hnsw_factory.cpp +++ b/src/VecSim/index_factories/hnsw_factory.cpp @@ -87,7 +87,7 @@ size_t EstimateInitialSize(const HNSWParams *params) { est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // element level est += sizeof(size_t) * params->initialCapacity + allocations_overhead; // Labels lookup hash table buckets. - est += sizeof(std::shared_mutex) * params->initialCapacity + + est += sizeof(elem_mutex_t) * params->initialCapacity + allocations_overhead; // lock per vector } @@ -152,7 +152,7 @@ size_t EstimateElementSize(const HNSWParams *params) { // lookup hash map. size_t size_meta_data = sizeof(tag_t) + sizeof(size_t) + sizeof(size_t) + size_label_lookup_node; - size_t size_lock = sizeof(std::shared_mutex); + size_t size_lock = sizeof(elem_mutex_t); /* Disclaimer: we are neglecting two additional factors that consume memory: * 1. The overall bucket size in labels_lookup hash table is usually higher than the number of diff --git a/src/VecSim/utils/vecsim_stl.h b/src/VecSim/utils/vecsim_stl.h index c5380c36a..ed29e2da6 100644 --- a/src/VecSim/utils/vecsim_stl.h +++ b/src/VecSim/utils/vecsim_stl.h @@ -91,4 +91,23 @@ class unordered_set alloc) {} }; +struct one_byte_mutex { + void lock() { + if (state.exchange(locked, std::memory_order_acquire) == unlocked) + return; + while (state.exchange(sleeper, std::memory_order_acquire) != unlocked) + state.wait(sleeper, std::memory_order_relaxed); + } + void unlock() { + if (state.exchange(unlocked, std::memory_order_release) == sleeper) + state.notify_one(); + } + +private: + std::atomic state{unlocked}; + + static constexpr uint8_t unlocked = 0; + static constexpr uint8_t locked = 0b01; + static constexpr uint8_t sleeper = 0b10; +}; } // namespace vecsim_stl diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index fee3081b0..df420b837 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -415,7 +415,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { // except for the bucket count of the labels_lookup hash table that is calculated separately. size_t size_total_data_per_element = hnswIndex->size_data_per_element_; expected_mem_delta += (sizeof(tag_t) + sizeof(void *) + sizeof(size_t) + - size_total_data_per_element + sizeof(std::shared_mutex)) * + size_total_data_per_element + sizeof(elem_mutex_t)) * block_size; expected_mem_delta += (hnswIndex->label_lookup_.bucket_count() - prev_bucket_count) * sizeof(size_t); From f69e70d336bee8a62d9b7688691c5006a936e294 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 13 Jul 2023 07:10:01 +0000 Subject: [PATCH 04/10] replace lock to one byte mutex after merging to main --- src/VecSim/algorithms/hnsw/hnsw.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 9b0715183..0197abc1a 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -50,8 +50,6 @@ using graphNodeType = pair; // represented as: (element_id, leve using elem_mutex_t = vecsim_stl::one_byte_mutex; -using elem_write_mutex_t = std::unique_lock; -using elem_read_mutex_t = elem_write_mutex_t; ////////////////////////////////////// Auxiliary HNSW structs ////////////////////////////////////// // Vectors flags (for marking a specific vector) typedef enum { @@ -97,7 +95,7 @@ struct LevelData { struct ElementGraphData { size_t toplevel; - std::mutex neighborsGuard; + elem_mutex_t neighborsGuard; LevelData *others; LevelData level0; From b104c6b0f00eb9cc64c3f4bae7b9053a5ff6775a Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 13 Jul 2023 07:14:15 +0000 Subject: [PATCH 05/10] format --- src/VecSim/algorithms/hnsw/hnsw.h | 1 - 1 file changed, 1 deletion(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 0197abc1a..c2e372131 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -48,7 +48,6 @@ template using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue; using graphNodeType = pair; // represented as: (element_id, level) - using elem_mutex_t = vecsim_stl::one_byte_mutex; ////////////////////////////////////// Auxiliary HNSW structs ////////////////////////////////////// // Vectors flags (for marking a specific vector) From dcaecc7f907c0a5311331a3eb18f9f001a41a8e8 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 13 Jul 2023 14:08:08 +0000 Subject: [PATCH 06/10] moved paking after datagraph struct --- src/VecSim/algorithms/hnsw/hnsw.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index c2e372131..8f8f53c50 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -74,7 +74,6 @@ struct ElementMetaData { ElementMetaData(labelType label = SIZE_MAX) noexcept : label(label), flags(IN_PROCESS) {} }; -#pragma pack() // restore default packing struct LevelData { vecsim_stl::vector *incomingEdges; @@ -113,6 +112,7 @@ struct ElementGraphData { } ~ElementGraphData() = delete; // Should be destroyed using `destroyGraphData` }; +#pragma pack() // restore default packing //////////////////////////////////// HNSW index implementation //////////////////////////////////// From 4908662d6c29f4d1ebe6d941f891288c51368de8 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 16 Jul 2023 08:05:31 +0000 Subject: [PATCH 07/10] using same query --- tests/benchmark/bm_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/benchmark/bm_common.h b/tests/benchmark/bm_common.h index 9320700af..44c8755ca 100644 --- a/tests/benchmark/bm_common.h +++ b/tests/benchmark/bm_common.h @@ -126,7 +126,7 @@ void BM_VecSimCommon::TopK_Tiered(benchmark::State &st, unsigned s auto query_params = BM_VecSimGeneral::CreateQueryParams(hnswRuntimeParams); size_t cur_iter = search_job->iter; auto hnsw_results = - VecSimIndex_TopKQuery(INDICES[VecSimAlgo_TIERED], QUERIES[cur_iter % N_QUERIES].data(), + VecSimIndex_TopKQuery(INDICES[VecSimAlgo_TIERED], QUERIES[0].data(), search_job->k, &query_params, BY_SCORE); search_job->all_results[cur_iter] = hnsw_results; delete job; From 24cad4c755cf48f759d9c939222780fd6a6712d0 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 16 Jul 2023 08:49:50 +0000 Subject: [PATCH 08/10] format --- tests/benchmark/bm_common.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/tests/benchmark/bm_common.h b/tests/benchmark/bm_common.h index 44c8755ca..303f5a56c 100644 --- a/tests/benchmark/bm_common.h +++ b/tests/benchmark/bm_common.h @@ -125,9 +125,8 @@ void BM_VecSimCommon::TopK_Tiered(benchmark::State &st, unsigned s HNSWRuntimeParams hnswRuntimeParams = {.efRuntime = search_job->ef}; auto query_params = BM_VecSimGeneral::CreateQueryParams(hnswRuntimeParams); size_t cur_iter = search_job->iter; - auto hnsw_results = - VecSimIndex_TopKQuery(INDICES[VecSimAlgo_TIERED], QUERIES[0].data(), - search_job->k, &query_params, BY_SCORE); + auto hnsw_results = VecSimIndex_TopKQuery(INDICES[VecSimAlgo_TIERED], QUERIES[0].data(), + search_job->k, &query_params, BY_SCORE); search_job->all_results[cur_iter] = hnsw_results; delete job; }; From ab313286c42dceae846ebdf91493b51c6dd3ae50 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 18 Jul 2023 14:59:18 +0300 Subject: [PATCH 09/10] using one byte only if compiler version allows --- src/VecSim/algorithms/hnsw/hnsw.h | 7 ++++++- src/VecSim/utils/vecsim_stl.h | 14 ++++++++++++++ tests/benchmark/bm_common.h | 5 +++-- 3 files changed, 23 insertions(+), 3 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 8f8f53c50..346d8a5db 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -48,8 +48,13 @@ template using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue; using graphNodeType = pair; // represented as: (element_id, level) -using elem_mutex_t = vecsim_stl::one_byte_mutex; +#if ONE_BYTE_MUTEX_AVAILABLE == 1 + using elem_mutex_t = vecsim_stl::one_byte_mutex; +#else + using elem_mutex_t = std::mutex; +#endif ////////////////////////////////////// Auxiliary HNSW structs ////////////////////////////////////// + // Vectors flags (for marking a specific vector) typedef enum { DELETE_MARK = 0x1, // element is logically deleted, but still exists in the graph diff --git a/src/VecSim/utils/vecsim_stl.h b/src/VecSim/utils/vecsim_stl.h index ed29e2da6..9e8ad7463 100644 --- a/src/VecSim/utils/vecsim_stl.h +++ b/src/VecSim/utils/vecsim_stl.h @@ -91,6 +91,19 @@ class unordered_set alloc) {} }; +#define ONE_BYTE_MUTEX_AVAILABLE 0 +#if defined(__clang__) + #define CLANG_VERSION (__clang_major__ * 100 \ + + __clang_minor__ * 10 \ + + __clang_patchlevel__) + #if (CLANG_VERSION >= 1316) //clang 13.1.6 + #define ONE_BYTE_MUTEX_AVAILABLE 1 + #endif +#elif (__GNUC__ >= 11) + #define ONE_BYTE_MUTEX_AVAILABLE 1 +#endif + +#if ONE_BYTE_MUTEX_AVAILABLE != 0 struct one_byte_mutex { void lock() { if (state.exchange(locked, std::memory_order_acquire) == unlocked) @@ -110,4 +123,5 @@ struct one_byte_mutex { static constexpr uint8_t locked = 0b01; static constexpr uint8_t sleeper = 0b10; }; +#endif } // namespace vecsim_stl diff --git a/tests/benchmark/bm_common.h b/tests/benchmark/bm_common.h index 303f5a56c..9320700af 100644 --- a/tests/benchmark/bm_common.h +++ b/tests/benchmark/bm_common.h @@ -125,8 +125,9 @@ void BM_VecSimCommon::TopK_Tiered(benchmark::State &st, unsigned s HNSWRuntimeParams hnswRuntimeParams = {.efRuntime = search_job->ef}; auto query_params = BM_VecSimGeneral::CreateQueryParams(hnswRuntimeParams); size_t cur_iter = search_job->iter; - auto hnsw_results = VecSimIndex_TopKQuery(INDICES[VecSimAlgo_TIERED], QUERIES[0].data(), - search_job->k, &query_params, BY_SCORE); + auto hnsw_results = + VecSimIndex_TopKQuery(INDICES[VecSimAlgo_TIERED], QUERIES[cur_iter % N_QUERIES].data(), + search_job->k, &query_params, BY_SCORE); search_job->all_results[cur_iter] = hnsw_results; delete job; }; From 3c63447f1e76fcd20e1be8c9d8e4920794d6739c Mon Sep 17 00:00:00 2001 From: meiravgri Date: Tue, 18 Jul 2023 12:04:37 +0000 Subject: [PATCH 10/10] format + fix macro definition --- src/VecSim/algorithms/hnsw/hnsw.h | 4 ++-- src/VecSim/utils/vecsim_stl.h | 15 +++++++-------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 346d8a5db..74c803a17 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -49,9 +49,9 @@ using candidatesLabelsMaxHeap = vecsim_stl::abstract_priority_queue; // represented as: (element_id, level) #if ONE_BYTE_MUTEX_AVAILABLE == 1 - using elem_mutex_t = vecsim_stl::one_byte_mutex; +using elem_mutex_t = vecsim_stl::one_byte_mutex; #else - using elem_mutex_t = std::mutex; +using elem_mutex_t = std::mutex; #endif ////////////////////////////////////// Auxiliary HNSW structs ////////////////////////////////////// diff --git a/src/VecSim/utils/vecsim_stl.h b/src/VecSim/utils/vecsim_stl.h index 9e8ad7463..87aa37e3c 100644 --- a/src/VecSim/utils/vecsim_stl.h +++ b/src/VecSim/utils/vecsim_stl.h @@ -91,16 +91,15 @@ class unordered_set alloc) {} }; -#define ONE_BYTE_MUTEX_AVAILABLE 0 #if defined(__clang__) - #define CLANG_VERSION (__clang_major__ * 100 \ - + __clang_minor__ * 10 \ - + __clang_patchlevel__) - #if (CLANG_VERSION >= 1316) //clang 13.1.6 - #define ONE_BYTE_MUTEX_AVAILABLE 1 - #endif +#define CLANG_VERSION (__clang_major__ * 100 + __clang_minor__ * 10 + __clang_patchlevel__) +#if (CLANG_VERSION >= 1316) // clang 13.1.6 +#define ONE_BYTE_MUTEX_AVAILABLE 1 +#endif #elif (__GNUC__ >= 11) - #define ONE_BYTE_MUTEX_AVAILABLE 1 +#define ONE_BYTE_MUTEX_AVAILABLE 1 +#else +#define ONE_BYTE_MUTEX_AVAILABLE 0 #endif #if ONE_BYTE_MUTEX_AVAILABLE != 0