From 2f88f74e9b69517ca37a1d2419e8f03bf7917d54 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 28 Aug 2025 11:04:53 +0000 Subject: [PATCH 01/11] brute force 75% capacity imp --- .../algorithms/brute_force/brute_force.h | 38 +++++++++++++------ src/VecSim/vec_sim_index.h | 22 ++++++++++- tests/unit/test_allocator.cpp | 11 +++--- 3 files changed, 51 insertions(+), 20 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index 38ea60c32..233f22d24 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -95,14 +95,31 @@ class BruteForceIndex : public VecSimIndexAbstract { resizeLabelLookup(idToLabelMapping.size()); } - void shrinkByBlock() { - assert(indexCapacity() > 0); // should not be called when index is empty + void shrinkIfUnderUtilized() { + // Reset everything if the index is empty. + // TODO: initial capacity + if (indexSize() == 0) { + idToLabelMapping = vecsim_stl::vector(this->allocator); + resizeLabelLookup(0); + return; + } + size_t curr_capacity = idToLabelMapping.capacity(); + assert(idToLabelMapping.size() == curr_capacity); // assuming all entries are valid - // remove a block size of labels. - assert(idToLabelMapping.size() >= this->blockSize); - idToLabelMapping.resize(idToLabelMapping.size() - this->blockSize); - idToLabelMapping.shrink_to_fit(); - resizeLabelLookup(idToLabelMapping.size()); + if (curr_capacity <= this->getMinimalShrinkCapacity()) + return; + + assert(indexSize() <= idToLabelMapping.size()); + if (indexSize() < static_cast(this->getShrinkThreshold() * curr_capacity)) { + size_t new_size = indexSize() + this->blockSize; + assert(new_size < curr_capacity); + idToLabelMapping.resize(new_size); + idToLabelMapping.shrink_to_fit(); + assert(idToLabelMapping.size() == new_size); + assert(idToLabelMapping.capacity() == new_size); + + resizeLabelLookup(new_size); + } } void setVectorLabel(idType id, labelType new_label) { idToLabelMapping.at(id) = new_label; } @@ -186,10 +203,7 @@ void BruteForceIndex::removeVector(idType id_to_delete) { } this->vectors->removeElement(last_idx); - // If we reached to a multiply of a block size, we can reduce meta data structures size. - if (this->count % this->blockSize == 0) { - shrinkByBlock(); - } + shrinkIfUnderUtilized(); } template @@ -199,7 +213,7 @@ size_t BruteForceIndex::indexSize() const { template size_t BruteForceIndex::indexCapacity() const { - return this->idToLabelMapping.size(); + return this->idToLabelMapping.capacity(); } template diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h index 439f820d6..9c485673c 100644 --- a/src/VecSim/vec_sim_index.h +++ b/src/VecSim/vec_sim_index.h @@ -75,11 +75,25 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { size_t blockSize; // Index's vector block size (determines by how many vectors to resize when // resizing) IndexCalculatorInterface *indexCalculator; // Distance calculator. - PreprocessorsContainerAbstract *preprocessors; // Stroage and query preprocessors. + PreprocessorsContainerAbstract *preprocessors; // Storage and query preprocessors. mutable VecSearchMode lastMode; // The last search mode in RediSearch (used for debug/testing). bool isMulti; // Determines if the index should multi-index or not. void *logCallbackCtx; // Context for the log callback. 
+ // Relaxed shrink threshold to decouple shrinking from growth logic and prevent + // oscillations when repeatedly adding/removing vectors with IDs divisible by blockSize. + // When count < SHRINK_THRESHOLD * capacity, containers may shrink. + // Note: Not all containers use this threshold - implementation depends on container type + // and algorithm-specific requirements. + static constexpr float SHRINK_THRESHOLD = 0.75f; + // To ensure the new capacity is not larger than the current capacity, i.e prevent shrinking from + // actually increasing the capacity, we need: + // new_capacity = count + blockSize < capacity + // Since count < SHRINK_THRESHOLD * capacity (worst case: count ≈ SHRINK_THRESHOLD * capacity): + // SHRINK_THRESHOLD * capacity + blockSize < capacity + // Therefore: capacity > blockSize / (1 - SHRINK_THRESHOLD) = 4 * blockSize + // Cached value to avoid repeated calculations. + const size_t minimalShrinkCapacity; RawDataContainer *vectors; // The raw vectors data container. /** @@ -97,6 +111,9 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { return info; } + float getShrinkThreshold() const { return SHRINK_THRESHOLD; } + size_t getMinimalShrinkCapacity() const { return minimalShrinkCapacity; } + public: /** * @brief Construct a new Vec Sim Index object @@ -108,7 +125,8 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { dataSize(params.dataSize), metric(params.metric), blockSize(params.blockSize ? params.blockSize : DEFAULT_BLOCK_SIZE), indexCalculator(components.indexCalculator), preprocessors(components.preprocessors), - lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx) { + lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx), + minimalShrinkCapacity(static_cast(blockSize / (1.0f - SHRINK_THRESHOLD))) { assert(VecSimType_sizeof(vecType)); assert(dataSize); this->vectors = new (this->allocator) DataBlocksContainer( diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 13e25ad31..3738d7b55 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -159,9 +159,7 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { int deleteCommandAllocationDelta = allocator->getAllocationSize() - before; expectedAllocationDelta -= sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // Free the vector in the vector block - expectedAllocationDelta -= sizeof(labelType); // resize idToLabelMapping - expectedAllocationDelta -= - sizeof(std::pair) + vecsimAllocationOverhead; // remove one label:id pair + // idToLabelMapping and label:id should not change since count < minimalShrinkCapacity // Assert that the reclaiming of memory did occur, and it is limited, as some STL // collection allocate additional structures for their internal implementation. @@ -184,12 +182,13 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { (sizeof(DataBlock) + vecsimAllocationOverhead); // Free the vector block expectedAllocationDelta -= sizeof(DataBlock *) + vecsimAllocationOverhead; // remove from vectorBlocks vector + // We resize maps containers to 0. 
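For reference, the 75% rule introduced in vec_sim_index.h above reduces to a single inequality; PATCH 02 below drops it in favor of a simpler "two free blocks" rule, but the arithmetic shows where the 4 * blockSize lower bound comes from. A minimal standalone sketch of the check, under illustrative names (shouldShrink and minimalShrinkCapacity are not part of the library API; the patch caches the value in a member instead):

    #include <cstddef>

    constexpr float SHRINK_THRESHOLD = 0.75f;

    // Capacity must exceed blockSize / (1 - SHRINK_THRESHOLD) = 4 * blockSize,
    // otherwise "count + blockSize" could reach or exceed the current capacity and the
    // "shrink" would be a no-op (or even a growth).
    constexpr std::size_t minimalShrinkCapacity(std::size_t blockSize) {
        return static_cast<std::size_t>(blockSize / (1.0f - SHRINK_THRESHOLD));
    }

    inline bool shouldShrink(std::size_t count, std::size_t capacity, std::size_t blockSize) {
        if (capacity <= minimalShrinkCapacity(blockSize))
            return false;
        // When this holds, the containers are resized down to count + blockSize slots.
        return count < static_cast<std::size_t>(SHRINK_THRESHOLD * capacity);
    }

    // Example: blockSize = 1024, capacity = 5 * 1024 = 5120, count = 3500.
    // 3500 < 0.75 * 5120 = 3840, so the containers shrink to 3500 + 1024 = 4524 slots,
    // which is still strictly below the old capacity, as required.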
expectedAllocationDelta -= - sizeof(labelType) + vecsimAllocationOverhead; // resize idToLabelMapping + 2 * sizeof(labelType) + vecsimAllocationOverhead; // remove two idToLabelMapping + expectedAllocationDelta -= 2 * sizeof(std::pair) + + vecsimAllocationOverhead; // remove two label:id pair expectedAllocationDelta -= (sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead); // Free the vector in the vector block - expectedAllocationDelta -= - sizeof(std::pair) + vecsimAllocationOverhead; // remove one label:id pair // Assert that the reclaiming of memory did occur, and it is limited, as some STL // collection allocate additional structures for their internal implementation. From af1f98756561c2385db985722db12883eb70d31d Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 28 Aug 2025 14:32:59 +0000 Subject: [PATCH 02/11] same solution brute force and hnsw --- .../algorithms/brute_force/brute_force.h | 68 +++++++----- src/VecSim/algorithms/hnsw/hnsw.h | 77 +++++++++---- src/VecSim/algorithms/hnsw/hnsw_tiered.h | 2 +- .../containers/data_blocks_container.cpp | 4 +- src/VecSim/containers/data_blocks_container.h | 4 +- src/VecSim/vec_sim_common.h | 10 +- src/VecSim/vec_sim_index.h | 20 +--- tests/unit/test_allocator.cpp | 104 +++++++++++------- tests/unit/test_common.cpp | 52 +++++++-- tests/unit/test_hnsw.cpp | 29 +++++ 10 files changed, 247 insertions(+), 123 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index 233f22d24..72518a8ca 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -89,36 +89,51 @@ class BruteForceIndex : public VecSimIndexAbstract { // Private internal function that implements generic single vector deletion. virtual void removeVector(idType id); - void growByBlock() { - idToLabelMapping.resize(idToLabelMapping.size() + this->blockSize); + void resizeIndexCommon(size_t new_max_elements) { + assert(new_max_elements % this->blockSize == 0 && + "new_max_elements must be a multiple of blockSize"); + this->log(VecSimCommonStrings::LOG_VERBOSE_STRING, "Resizing FLAT index from %zu to %zu", + idToLabelMapping.capacity(), new_max_elements); + assert(idToLabelMapping.capacity() == idToLabelMapping.size()); + idToLabelMapping.resize(new_max_elements); idToLabelMapping.shrink_to_fit(); - resizeLabelLookup(idToLabelMapping.size()); + assert(idToLabelMapping.capacity() == idToLabelMapping.size()); + resizeLabelLookup(new_max_elements); } - void shrinkIfUnderUtilized() { - // Reset everything if the index is empty. 
- // TODO: initial capacity - if (indexSize() == 0) { - idToLabelMapping = vecsim_stl::vector(this->allocator); - resizeLabelLookup(0); - return; - } - size_t curr_capacity = idToLabelMapping.capacity(); - assert(idToLabelMapping.size() == curr_capacity); // assuming all entries are valid + void growByBlock() { + assert(indexCapacity() == idToLabelMapping.capacity()); + assert(indexCapacity() % this->blockSize == 0); + assert(indexCapacity() == indexSize() - 1); + assert((dynamic_cast(this->vectors)->numBlocks() == + (indexSize() - 1) / this->blockSize)); - if (curr_capacity <= this->getMinimalShrinkCapacity()) - return; + resizeIndexCommon(indexCapacity() + this->blockSize); + } + + void shrinkByBlock() { + // assert(indexCapacity() > 0); // should not be called when index is empty - assert(indexSize() <= idToLabelMapping.size()); - if (indexSize() < static_cast(this->getShrinkThreshold() * curr_capacity)) { - size_t new_size = indexSize() + this->blockSize; - assert(new_size < curr_capacity); - idToLabelMapping.resize(new_size); - idToLabelMapping.shrink_to_fit(); - assert(idToLabelMapping.size() == new_size); - assert(idToLabelMapping.capacity() == new_size); + // // remove a block size of labels. + // assert(idToLabelMapping.size() >= this->blockSize); + // idToLabelMapping.resize(idToLabelMapping.size() - this->blockSize); + // idToLabelMapping.shrink_to_fit(); + // resizeLabelLookup(idToLabelMapping.size()); + assert(indexCapacity() >= this->blockSize); + assert(indexCapacity() % this->blockSize == 0); + assert(dynamic_cast(this->vectors)->numBlocks() == + indexSize() / this->blockSize); - resizeLabelLookup(new_size); + if (indexSize() == 0) { + resizeIndexCommon(0); + } else if (indexCapacity() >= (indexSize() + 2 * this->blockSize)) { + + assert(indexCapacity() == idToLabelMapping.capacity()); + assert(idToLabelMapping.size() == idToLabelMapping.capacity()); + assert(dynamic_cast(this->vectors)->size() + + 2 * this->blockSize == + idToLabelMapping.capacity()); + resizeIndexCommon(indexCapacity() - this->blockSize); } } @@ -203,7 +218,10 @@ void BruteForceIndex::removeVector(idType id_to_delete) { } this->vectors->removeElement(last_idx); - shrinkIfUnderUtilized(); + // If we reached to a multiply of a block size, we can reduce meta data structures size. + if (this->count % this->blockSize == 0) { + shrinkByBlock(); + } } template diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 996747d80..90edf77d3 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -235,6 +235,11 @@ class HNSWIndex : public VecSimIndexAbstract, double getEpsilon() const; size_t indexSize() const override; size_t indexCapacity() const override; + /** + * Checks if the index capacity is full to hint the caller a resize is needed. + * @note Must be called with indexDataGuard locked. 
+ */ + size_t isCapacityFull() const; size_t getEfConstruction() const; size_t getM() const; size_t getMaxLevel() const; @@ -349,6 +354,11 @@ size_t HNSWIndex::indexCapacity() const { return this->maxElements; } +template +size_t HNSWIndex::isCapacityFull() const { + return indexSize() == this->maxElements; +} + template size_t HNSWIndex::getEfConstruction() const { return this->efConstruction; @@ -1281,31 +1291,59 @@ template void HNSWIndex::resizeIndexCommon(size_t new_max_elements) { assert(new_max_elements % this->blockSize == 0 && "new_max_elements must be a multiple of blockSize"); - this->log(VecSimCommonStrings::LOG_VERBOSE_STRING, - "Updating HNSW index capacity from %zu to %zu", this->maxElements, new_max_elements); + this->log(VecSimCommonStrings::LOG_VERBOSE_STRING, "Resizing HNSW index from %zu to %zu", + idToMetaData.capacity(), new_max_elements); resizeLabelLookup(new_max_elements); visitedNodesHandlerPool.resize(new_max_elements); + assert(idToMetaData.capacity() == idToMetaData.size()); idToMetaData.resize(new_max_elements); idToMetaData.shrink_to_fit(); - - maxElements = new_max_elements; + assert(idToMetaData.capacity() == idToMetaData.size()); } template void HNSWIndex::growByBlock() { - size_t new_max_elements = maxElements + this->blockSize; + assert(this->maxElements % this->blockSize == 0); + assert(this->maxElements == indexSize()); + assert(graphDataBlocks.size() == this->maxElements / this->blockSize); + assert(idToMetaData.capacity() == maxElements || + idToMetaData.capacity() == maxElements + this->blockSize); + + this->log(VecSimCommonStrings::LOG_VERBOSE_STRING, + "Updating HNSW index capacity from %zu to %zu", maxElements, + maxElements + this->blockSize); + maxElements += this->blockSize; + graphDataBlocks.emplace_back(this->blockSize, this->elementGraphDataSize, this->allocator); - resizeIndexCommon(new_max_elements); + if (idToMetaData.capacity() == indexSize()) { + resizeIndexCommon(maxElements); + } } template void HNSWIndex::shrinkByBlock() { - assert(maxElements >= this->blockSize); - size_t new_max_elements = maxElements - this->blockSize; - graphDataBlocks.pop_back(); + assert(this->maxElements >= this->blockSize); + assert(this->maxElements % this->blockSize == 0); + + if (indexSize() % this->blockSize == 0) { + this->log(VecSimCommonStrings::LOG_VERBOSE_STRING, + "Updating HNSW index capacity from %zu to %zu", maxElements, + maxElements - this->blockSize); + graphDataBlocks.pop_back(); + assert(graphDataBlocks.size() == indexSize() / this->blockSize); + + // assuming idToMetaData reflects the capacity of the heavy reallocation containers. + if (indexSize() == 0) { + resizeIndexCommon(0); + } else if (idToMetaData.capacity() >= (indexSize() + 2 * this->blockSize)) { + assert(this->maxElements + this->blockSize == idToMetaData.capacity()); + resizeIndexCommon(idToMetaData.capacity() - this->blockSize); + } - resizeIndexCommon(new_max_elements); + // Take the lower bound into account. + maxElements -= this->blockSize; + } } template @@ -1660,9 +1698,7 @@ void HNSWIndex::removeAndSwap(idType internalId) { // If we need to free a complete block and there is at least one block between the // capacity and the size. 
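With PATCH 02, both index types converge on the same capacity policy for their id-indexed containers: grow by exactly one block when completely full, keep one spare block after a shrink, and release everything once the index is empty. A compact sketch of that decision, assuming free functions with illustrative names (the real code folds this into growByBlock/shrinkByBlock and only evaluates the shrink when the size crosses a block boundary):

    #include <cstddef>
    #include <optional>

    // New metadata capacity after a removal, or std::nullopt when no resize is needed.
    // `capacity` is always a multiple of blockSize.
    std::optional<std::size_t> capacityAfterRemoval(std::size_t size, std::size_t capacity,
                                                    std::size_t blockSize) {
        if (size == 0)
            return 0;                    // empty index: release all metadata
        if (capacity >= size + 2 * blockSize)
            return capacity - blockSize; // two free blocks: return one, keep one as a buffer
        return std::nullopt;             // at most one free block: keep it for future inserts
    }

    // Growth is symmetric: a new block is allocated only once the spare block is used up.
    std::size_t capacityAfterInsertion(std::size_t size, std::size_t capacity,
                                       std::size_t blockSize) {
        return (size == capacity) ? capacity + blockSize : capacity;
    }

Keeping that single spare block is what removes the oscillation around a block boundary: a delete followed by an insert lands in the retained block instead of forcing a free-then-reallocate cycle on every pair of operations.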
this->vectors->removeElement(curElementCount); - if (curElementCount % this->blockSize == 0) { - shrinkByBlock(); - } + shrinkByBlock(); } template @@ -1738,6 +1774,9 @@ void HNSWIndex::removeVectorInPlace(const idType element_int template HNSWAddVectorState HNSWIndex::storeNewElement(labelType label, const void *vector_data) { + if (isCapacityFull()) { + growByBlock(); + } HNSWAddVectorState state{}; // Choose randomly the maximum level in which the new element will be in the index. @@ -1765,13 +1804,11 @@ HNSWAddVectorState HNSWIndex::storeNewElement(labelType labe throw e; } - if (indexSize() > indexCapacity()) { - growByBlock(); - } else if (state.newElementId % this->blockSize == 0) { - // If we had an initial capacity, we might have to allocate new blocks for the graph data. - this->graphDataBlocks.emplace_back(this->blockSize, this->elementGraphDataSize, - this->allocator); - } + // else if (state.newElementId % this->blockSize == 0) { + // // If we had an initial capacity, we might have to allocate new blocks for the graph + // data. this->graphDataBlocks.emplace_back(this->blockSize, this->elementGraphDataSize, + // this->allocator); + // } // Insert the new element to the data block this->vectors->addElement(vector_data, state.newElementId); diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h index 46b3b1244..ec61325fe 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h @@ -437,7 +437,7 @@ void TieredHNSWIndex::insertVectorToHNSW( this->mainIndexGuard.lock_shared(); hnsw_index->lockIndexDataGuard(); // Check if resizing is needed for HNSW index (requires write lock). - if (hnsw_index->indexCapacity() == hnsw_index->indexSize()) { + if (hnsw_index->isCapacityFull()) { // Release the inner HNSW data lock before we re-acquire the global HNSW lock. this->mainIndexGuard.unlock_shared(); hnsw_index->unlockIndexDataGuard(); diff --git a/src/VecSim/containers/data_blocks_container.cpp b/src/VecSim/containers/data_blocks_container.cpp index 98240e744..062f69cf7 100644 --- a/src/VecSim/containers/data_blocks_container.cpp +++ b/src/VecSim/containers/data_blocks_container.cpp @@ -64,6 +64,8 @@ std::unique_ptr DataBlocksContainer::getIterator() c return std::make_unique(*this); } +size_t DataBlocksContainer::numBlocks() const { return this->blocks.size(); } + #ifdef BUILD_TESTS void DataBlocksContainer::saveVectorsData(std::ostream &output) const { // Save data blocks @@ -114,8 +116,6 @@ void DataBlocksContainer::restoreBlocks(std::istream &input, size_t num_vectors, void DataBlocksContainer::shrinkToFit() { this->blocks.shrink_to_fit(); } -size_t DataBlocksContainer::numBlocks() const { return this->blocks.size(); } - #endif /********************************** Iterator API ************************************************/ diff --git a/src/VecSim/containers/data_blocks_container.h b/src/VecSim/containers/data_blocks_container.h index 2e6e7a542..fca9f3884 100644 --- a/src/VecSim/containers/data_blocks_container.h +++ b/src/VecSim/containers/data_blocks_container.h @@ -28,8 +28,10 @@ class DataBlocksContainer : public VecsimBaseObject, public RawDataContainer { std::shared_ptr allocator, unsigned char alignment = 0); ~DataBlocksContainer(); + // Number of elements in the container. size_t size() const override; + // Number of blocks allocated. 
size_t capacity() const; size_t blockSize() const; @@ -46,13 +48,13 @@ class DataBlocksContainer : public VecsimBaseObject, public RawDataContainer { std::unique_ptr getIterator() const override; + size_t numBlocks() const; #ifdef BUILD_TESTS void saveVectorsData(std::ostream &output) const override; // Use that in deserialization when file was created with old version (v3) that serialized // the blocks themselves and not just thw raw vector data. void restoreBlocks(std::istream &input, size_t num_vectors, Serializer::EncodingVersion); void shrinkToFit(); - size_t numBlocks() const; #endif class Iterator : public RawDataContainer::Iterator { diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h index 643155b80..1d513498c 100644 --- a/src/VecSim/vec_sim_common.h +++ b/src/VecSim/vec_sim_common.h @@ -147,11 +147,11 @@ typedef struct { } HNSWParams; typedef struct { - VecSimType type; // Datatype to index. - size_t dim; // Vector's dimension. - VecSimMetric metric; // Distance metric to use in the index. - bool multi; // Determines if the index should multi-index or not. - size_t initialCapacity; // Deprecated. + VecSimType type; // Datatype to index. + size_t dim; // Vector's dimension. + VecSimMetric metric; // Distance metric to use in the index. + bool multi; // Determines if the index should multi-index or not. + size_t initialCapacity; size_t blockSize; } BFParams; diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h index 9c485673c..e46635ca0 100644 --- a/src/VecSim/vec_sim_index.h +++ b/src/VecSim/vec_sim_index.h @@ -80,20 +80,6 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { bool isMulti; // Determines if the index should multi-index or not. void *logCallbackCtx; // Context for the log callback. - // Relaxed shrink threshold to decouple shrinking from growth logic and prevent - // oscillations when repeatedly adding/removing vectors with IDs divisible by blockSize. - // When count < SHRINK_THRESHOLD * capacity, containers may shrink. - // Note: Not all containers use this threshold - implementation depends on container type - // and algorithm-specific requirements. - static constexpr float SHRINK_THRESHOLD = 0.75f; - // To ensure the new capacity is not larger than the current capacity, i.e prevent shrinking from - // actually increasing the capacity, we need: - // new_capacity = count + blockSize < capacity - // Since count < SHRINK_THRESHOLD * capacity (worst case: count ≈ SHRINK_THRESHOLD * capacity): - // SHRINK_THRESHOLD * capacity + blockSize < capacity - // Therefore: capacity > blockSize / (1 - SHRINK_THRESHOLD) = 4 * blockSize - // Cached value to avoid repeated calculations. - const size_t minimalShrinkCapacity; RawDataContainer *vectors; // The raw vectors data container. /** @@ -111,9 +97,6 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { return info; } - float getShrinkThreshold() const { return SHRINK_THRESHOLD; } - size_t getMinimalShrinkCapacity() const { return minimalShrinkCapacity; } - public: /** * @brief Construct a new Vec Sim Index object @@ -125,8 +108,7 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { dataSize(params.dataSize), metric(params.metric), blockSize(params.blockSize ? 
params.blockSize : DEFAULT_BLOCK_SIZE), indexCalculator(components.indexCalculator), preprocessors(components.preprocessors), - lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx), - minimalShrinkCapacity(static_cast(blockSize / (1.0f - SHRINK_THRESHOLD))) { + lastMode(EMPTY_MODE), isMulti(params.multi), logCallbackCtx(params.logCtx) { assert(VecSimType_sizeof(vecType)); assert(dataSize); this->vectors = new (this->allocator) DataBlocksContainer( diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 3738d7b55..9f260f716 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -372,70 +372,91 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { // Add vectors up to the size of a whole block, and calculate the total memory delta. size_t block_size = hnswIndex->basicInfo().blockSize; - size_t accumulated_mem_delta = allocator->getAllocationSize(); + size_t prev_bucket_count = hnswIndex->labelLookup.bucket_count(); for (size_t i = 0; i < block_size; i++) { GenerateAndAddVector(hnswIndex, d, i, i); } // Get the memory delta after adding the block. - accumulated_mem_delta = allocator->getAllocationSize() - accumulated_mem_delta; + size_t one_block_mem_delta = allocator->getAllocationSize() - initial_memory_size; + + size_t one_block_buckets = hnswIndex->labelLookup.bucket_count(); + // @param expected_size - The expected number of elements in the index. + // @param expected_data_container_blocks - The expected number of blocks in the data containers. + // @param expected_map_containers_capacity - The expected capacity of the map containers in + // number of elements. + auto verify_containers_size = [&](size_t expected_size, size_t expected_data_container_blocks, + size_t expected_map_containers_size) { + SCOPED_TRACE("Verifying containers size for size " + std::to_string(expected_size)); + ASSERT_EQ(hnswIndex->indexSize(), expected_size); + ASSERT_EQ(hnswIndex->indexCapacity(), expected_data_container_blocks * block_size); + ASSERT_EQ(hnswIndex->graphDataBlocks.size(), expected_data_container_blocks); + ASSERT_EQ(dynamic_cast(hnswIndex->vectors)->numBlocks(), + expected_data_container_blocks); + ASSERT_EQ(hnswIndex->vectors->size(), expected_size); + + ASSERT_EQ(hnswIndex->idToMetaData.capacity(), expected_map_containers_size); + ASSERT_EQ(hnswIndex->idToMetaData.size(), expected_map_containers_size); + ASSERT_GE(hnswIndex->labelLookup.bucket_count(), expected_map_containers_size); + // Also validate that there are no unidirectional connections (these add memory to the + // incoming edges sets). + ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); + }; // Validate that a single block exists. - ASSERT_EQ(hnswIndex->indexSize(), block_size); - ASSERT_EQ(hnswIndex->indexCapacity(), block_size); - ASSERT_EQ(allocator->getAllocationSize(), initial_memory_size + accumulated_mem_delta); - // Also validate that there are no unidirectional connections (these add memory to the incoming - // edges sets). - ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); + verify_containers_size(block_size, 1, block_size); + size_t one_block_mem = allocator->getAllocationSize(); // Add another vector, expect resizing of the index to contain two blocks. 
- size_t prev_bucket_count = hnswIndex->labelLookup.bucket_count(); - size_t mem_delta = allocator->getAllocationSize(); GenerateAndAddVector(hnswIndex, d, block_size, block_size); - mem_delta = allocator->getAllocationSize() - mem_delta; - - ASSERT_EQ(hnswIndex->indexSize(), block_size + 1); - ASSERT_EQ(hnswIndex->indexCapacity(), 2 * block_size); - ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); + verify_containers_size(block_size + 1, 2, 2 * block_size); + size_t mem_delta = allocator->getAllocationSize() - one_block_mem; // Compute the expected memory allocation due to the last vector insertion. size_t vec_max_level = hnswIndex->getGraphDataByInternalId(block_size)->toplevel; - size_t expected_mem_delta = - (vec_max_level + 1) * (sizeof(vecsim_stl::vector) + vecsimAllocationOverhead) + - hashTableNodeSize; + size_t last_vec_graph_data_mem = + (sizeof(vecsim_stl::vector) + vecsimAllocationOverhead) + hashTableNodeSize; if (vec_max_level > 0) { - expected_mem_delta += hnswIndex->levelDataSize * vec_max_level + vecsimAllocationOverhead; + last_vec_graph_data_mem += + hnswIndex->levelDataSize * vec_max_level + vecsimAllocationOverhead; } + size_t expected_mem_delta = last_vec_graph_data_mem; // Also account for all the memory allocation caused by the resizing that this vector triggered // except for the bucket count of the labels_lookup hash table that is calculated separately. + // Calculate the expected memory delta for adding a block. + size_t data_containers_block_mem = + 2 * (sizeof(DataBlock) + vecsimAllocationOverhead) + hnswIndex->getAlignment(); size_t size_total_data_per_element = hnswIndex->elementGraphDataSize + hnswIndex->dataSize; + data_containers_block_mem += size_total_data_per_element * block_size; + // account for idToMetaData and visitedNodesHandlerPool entries. expected_mem_delta += - (sizeof(tag_t) + sizeof(labelType) + sizeof(elementFlags) + size_total_data_per_element) * - block_size; + (sizeof(tag_t) + sizeof(ElementMetaData)) * block_size + data_containers_block_mem; + // Account for the allocation of a new bucket in the labels_lookup hash table. expected_mem_delta += - (hnswIndex->labelLookup.bucket_count() - prev_bucket_count) * sizeof(size_t); + (hnswIndex->labelLookup.bucket_count() - one_block_buckets) * sizeof(size_t); // New blocks allocated - 1 aligned block for vectors and 1 unaligned block for graph data. - auto *data_blocks = dynamic_cast(hnswIndex->vectors); - expected_mem_delta += - 2 * (sizeof(DataBlock) + vecsimAllocationOverhead) + hnswIndex->getAlignment(); - expected_mem_delta += (data_blocks->capacity() - data_blocks->numBlocks()) * sizeof(DataBlock); - expected_mem_delta += - (hnswIndex->graphDataBlocks.capacity() - hnswIndex->graphDataBlocks.size()) * - sizeof(DataBlock); ASSERT_EQ(expected_mem_delta, mem_delta); - // Remove the last vector, expect resizing back to a single block, and return to the previous - // memory consumption. + // Remove the last vector, expect datablocks containers (vectors buffer and graph data) resizing + // back to a single block. Index-size container such as id to label mapping, are only freed when + // there two empty blocks. 
+ size_t before_delete_mem = allocator->getAllocationSize(); + size_t graph_data_blocks_capacity = hnswIndex->graphDataBlocks.capacity(); + auto vectors_blocks = dynamic_cast(hnswIndex->vectors); + size_t vectors_blocks_capacity = vectors_blocks->capacity(); VecSimIndex_DeleteVector(hnswIndex, block_size); - ASSERT_EQ(hnswIndex->indexSize(), block_size); - ASSERT_EQ(hnswIndex->indexCapacity(), block_size); - ASSERT_EQ(hnswIndex->checkIntegrity().unidirectional_connections, 0); - size_t expected_allocation_size = initial_memory_size + accumulated_mem_delta; - expected_allocation_size += - (data_blocks->capacity() - data_blocks->numBlocks()) * sizeof(DataBlock); - expected_allocation_size += - (hnswIndex->graphDataBlocks.capacity() - hnswIndex->graphDataBlocks.size()) * - sizeof(DataBlock); + verify_containers_size(block_size, 1, 2 * block_size); + + size_t expected_allocation_size = + before_delete_mem - last_vec_graph_data_mem - hnswIndex->getAlignment(); + // Free the buffer of the last block in both data containers. + expected_allocation_size -= + size_total_data_per_element * block_size + 2 * vecsimAllocationOverhead; + expected_allocation_size -= + (graph_data_blocks_capacity - hnswIndex->graphDataBlocks.capacity()) * + (sizeof(DataBlock) + vecsimAllocationOverhead); + expected_allocation_size -= (vectors_blocks_capacity - vectors_blocks->capacity()) * + (sizeof(DataBlock) + vecsimAllocationOverhead); ASSERT_EQ(allocator->getAllocationSize(), expected_allocation_size); // Remove the rest of the vectors, and validate that the memory returns to its initial state. @@ -450,7 +471,8 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { size_t hash_table_memory = hnswIndex->labelLookup.bucket_count() * sizeof(size_t); // Data block vectors do not shrink on resize so extra memory is expected. size_t block_vectors_memory = - sizeof(DataBlock) * (hnswIndex->graphDataBlocks.capacity() + data_blocks->capacity()) + + sizeof(DataBlock) * (hnswIndex->graphDataBlocks.capacity() + + dynamic_cast(hnswIndex->vectors)->capacity()) + 2 * vecsimAllocationOverhead; // Current memory should be back as it was initially. The label_lookup hash table is an // exception, since in some platforms, empty buckets remain even when the capacity is set to diff --git a/tests/unit/test_common.cpp b/tests/unit/test_common.cpp index 759aee001..f4e1405f1 100644 --- a/tests/unit/test_common.cpp +++ b/tests/unit/test_common.cpp @@ -570,16 +570,50 @@ TEST(CommonAPITest, testlogTieredIndex) { GenerateAndAddVector(tiered_index, 4, 1); mock_thread_pool.thread_iteration(); tiered_index->deleteVector(1); - ASSERT_EQ(log.logBuffer.size(), 4); - ASSERT_EQ(log.logBuffer[0], - "verbose: " + log.prefix + "Updating HNSW index capacity from 0 to 1024"); - ASSERT_EQ(log.logBuffer[1], + auto buffer_as_string = [&]() { + std::string buffer; + for (size_t i = 0; i < log.logBuffer.size(); i++) { + buffer += log.logBuffer[i] + "\n"; + } + return buffer; + }; + ASSERT_EQ(log.logBuffer.size(), 8) << buffer_as_string(); + size_t log_iter = 0; + ASSERT_EQ(log.logBuffer[log_iter++], + "verbose: " + log.prefix + "Resizing FLAT index from 0 to 1024") + << "failed at log index:" << log_iter - 1 << "." 
<< std::endl + << "expected log: " << buffer_as_string(); + ASSERT_EQ(log.logBuffer[log_iter++], + "verbose: " + log.prefix + "Updating HNSW index capacity from 0 to 1024") + << "failed at log index:" << log_iter - 1 << std::endl + << "expected log: " << buffer_as_string(); + ASSERT_EQ(log.logBuffer[log_iter++], + "verbose: " + log.prefix + "Resizing HNSW index from 0 to 1024") + << "failed at log index:" << log_iter - 1 << std::endl + << "expected log: " << buffer_as_string(); + ASSERT_EQ(log.logBuffer[log_iter++], + "verbose: " + log.prefix + "Resizing FLAT index from 1024 to 0") + << "failed at log index:" << log_iter - 1 << "." << std::endl + << "expected log: " << buffer_as_string(); + + ASSERT_EQ(log.logBuffer[log_iter++], "verbose: " + log.prefix + - "Tiered HNSW index GC: there are 1 ready swap jobs. Start executing 1 swap jobs"); - ASSERT_EQ(log.logBuffer[2], - "verbose: " + log.prefix + "Updating HNSW index capacity from 1024 to 0"); - ASSERT_EQ(log.logBuffer[3], - "verbose: " + log.prefix + "Tiered HNSW index GC: done executing 1 swap jobs"); + "Tiered HNSW index GC: there are 1 ready swap jobs. Start executing 1 swap jobs") + << "failed at log index:" << log_iter - 1 << std::endl + << "expected log: " << buffer_as_string(); + ASSERT_EQ(log.logBuffer[log_iter++], + "verbose: " + log.prefix + "Updating HNSW index capacity from 1024 to 0") + << "failed at log index:" << log_iter - 1 << std::endl + << "expected log: " << buffer_as_string(); + ASSERT_EQ(log.logBuffer[log_iter++], + "verbose: " + log.prefix + "Resizing HNSW index from 1024 to 0") + << "failed at log index:" << log_iter - 1 << std::endl + << "expected log: " << buffer_as_string(); + + ASSERT_EQ(log.logBuffer[log_iter++], + "verbose: " + log.prefix + "Tiered HNSW index GC: done executing 1 swap jobs") + << "failed at log index:" << log_iter - 1 << std::endl + << "expected log: " << buffer_as_string(); } TEST(CommonAPITest, NormalizeBfloat16) { diff --git a/tests/unit/test_hnsw.cpp b/tests/unit/test_hnsw.cpp index 0f67073ba..1b1d4325c 100644 --- a/tests/unit/test_hnsw.cpp +++ b/tests/unit/test_hnsw.cpp @@ -57,6 +57,35 @@ TYPED_TEST(HNSWTest, hnsw_vector_add_test) { VecSimIndex_Free(index); } +TYPED_TEST(HNSWTest, hnsw_vector_add_test2) { + size_t dim = 4; + size_t bs = 1; + HNSWParams params = { + .dim = dim, .metric = VecSimMetric_L2, .blockSize = bs, .M = 16, .efConstruction = 200}; + + VecSimIndex *index = this->CreateNewIndex(params); + + ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + size_t num_docs = bs * 3; + for (size_t i = 0; i < num_docs; i++) { + GenerateAndAddVector(index, dim, i); + } + + // delete the last vector + for (size_t i = 0; i < 2 * bs; i++) { + VecSimIndex_DeleteVector(index, num_docs - i - 1); + } + // add 1 vec + GenerateAndAddVector(index, dim, num_docs); + + // override last vector + GenerateAndAddVector(index, dim, num_docs - 1); + // try to add it again + GenerateAndAddVector(index, dim, num_docs - 1); + ASSERT_EQ(VecSimIndex_IndexSize(index), num_docs); + VecSimIndex_Free(index); +} + TYPED_TEST(HNSWTest, hnsw_blob_sanity_test) { size_t dim = 4; size_t bs = 1; From 4a826b282a042119eaf0c994b40144254416c8fa Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 28 Aug 2025 15:26:06 +0000 Subject: [PATCH 03/11] align flat growby block with hnsw --- src/VecSim/algorithms/brute_force/brute_force.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index 72518a8ca..1a9a738fc 
100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -104,9 +104,9 @@ class BruteForceIndex : public VecSimIndexAbstract { void growByBlock() { assert(indexCapacity() == idToLabelMapping.capacity()); assert(indexCapacity() % this->blockSize == 0); - assert(indexCapacity() == indexSize() - 1); + assert(indexCapacity() == indexSize()); assert((dynamic_cast(this->vectors)->numBlocks() == - (indexSize() - 1) / this->blockSize)); + (indexSize()) / this->blockSize)); resizeIndexCommon(indexCapacity() + this->blockSize); } @@ -175,14 +175,15 @@ BruteForceIndex::BruteForceIndex( template void BruteForceIndex::appendVector(const void *vector_data, labelType label) { + // Resize the index meta data structures if needed + if (indexSize() >= indexCapacity()) { + growByBlock(); + } + auto processed_blob = this->preprocessForStorage(vector_data); // Give the vector new id and increase count. idType id = this->count++; - // Resize the index meta data structures if needed - if (indexSize() > indexCapacity()) { - growByBlock(); - } // add vector data to vector raw data container this->vectors->addElement(processed_blob.get(), id); From f9aed70f9e3cf7cbdd973dff23b6b2c485149b75 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sat, 30 Aug 2025 17:00:59 +0000 Subject: [PATCH 04/11] resisng tests for BF --- tests/unit/test_bruteforce.cpp | 163 ++++++++++++++++++++++++++++----- 1 file changed, 139 insertions(+), 24 deletions(-) diff --git a/tests/unit/test_bruteforce.cpp b/tests/unit/test_bruteforce.cpp index a316324ea..56307bf88 100644 --- a/tests/unit/test_bruteforce.cpp +++ b/tests/unit/test_bruteforce.cpp @@ -111,51 +111,166 @@ TYPED_TEST(BruteForceTest, brute_force_vector_update_test) { TYPED_TEST(BruteForceTest, resize_and_align_index) { size_t dim = 4; - size_t n = 14; size_t blockSize = 10; + size_t curr_label = 0; BFParams params = {.dim = dim, .metric = VecSimMetric_L2, .blockSize = blockSize}; VecSimIndex *index = this->CreateNewIndex(params); BruteForceIndex *bf_index = this->CastToBF(index); - ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + auto verify_index_size = [&](size_t expected_size, size_t expected_capacity, std::string msg) { + SCOPED_TRACE("verify_index_size: " + msg); + ASSERT_EQ(VecSimIndex_IndexSize(index), expected_size); + ASSERT_EQ(bf_index->idToLabelMapping.size(), expected_capacity); + ASSERT_EQ(bf_index->indexCapacity(), expected_capacity); + ASSERT_EQ(dynamic_cast(bf_index->vectors)->numBlocks(), + expected_size / blockSize + (expected_size % blockSize != 0)) + << "expected_size: " << expected_size << " expected_capacity: " << expected_capacity; + }; + // Empty index with no initial capacity + verify_index_size(0, 0, "empty index"); - for (size_t i = 0; i < n; i++) { - GenerateAndAddVector(index, dim, i, i); + // Add one vector, index capacity should grow to blockSize. + GenerateAndAddVector(index, dim, curr_label++); + verify_index_size(1, blockSize, "add one vector"); + + // Add vector up to blocksize, index capacity should remain the same. 
+ while (curr_label < blockSize) { + GenerateAndAddVector(index, dim, curr_label++); } - ASSERT_EQ(bf_index->idToLabelMapping.size(), 2 * blockSize); - ASSERT_EQ(VecSimIndex_IndexSize(index), n); + verify_index_size(blockSize, blockSize, "add up to blocksize"); // remove invalid id VecSimIndex_DeleteVector(index, 3459); // This should do nothing - ASSERT_EQ(VecSimIndex_IndexSize(index), n); - ASSERT_EQ(bf_index->idToLabelMapping.size(), 2 * blockSize); + verify_index_size(blockSize, blockSize, "remove invalid id"); - // Add another vector, since index size equals to the capacity, this should cause resizing - // (to fit a multiplication of block_size). - GenerateAndAddVector(index, dim, n); - ASSERT_EQ(VecSimIndex_IndexSize(index), n + 1); - // Capacity and size should remain blockSize * 2. - ASSERT_EQ(bf_index->idToLabelMapping.size(), 2 * blockSize); - ASSERT_EQ(bf_index->idToLabelMapping.capacity(), 2 * blockSize); + // Add another vector, since index size equals to block size, this should cause resizing + // of the capacity by one blocksize + GenerateAndAddVector(index, dim, curr_label++); + verify_index_size(blockSize + 1, 2 * blockSize, + "add one more vector after reaching block size"); - // Now size = n + 1 (= 15), capacity = 2 * bs (= 20). Test capacity overflow again + // Now size = blocksize + 1 (= 11), capacity = 2 * bs (= 20). Test capacity overflow again // to check that it stays aligned with block size. + size_t add_vectors_count = blockSize + 2; // 12 + while (curr_label < add_vectors_count + blockSize + 1) { + GenerateAndAddVector(index, dim, curr_label++); + } + + // Size should be blocksize + 1 + add_vectors_count (= 23). + verify_index_size( + blockSize + 1 + add_vectors_count, 3 * blockSize, + "add more vectors after reaching 2 * blocksize capacity to trigger another resize"); + + // Delete vectors so that indexsize % blocksize == 0 (and then delete one more) + size_t num_deleted = 0; + auto remove_to_one_below_blocksize = [&](size_t initial_label_to_remove) { + while (VecSimIndex_IndexSize(index) % blockSize != 0) { + ASSERT_EQ(VecSimIndex_DeleteVector(index, initial_label_to_remove++), 1) + << "tried to remove label: " << initial_label_to_remove - 1; + num_deleted++; + } + VecSimIndex_DeleteVector(index, initial_label_to_remove); + num_deleted++; + }; + + // First trigger of remove_to_one_below_blocksize will result in one free block. + // This should not trigger shrinking of metadata containers. + remove_to_one_below_blocksize(0); // remove first block labels. + verify_index_size( + blockSize * 2 - 1, 3 * blockSize, + "delete vectors so that indexsize % blocksize == 0, but there is only one free block"); + + // Second trigger of remove_to_one_below_blocksize will result in two free blocks. + // This should trigger shrinking of metadata containers by one block. + remove_to_one_below_blocksize(num_deleted); + verify_index_size( + blockSize - 1, 2 * blockSize, + "delete vectors so that indexsize % blocksize == 0 and there are two free blocks"); + + // Now there is one block in use and one free. adding vectors up to blocksize should not trigger + // another resize. + GenerateAndAddVector(index, dim, curr_label++); + verify_index_size(blockSize, 2 * blockSize, + "add vectors up to blocksize after deleting two blocks"); + + // Delete all vectors. 
+ while (VecSimIndex_IndexSize(index) > 0) { + VecSimIndex_DeleteVector(index, num_deleted++); + } + verify_index_size(0, 0, "delete all vectors"); + VecSimIndex_Free(index); +} + +TYPED_TEST(BruteForceTest, brute_force_no_oscillation_test) { + size_t dim = 4; + size_t blockSize = 2; + size_t cycles = 5; // Number of add/delete cycles to test + + BFParams params = {.dim = dim, .metric = VecSimMetric_L2, .blockSize = blockSize}; + VecSimIndex *index = this->CreateNewIndex(params); + BruteForceIndex *bf_index = this->CastToBF(index); + + auto verify_no_oscillation = [&](size_t expected_size, size_t expected_capacity, + const std::string &msg) { + SCOPED_TRACE("verify_no_oscillation: " + msg); + ASSERT_EQ(VecSimIndex_IndexSize(index), expected_size); + ASSERT_EQ(bf_index->indexCapacity(), expected_capacity); + }; + + // Initial state: empty index + verify_no_oscillation(0, 0, "initial empty state"); - size_t add_vectors_count = 8; - for (size_t i = 0; i < add_vectors_count; i++) { - GenerateAndAddVector(index, dim, n + 2 + i, i); + size_t current_label = 0; + + // Add initial 3 blocks + size_t initial_num_blocks = 3; + for (size_t i = 0; i < initial_num_blocks * blockSize; i++) { + GenerateAndAddVector(index, dim, current_label++); + } + verify_no_oscillation(initial_num_blocks * blockSize, initial_num_blocks * blockSize, + "initial " + std::to_string(initial_num_blocks) + + " blocks vectors added"); + + // Perform oscillation cycles: delete block, add block, delete block, add block... + for (size_t cycle = 0; cycle < cycles; cycle++) { + // Delete blockSize vectors (size becomes blockSize, but capacity should remain 2 * + // blockSize due to buffer zone) + for (size_t i = 0; i < blockSize; i++) { + VecSimIndex_DeleteVector(index, cycle * blockSize + i); + } + verify_no_oscillation((initial_num_blocks - 1) * blockSize, initial_num_blocks * blockSize, + "cycle " + std::to_string(cycle) + + " - after deleting block of vectors"); + + // Add blockSize vectors back (size becomes 2 * blockSize, capacity should remain 2 * + // blockSize) + for (size_t i = 0; i < blockSize; i++) { + GenerateAndAddVector(index, dim, current_label++); + } + verify_no_oscillation(initial_num_blocks * blockSize, initial_num_blocks * blockSize, + "cycle " + std::to_string(cycle) + + " - after adding blockSize vectors back"); } - // Size should be n + 1 + 8 (= 25). - ASSERT_EQ(VecSimIndex_IndexSize(index), n + 1 + add_vectors_count); + // Final verification: delete enough vectors to trigger actual shrinking + // Delete blocksize vectors to have only one block of vectors (2 free blocks = shrinking + // condition) + size_t vectors_to_delete = 2 * blockSize; + for (size_t i = 0; i < vectors_to_delete; i++) { + VecSimIndex_DeleteVector(index, cycles * blockSize + i); + } + verify_no_oscillation(blockSize, 2 * blockSize, + "final shrinking to trigger shrinking by one block"); - // Check new capacity size, should be blockSize * 3. 
- ASSERT_EQ(bf_index->idToLabelMapping.size(), 3 * blockSize); - ASSERT_EQ(bf_index->idToLabelMapping.capacity(), 3 * blockSize); + // Verify we can still grow normally after the oscillation test + for (size_t i = 0; i < blockSize; i++) { + GenerateAndAddVector(index, dim, current_label++); + } + verify_no_oscillation(2 * blockSize, 2 * blockSize, "growth after oscillation test"); VecSimIndex_Free(index); } From b4be8d619b4d8a074781d38d0bb40b0a80b392a7 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 31 Aug 2025 09:40:28 +0000 Subject: [PATCH 05/11] allocator tests that accounts for one block buffer --- tests/unit/test_allocator.cpp | 253 ++++++++++++++++++++++++++-------- 1 file changed, 195 insertions(+), 58 deletions(-) diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 9f260f716..17b0da346 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -93,8 +93,11 @@ TYPED_TEST_SUITE(IndexAllocatorTest, DataTypeSet); TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { // Create only the minimal struct. size_t dim = 128; - BFParams params = { - .type = TypeParam::get_index_type(), .dim = dim, .metric = VecSimMetric_IP, .blockSize = 1}; + size_t blockSize = 1; + BFParams params = {.type = TypeParam::get_index_type(), + .dim = dim, + .metric = VecSimMetric_IP, + .blockSize = blockSize}; auto *bfIndex = dynamic_cast *>( BruteForceFactory::NewIndex(¶ms)); auto allocator = bfIndex->getAllocator(); @@ -110,94 +113,228 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { size_t memory = VecSimIndex_StatsInfo(bfIndex).memory; ASSERT_EQ(allocator->getAllocationSize(), memory); + // @param expected_size - The expected number of elements in the index. + // @param expected_data_container_blocks - The expected number of blocks in the data containers. + // @param expected_map_containers_capacity - The expected capacity of the map containers in + // number of elements. 
+ auto verify_containers_size = [&](size_t expected_size, size_t expected_data_container_blocks, + size_t expected_map_containers_size) { + ASSERT_EQ(bfIndex->indexSize(), expected_size); + ASSERT_EQ(dynamic_cast(bfIndex->vectors)->numBlocks(), + expected_data_container_blocks); + ASSERT_EQ(bfIndex->vectors->size(), expected_size); + + ASSERT_EQ(bfIndex->indexCapacity(), expected_map_containers_size); + ASSERT_EQ(bfIndex->idToLabelMapping.capacity(), expected_map_containers_size); + ASSERT_EQ(bfIndex->idToLabelMapping.size(), expected_map_containers_size); + ASSERT_GE(bfIndex->labelToIdLookup.bucket_count(), expected_map_containers_size); + }; + // =========== Add label 1 =========== int before = allocator->getAllocationSize(); + size_t buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + auto vectors_blocks = dynamic_cast(bfIndex->vectors); + size_t vectors_blocks_capacity = vectors_blocks->capacity(); + VecSimIndex_AddVector(bfIndex, vec, 1); int addCommandAllocationDelta = allocator->getAllocationSize() - before; - int64_t expectedAllocationDelta = 0; - expectedAllocationDelta += + int64_t expectedAllocationDelta = sizeof(labelType) + vecsimAllocationOverhead; // resize idToLabelMapping - expectedAllocationDelta += sizeof(DataBlock) + vecsimAllocationOverhead; // New vector block expectedAllocationDelta += - sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // keep the vector in the vector block + (vectors_blocks->capacity() - vectors_blocks_capacity) * sizeof(DataBlock) + + vecsimAllocationOverhead; // New vectors blocks + expectedAllocationDelta += blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead + + bfIndex->getAlignment(); // block vectors buffer + expectedAllocationDelta += hashTableNodeSize; // New node in the label lookup + // Account for the allocation of a new buckets in the labels_lookup hash table. expectedAllocationDelta += - sizeof(std::pair) + vecsimAllocationOverhead; // keep the mapping + (bfIndex->labelToIdLookup.bucket_count() - buckets_num_before) * sizeof(size_t); // Assert that the additional allocated delta did occur, and it is limited, as some STL // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize + addCommandAllocationDelta); - ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_LE(expectedAllocationDelta, addCommandAllocationDelta); - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); + { + SCOPED_TRACE("Verifying allocation delta for adding first vector"); + verify_containers_size(1, 1, 1); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + addCommandAllocationDelta); + ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_LE(expectedAllocationDelta, addCommandAllocationDelta); + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } + + // =========== labels = [1], vector blocks = 1, maps capacity = 1. 
Add label 2 + 3 =========== // Prepare for next assertion test expectedAllocationSize = memory; expectedAllocationDelta = 0; before = allocator->getAllocationSize(); + vectors_blocks_capacity = vectors_blocks->capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + VecSimIndex_AddVector(bfIndex, vec, 2); + VecSimIndex_AddVector(bfIndex, vec, 3); addCommandAllocationDelta = allocator->getAllocationSize() - before; - expectedAllocationDelta += sizeof(DataBlock) + vecsimAllocationOverhead; // New vector block - expectedAllocationDelta += sizeof(labelType); // resize idToLabelMapping + expectedAllocationDelta += (vectors_blocks->capacity() - vectors_blocks_capacity) * + sizeof(DataBlock); // New vector blocks + expectedAllocationDelta += 2 * sizeof(labelType); // resize idToLabelMapping expectedAllocationDelta += - sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // keep the vector in the vector block + 2 * (blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead + + bfIndex->getAlignment()); // Two block vectors buffer + expectedAllocationDelta += 2 * hashTableNodeSize; // New nodes in the label lookup expectedAllocationDelta += - sizeof(std::pair) + vecsimAllocationOverhead; // keep the mapping - // Assert that the additional allocated delta did occur, and it is limited, as some STL - // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize + addCommandAllocationDelta); - ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_LE(expectedAllocationDelta, addCommandAllocationDelta); - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); + (bfIndex->labelToIdLookup.bucket_count() - buckets_num_before) * sizeof(size_t); + { + SCOPED_TRACE("Index size = 1Verifying allocation delta for adding two more vectors"); + verify_containers_size(3, 3, 3); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + addCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, addCommandAllocationDelta); + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } + + // =========== labels = [1, 2, 3], vector blocks = 3, maps capacity = 3. Delete label 1 + // =========== // Prepare for next assertion test expectedAllocationSize = memory; expectedAllocationDelta = 0; before = allocator->getAllocationSize(); - VecSimIndex_DeleteVector(bfIndex, 2); - int deleteCommandAllocationDelta = allocator->getAllocationSize() - before; - expectedAllocationDelta -= - sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead; // Free the vector in the vector block - // idToLabelMapping and label:id should not change since count < minimalShrinkCapacity + vectors_blocks_capacity = vectors_blocks->capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + { + SCOPED_TRACE("Verifying allocation delta for deleting a vector from index size 3"); + ASSERT_EQ(VecSimIndex_DeleteVector(bfIndex, 1), 1); + int deleteCommandAllocationDelta = allocator->getAllocationSize() - before; + verify_containers_size(2, 2, 3); + // Removing blocks doesn't change vectors_blocks->capacity(), but the block buffer is freed. 
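The comment above, and the capacity assertion that follows, lean on a standard-library detail worth spelling out: erasing the last element of a std::vector destroys that element (so any buffer it owns is freed), but the vector never returns its own storage, so capacity() stays put until shrink_to_fit() is called. A minimal illustration, assuming nothing about the VecSim types:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    int main() {
        std::vector<std::vector<char>> blocks;
        blocks.emplace_back(1024);          // a "block" owning a 1 KiB buffer
        std::size_t cap = blocks.capacity();

        blocks.pop_back();                  // the 1 KiB buffer is released here...
        assert(blocks.capacity() == cap);   // ...but the outer vector keeps its capacity
        assert(blocks.empty());
        return 0;
    }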
+ ASSERT_EQ(vectors_blocks->capacity(), vectors_blocks_capacity); + expectedAllocationDelta -= + blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead + + bfIndex->getAlignment(); // Free the vector buffer in the vector block + expectedAllocationDelta -= hashTableNodeSize; // Remove node from the label lookup + // idToLabelMapping and label:id should not change since count > capacity - 2 * blockSize + ASSERT_EQ(bfIndex->labelToIdLookup.bucket_count(), buckets_num_before); + + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + deleteCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, deleteCommandAllocationDelta); + + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } - // Assert that the reclaiming of memory did occur, and it is limited, as some STL - // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), - expectedAllocationSize + deleteCommandAllocationDelta); - ASSERT_GE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_GE(expectedAllocationDelta, deleteCommandAllocationDelta); + // =========== labels = [2, 3], vector blocks = 2, maps capacity = 3. Add label 4 =========== - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); + // Prepare for next assertion test + expectedAllocationSize = memory; + expectedAllocationDelta = 0; + + before = allocator->getAllocationSize(); + vectors_blocks_capacity = vectors_blocks->capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + size_t idToLabel_size_before = bfIndex->idToLabelMapping.size(); + + VecSimIndex_AddVector(bfIndex, vec, 4); + addCommandAllocationDelta = allocator->getAllocationSize() - before; + expectedAllocationDelta += (vectors_blocks->capacity() - vectors_blocks_capacity) * + sizeof(DataBlock); // New vector block + expectedAllocationDelta += blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead + + bfIndex->getAlignment(); // block vectors buffer + expectedAllocationDelta += hashTableNodeSize; // New node in the label lookup + { + SCOPED_TRACE( + "Verifying allocation delta for adding a vector to index size 2 with capacity 3"); + verify_containers_size(3, 3, 3); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + addCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, addCommandAllocationDelta); + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + + // idToLabelMapping and label:id should not change since if we one free block + ASSERT_EQ(bfIndex->labelToIdLookup.bucket_count(), buckets_num_before); + ASSERT_EQ(bfIndex->idToLabelMapping.size(), idToLabel_size_before); + } + + // =========== labels = [2, 3, 4], vector blocks = 3, maps capacity = 3. 
Delete label 2 + 3 + // =========== // Prepare for next assertion test expectedAllocationSize = memory; expectedAllocationDelta = 0; before = allocator->getAllocationSize(); - VecSimIndex_DeleteVector(bfIndex, 1); - deleteCommandAllocationDelta = allocator->getAllocationSize() - before; - expectedAllocationDelta -= - (sizeof(DataBlock) + vecsimAllocationOverhead); // Free the vector block - expectedAllocationDelta -= - sizeof(DataBlock *) + vecsimAllocationOverhead; // remove from vectorBlocks vector - // We resize maps containers to 0. - expectedAllocationDelta -= - 2 * sizeof(labelType) + vecsimAllocationOverhead; // remove two idToLabelMapping - expectedAllocationDelta -= 2 * sizeof(std::pair) + - vecsimAllocationOverhead; // remove two label:id pair - expectedAllocationDelta -= (sizeof(TEST_DATA_T) * dim + - vecsimAllocationOverhead); // Free the vector in the vector block - - // Assert that the reclaiming of memory did occur, and it is limited, as some STL - // collection allocate additional structures for their internal implementation. - ASSERT_EQ(allocator->getAllocationSize(), - expectedAllocationSize + deleteCommandAllocationDelta); - ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_LE(expectedAllocationDelta, deleteCommandAllocationDelta); - memory = VecSimIndex_StatsInfo(bfIndex).memory; - ASSERT_EQ(allocator->getAllocationSize(), memory); + vectors_blocks_capacity = vectors_blocks->capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + { + SCOPED_TRACE("Verifying allocation delta for deleting two vectors from index size 3"); + ASSERT_EQ(VecSimIndex_DeleteVector(bfIndex, 2), 1); + ASSERT_EQ(VecSimIndex_DeleteVector(bfIndex, 3), 1); + + int deleteCommandAllocationDelta = allocator->getAllocationSize() - before; + verify_containers_size(1, 1, 2); + // Removing blocks doesn't change vectors_blocks->capacity(), but the block buffer is freed. + ASSERT_EQ(vectors_blocks->capacity(), vectors_blocks_capacity); + expectedAllocationDelta -= + 2 * (blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead + + bfIndex->getAlignment()); // Free the vector buffer in the vector block + expectedAllocationDelta -= 2 * hashTableNodeSize; // Remove nodes from the label lookup + // idToLabelMapping and label:id should shrink by block since count >= capacity - 2 * + // blockSize + expectedAllocationDelta -= sizeof(labelType); // remove one idToLabelMapping + expectedAllocationDelta -= + (buckets_num_before - bfIndex->labelToIdLookup.bucket_count()) * sizeof(size_t); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + deleteCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, deleteCommandAllocationDelta); + + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } + + // =========== labels = [4], vector blocks = 1, maps capacity = 2. 
Delete last label =========== + + // Prepare for next assertion test + expectedAllocationSize = memory; + expectedAllocationDelta = 0; + + before = allocator->getAllocationSize(); + vectors_blocks_capacity = vectors_blocks->capacity(); + buckets_num_before = bfIndex->labelToIdLookup.bucket_count(); + { + SCOPED_TRACE("Verifying allocation delta for emptying the index"); + ASSERT_EQ(VecSimIndex_DeleteVector(bfIndex, 4), 1); + + int deleteCommandAllocationDelta = allocator->getAllocationSize() - before; + verify_containers_size(0, 0, 0); + // Removing blocks doesn't change vectors_blocks->capacity(), but the block buffer is freed. + ASSERT_EQ(vectors_blocks->capacity(), vectors_blocks_capacity); + expectedAllocationDelta -= + (blockSize * sizeof(TEST_DATA_T) * dim + vecsimAllocationOverhead + + bfIndex->getAlignment()); // Free the vector buffer in the vector block + expectedAllocationDelta -= hashTableNodeSize; // Remove nodes from the label lookup + // idToLabelMapping and label:id should shrink by block since count >= capacity - 2 * + // blockSize + expectedAllocationDelta -= + 2 * sizeof(labelType) + + vecsimAllocationOverhead; // remove two idToLabelMapping and free the container + expectedAllocationDelta -= + (buckets_num_before - bfIndex->labelToIdLookup.bucket_count()) * sizeof(size_t); + ASSERT_EQ(allocator->getAllocationSize(), + expectedAllocationSize + deleteCommandAllocationDelta); + ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_EQ(expectedAllocationDelta, deleteCommandAllocationDelta); + + memory = VecSimIndex_StatsInfo(bfIndex).memory; + ASSERT_EQ(allocator->getAllocationSize(), memory); + } + VecSimIndex_Free(bfIndex); } From 09f162bfd05bee38be0c387bdef23784c13ec277 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 31 Aug 2025 13:29:34 +0000 Subject: [PATCH 06/11] tests --- src/VecSim/algorithms/hnsw/hnsw_tiered.h | 16 +- .../hnsw/hnsw_tiered_tests_friends.h | 1 + src/VecSim/vec_sim_tiered_index.h | 11 ++ tests/unit/test_allocator.cpp | 1 + tests/unit/test_bruteforce_multi.cpp | 160 +++++++++++++----- tests/unit/test_hnsw.cpp | 29 ---- tests/unit/test_hnsw_tiered.cpp | 83 +++++++++ 7 files changed, 220 insertions(+), 81 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered.h b/src/VecSim/algorithms/hnsw/hnsw_tiered.h index ec61325fe..600e91e56 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered.h @@ -314,7 +314,7 @@ template void TieredHNSWIndex::executeReadySwapJobs(size_t maxJobsToRun) { // Execute swap jobs - acquire hnsw write lock. - this->mainIndexGuard.lock(); + this->lockMainIndexGuard(); TIERED_LOG(VecSimCommonStrings::LOG_VERBOSE_STRING, "Tiered HNSW index GC: there are %zu ready swap jobs. Start executing %zu swap jobs", readySwapJobs, std::min(readySwapJobs, maxJobsToRun)); @@ -339,7 +339,7 @@ void TieredHNSWIndex::executeReadySwapJobs(size_t maxJobsToR readySwapJobs -= idsToRemove.size(); TIERED_LOG(VecSimCommonStrings::LOG_VERBOSE_STRING, "Tiered HNSW index GC: done executing %zu swap jobs", idsToRemove.size()); - this->mainIndexGuard.unlock(); + this->unlockMainIndexGuard(); } template @@ -441,7 +441,7 @@ void TieredHNSWIndex::insertVectorToHNSW( // Release the inner HNSW data lock before we re-acquire the global HNSW lock. 
this->mainIndexGuard.unlock_shared(); hnsw_index->unlockIndexDataGuard(); - this->mainIndexGuard.lock(); + this->lockMainIndexGuard(); hnsw_index->lockIndexDataGuard(); // Hold the index data lock while we store the new element. If the new node's max level is @@ -466,7 +466,7 @@ void TieredHNSWIndex::insertVectorToHNSW( if (state.elementMaxLevel > state.currMaxLevel) { hnsw_index->unlockIndexDataGuard(); } - this->mainIndexGuard.unlock(); + this->unlockMainIndexGuard(); } else { // Do the same as above except for changing the capacity, but with *shared* lock held: // Hold the index data lock while we store the new element. If the new node's max level is @@ -713,9 +713,9 @@ int TieredHNSWIndex::addVector(const void *blob, labelType l auto storage_blob = this->frontendIndex->preprocessForStorage(blob); // Insert the vector to the HNSW index. Internally, we will never have to overwrite the // label since we already checked it outside. - this->mainIndexGuard.lock(); + this->lockMainIndexGuard(); hnsw_index->addVector(storage_blob.get(), label); - this->mainIndexGuard.unlock(); + this->unlockMainIndexGuard(); return ret; } if (this->frontendIndex->indexSize() >= this->flatBufferLimit) { @@ -841,9 +841,9 @@ int TieredHNSWIndex::deleteVector(labelType label) { } } else { // delete in place. - this->mainIndexGuard.lock(); + this->lockMainIndexGuard(); num_deleted_vectors += this->deleteLabelFromHNSWInplace(label); - this->mainIndexGuard.unlock(); + this->unlockMainIndexGuard(); } return num_deleted_vectors; diff --git a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h index db8ec87e9..772d72724 100644 --- a/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h +++ b/src/VecSim/algorithms/hnsw/hnsw_tiered_tests_friends.h @@ -63,6 +63,7 @@ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_deleteBothAsyncAndInplaceMulti_ INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_deleteInplaceMultiSwapId_Test) INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_deleteInplaceAvoidUpdatedMarkedDeleted_Test) INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_switchDeleteModes_Test) +INDEX_TEST_FRIEND_CLASS(HNSWTieredIndexTestBasic_HNSWResize_Test) friend class CommonAPITest_SearchDifferentScores_Test; friend class BF16TieredTest; diff --git a/src/VecSim/vec_sim_tiered_index.h b/src/VecSim/vec_sim_tiered_index.h index e7a9808b9..fe9720ab0 100644 --- a/src/VecSim/vec_sim_tiered_index.h +++ b/src/VecSim/vec_sim_tiered_index.h @@ -50,7 +50,17 @@ class VecSimTieredIndex : public VecSimIndexInterface { mutable std::shared_mutex flatIndexGuard; mutable std::shared_mutex mainIndexGuard; + void lockMainIndexGuard() const { + mainIndexGuard.lock(); +#ifdef BUILD_TESTS + mainIndexGuard_write_lock_count++; +#endif + } + void unlockMainIndexGuard() const { mainIndexGuard.unlock(); } +#ifdef BUILD_TESTS + mutable std::atomic_int mainIndexGuard_write_lock_count = 0; +#endif size_t flatBufferLimit; void submitSingleJob(AsyncJob *job) { @@ -89,6 +99,7 @@ class VecSimTieredIndex : public VecSimIndexInterface { #ifdef BUILD_TESTS public: + int getMainIndexGuardWriteLockCount() const { return mainIndexGuard_write_lock_count; } #endif // For both topK and range, Use withSet=false if you can guarantee that shared ids between the // two lists will also have identical scores. 
In this case, any duplicates will naturally align diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 17b0da346..66bf3dce6 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -526,6 +526,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) { SCOPED_TRACE("Verifying containers size for size " + std::to_string(expected_size)); ASSERT_EQ(hnswIndex->indexSize(), expected_size); ASSERT_EQ(hnswIndex->indexCapacity(), expected_data_container_blocks * block_size); + ASSERT_EQ(hnswIndex->indexCapacity(), hnswIndex->maxElements); ASSERT_EQ(hnswIndex->graphDataBlocks.size(), expected_data_container_blocks); ASSERT_EQ(dynamic_cast(hnswIndex->vectors)->numBlocks(), expected_data_container_blocks); diff --git a/tests/unit/test_bruteforce_multi.cpp b/tests/unit/test_bruteforce_multi.cpp index 307e3ea95..8d238daf4 100644 --- a/tests/unit/test_bruteforce_multi.cpp +++ b/tests/unit/test_bruteforce_multi.cpp @@ -70,71 +70,143 @@ TYPED_TEST(BruteForceMultiTest, vector_add_multiple_test) { /**** resizing cases ****/ TYPED_TEST(BruteForceMultiTest, resize_and_align_index) { - size_t dim = 4; - size_t n = 15; - size_t blockSize = 10; - size_t n_labels = 3; + constexpr size_t dim = 4; + constexpr size_t blockSize = 10; + constexpr size_t per_label = 3; + constexpr size_t labels = 7; + constexpr size_t n = labels * per_label; + size_t added_vectors = 0; + size_t curr_label = 0; BFParams params = {.dim = dim, .metric = VecSimMetric_L2, .blockSize = blockSize}; VecSimIndex *index = this->CreateNewIndex(params); auto bf_index = this->CastToBF_Multi(index); - ASSERT_EQ(VecSimIndex_IndexSize(index), 0); + DataBlocksContainer *vectors_container = dynamic_cast(bf_index->vectors); + auto verify_index_size = [&](size_t expected_num_vectors, size_t expected_capacity, + std::string msg) { + SCOPED_TRACE("verify_index_size: " + msg); + VecSimIndexDebugInfo info = VecSimIndex_DebugInfo(index); + ASSERT_EQ(info.commonInfo.indexSize, expected_num_vectors); + ASSERT_EQ(info.commonInfo.indexLabelCount, + (expected_num_vectors + per_label - 1) / per_label); + ASSERT_EQ(bf_index->idToLabelMapping.size(), expected_capacity); + ASSERT_EQ(bf_index->getVectorsContainer()->size(), expected_num_vectors); + ASSERT_EQ(bf_index->indexCapacity(), expected_capacity); + ASSERT_EQ(vectors_container->numBlocks(), + expected_num_vectors / blockSize + (expected_num_vectors % blockSize != 0)) + << "expected_size: " << expected_num_vectors + << " expected_capacity: " << expected_capacity; + }; - for (size_t i = 0; i < n; i++) { - GenerateAndAddVector(index, dim, i % n_labels, i); - } + // Empty index with no initial capacity + verify_index_size(0, 0, "empty index"); - VecSimIndexDebugInfo info = VecSimIndex_DebugInfo(index); - ASSERT_EQ(info.commonInfo.indexSize, n); - ASSERT_EQ(info.commonInfo.indexLabelCount, n_labels); - ASSERT_EQ(bf_index->idToLabelMapping.size(), n - n % blockSize + blockSize); - ASSERT_EQ(bf_index->getVectorsContainer()->size(), n); + // Add one label (vectors per label < blockSize), index capacity should grow to blockSize. + for (size_t i = 0; i < per_label; i++) { + GenerateAndAddVector(index, dim, 0); + added_vectors++; + } + curr_label++; + verify_index_size(per_label, blockSize, + "add one label with " + std::to_string(per_label) + " vectors"); + + ASSERT_LE(added_vectors, blockSize); + // Add vector up to blocksize, index capacity should remain the same. 
+ while (added_vectors < blockSize) { + GenerateAndAddVector(index, dim, curr_label); + added_vectors++; + curr_label += added_vectors % per_label == 0; + } + verify_index_size(blockSize, blockSize, "add up to blocksize"); // remove invalid id - ASSERT_EQ(bf_index->deleteVector(3459), 0); + VecSimIndex_DeleteVector(index, 3459); // This should do nothing - info = VecSimIndex_DebugInfo(index); - ASSERT_EQ(info.commonInfo.indexSize, n); - ASSERT_EQ(info.commonInfo.indexLabelCount, n_labels); - ASSERT_EQ(bf_index->idToLabelMapping.size(), n - n % blockSize + blockSize); - ASSERT_EQ(bf_index->getVectorsContainer()->size(), n); - - // Add another vector (index capacity should remain the same). - GenerateAndAddVector(index, dim, 0); - info = VecSimIndex_DebugInfo(index); - ASSERT_EQ(info.commonInfo.indexSize, n + 1); - // Label count doesn't increase because label 0 already exists - ASSERT_EQ(info.commonInfo.indexLabelCount, n_labels); - // Check new capacity size, should be blockSize * 2. - ASSERT_EQ(bf_index->idToLabelMapping.size(), 2 * blockSize); - - // Now size = n + 1 = 16, capacity = 2 * bs = 20. Test capacity overflow again + verify_index_size(blockSize, blockSize, "remove invalid id"); + + // Add another vector; since the index size equals the block size, this should cause the + // capacity to grow by one blockSize + GenerateAndAddVector(index, dim, curr_label); + added_vectors++; + ASSERT_EQ(added_vectors, blockSize + 1); + curr_label += added_vectors % per_label == 0; + verify_index_size(blockSize + 1, 2 * blockSize, + "add one more vector after reaching block size"); + + // Now size = blockSize + 1 = 11, capacity = 2 * bs = 20. Test capacity overflow again // to check that it stays aligned with blocksize. - - size_t add_vectors_count = 8; - for (size_t i = 0; i < add_vectors_count; i++) { - GenerateAndAddVector(index, dim, i % n_labels, i); + size_t add_vectors_count = blockSize + 2; // 12 + while (added_vectors < add_vectors_count + blockSize + 1) { + GenerateAndAddVector(index, dim, curr_label); + added_vectors++; + curr_label += added_vectors % per_label == 0; } - // Size should be n + 1 + 8 = 24. - size_t new_n = n + 1 + add_vectors_count; - info = VecSimIndex_DebugInfo(index); + // Size should be blocksize + 1 + add_vectors_count (= 23). + verify_index_size( + blockSize + 1 + add_vectors_count, 3 * blockSize, + "add more vectors after reaching 2 * blocksize capacity to trigger another resize"); - ASSERT_EQ(info.commonInfo.indexSize, new_n); - // Label count doesn't increase because label 0 already exists - ASSERT_EQ(info.commonInfo.indexLabelCount, n_labels); size_t total_vectors = 0; for (auto label_ids : bf_index->labelToIdsLookup) { total_vectors += label_ids.second.size(); } - ASSERT_EQ(total_vectors, new_n); - - // Check new capacity size, should be blockSize * 3. - ASSERT_EQ(bf_index->idToLabelMapping.size(), 3 * blockSize); + ASSERT_EQ(total_vectors, added_vectors); + + // Delete vectors until one block plus some vectors are removed.
+ size_t current_vectors_block_count = (added_vectors + blockSize - 1) / blockSize; + ASSERT_EQ(current_vectors_block_count, 3); + size_t num_deleted = 0; + size_t label_to_delete = 0; + auto remove_to_one_below_blocksize = [&]() { + while (vectors_container->numBlocks() == current_vectors_block_count) { + int ret = VecSimIndex_DeleteVector(index, label_to_delete++); + ASSERT_EQ(ret, per_label) << "tried to remove label: " << label_to_delete - 1; + num_deleted += ret; + } + int ret = VecSimIndex_DeleteVector(index, label_to_delete++); + ASSERT_EQ(ret, per_label) << "tried to remove label: " << label_to_delete - 1; + num_deleted += ret; + }; + // First trigger of remove_to_one_below_blocksize will result in one free block. + // This should not trigger shrinking of metadata containers. + remove_to_one_below_blocksize(); // remove first block labels. + ASSERT_LT(added_vectors - num_deleted, 2 * blockSize); + verify_index_size( + added_vectors - num_deleted, 3 * blockSize, + "delete vectors so that indexsize < 2 * blocksize, but there is only one free block"); + + // Second trigger of remove_to_one_below_blocksize will result in two free blocks. + // This should trigger shrinking of metadata containers by one block. + current_vectors_block_count--; + remove_to_one_below_blocksize(); + ASSERT_LT(added_vectors - num_deleted, blockSize); + verify_index_size( + added_vectors - num_deleted, 2 * blockSize, + "delete vectors so that indexsize % blocksize == 0 and there are two free blocks"); + + // Now there is one block in use and one free. adding vectors up to blocksize (and below 2 * + // blocksize) should not trigger another resize. + while (vectors_container->numBlocks() == current_vectors_block_count) { + GenerateAndAddVector(index, dim, curr_label); + added_vectors++; + curr_label += added_vectors % per_label == 0; + } + verify_index_size(added_vectors - num_deleted, 2 * blockSize, + "add vectors up to blocksize after deleting two blocks"); + + // Delete all vectors. 
+ while (VecSimIndex_IndexSize(index) > 0) { + int ret = VecSimIndex_DeleteVector(index, label_to_delete++); + ASSERT_LE(ret, per_label) << "tried to remove label: " << label_to_delete - 1; + num_deleted += ret; + } + ASSERT_EQ(num_deleted, added_vectors); + verify_index_size(0, 0, "delete all vectors"); VecSimIndex_Free(index); } diff --git a/tests/unit/test_hnsw.cpp b/tests/unit/test_hnsw.cpp index 1b1d4325c..0f67073ba 100644 --- a/tests/unit/test_hnsw.cpp +++ b/tests/unit/test_hnsw.cpp @@ -57,35 +57,6 @@ TYPED_TEST(HNSWTest, hnsw_vector_add_test) { VecSimIndex_Free(index); } -TYPED_TEST(HNSWTest, hnsw_vector_add_test2) { - size_t dim = 4; - size_t bs = 1; - HNSWParams params = { - .dim = dim, .metric = VecSimMetric_L2, .blockSize = bs, .M = 16, .efConstruction = 200}; - - VecSimIndex *index = this->CreateNewIndex(params); - - ASSERT_EQ(VecSimIndex_IndexSize(index), 0); - size_t num_docs = bs * 3; - for (size_t i = 0; i < num_docs; i++) { - GenerateAndAddVector(index, dim, i); - } - - // delete the last vector - for (size_t i = 0; i < 2 * bs; i++) { - VecSimIndex_DeleteVector(index, num_docs - i - 1); - } - // add 1 vec - GenerateAndAddVector(index, dim, num_docs); - - // override last vector - GenerateAndAddVector(index, dim, num_docs - 1); - // try to add it again - GenerateAndAddVector(index, dim, num_docs - 1); - ASSERT_EQ(VecSimIndex_IndexSize(index), num_docs); - VecSimIndex_Free(index); -} - TYPED_TEST(HNSWTest, hnsw_blob_sanity_test) { size_t dim = 4; size_t bs = 1; diff --git a/tests/unit/test_hnsw_tiered.cpp b/tests/unit/test_hnsw_tiered.cpp index 7bc9daf54..7e48e0af0 100644 --- a/tests/unit/test_hnsw_tiered.cpp +++ b/tests/unit/test_hnsw_tiered.cpp @@ -4330,3 +4330,86 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWWithPreprocessor) { VecSimBatchIterator_Free(batchIterator); } } + +TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) { + size_t dim = 4; + constexpr size_t blockSize = 10; + size_t resize_operations = 0; + HNSWParams params = {.type = TypeParam::get_index_type(), + .dim = dim, + .metric = VecSimMetric_L2, + .blockSize = blockSize}; + VecSimParams hnsw_params = CreateParams(params); + + auto mock_thread_pool = tieredIndexMock(); + TieredIndexParams tiered_params = {.jobQueue = &mock_thread_pool.jobQ, + .jobQueueCtx = mock_thread_pool.ctx, + .submitCb = tieredIndexMock::submit_callback, + .flatBufferLimit = SIZE_MAX, + .primaryIndexParams = &hnsw_params, + .specificParams = {TieredHNSWParams{.swapJobThreshold = 1}}}; + auto *tiered_index = reinterpret_cast *>( + TieredFactory::NewIndex(&tiered_params)); + mock_thread_pool.ctx->index_strong_ref.reset(tiered_index); + + auto hnsw_index = this->CastToHNSW(tiered_index); + ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), 0); + + // add a vector + GenerateAndAddVector(tiered_index, dim, 0); + mock_thread_pool.thread_iteration(); + resize_operations++; + + // first vector should trigger resize + ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); + ASSERT_EQ(hnsw_index->indexSize(), 1); + ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + + // add up to block size + for (size_t i = 1; i < blockSize; i++) { + GenerateAndAddVector(tiered_index, dim, i); + mock_thread_pool.thread_iteration(); + } + // should not trigger resize + ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); + ASSERT_EQ(hnsw_index->indexSize(), blockSize); + ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + + // add one more vector to trigger another resize + 
GenerateAndAddVector(tiered_index, dim, blockSize); + mock_thread_pool.thread_iteration(); + resize_operations++; + + ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); + ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1); + ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + + // delete a vector to shrink data blocks + ASSERT_EQ(VecSimIndex_DeleteVector(tiered_index, 0), 1) << "Failed to delete vector 0"; + + mock_thread_pool.init_threads(); + mock_thread_pool.thread_pool_join(); + tiered_index->executeReadySwapJobs(); + resize_operations++; + ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); + ASSERT_EQ(hnsw_index->indexSize(), blockSize); + ASSERT_EQ(hnsw_index->indexCapacity(), blockSize); + + // add this vector again and verify lock was acquired to resize + GenerateAndAddVector(tiered_index, dim, 0); + mock_thread_pool.thread_iteration(); + resize_operations++; + ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); + ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1); + ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); + + // add up to block size (count = 2 blockSize), the lock shouldn't be acquired because no resize + // is required + for (size_t i = blockSize + 1; i < 2 * blockSize; i++) { + GenerateAndAddVector(tiered_index, dim, i); + mock_thread_pool.thread_iteration(); + } + ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations); + ASSERT_EQ(hnsw_index->indexSize(), 2 * blockSize); + ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize); +} From 14ee8b3140fd02ef9fb7736e0caeb11ec2c58bcd Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 31 Aug 2025 13:52:24 +0000 Subject: [PATCH 07/11] remove comment --- src/VecSim/algorithms/hnsw/hnsw.h | 6 ------ src/VecSim/vec_sim_common.h | 10 +++++----- src/VecSim/vec_sim_index.h | 2 +- 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/src/VecSim/algorithms/hnsw/hnsw.h b/src/VecSim/algorithms/hnsw/hnsw.h index 90edf77d3..d87ddfcc9 100644 --- a/src/VecSim/algorithms/hnsw/hnsw.h +++ b/src/VecSim/algorithms/hnsw/hnsw.h @@ -1804,12 +1804,6 @@ HNSWAddVectorState HNSWIndex::storeNewElement(labelType labe throw e; } - // else if (state.newElementId % this->blockSize == 0) { - // // If we had an initial capacity, we might have to allocate new blocks for the graph - // data. this->graphDataBlocks.emplace_back(this->blockSize, this->elementGraphDataSize, - // this->allocator); - // } - // Insert the new element to the data block this->vectors->addElement(vector_data, state.newElementId); this->graphDataBlocks.back().addElement(cur_egd); diff --git a/src/VecSim/vec_sim_common.h b/src/VecSim/vec_sim_common.h index 1d513498c..643155b80 100644 --- a/src/VecSim/vec_sim_common.h +++ b/src/VecSim/vec_sim_common.h @@ -147,11 +147,11 @@ typedef struct { } HNSWParams; typedef struct { - VecSimType type; // Datatype to index. - size_t dim; // Vector's dimension. - VecSimMetric metric; // Distance metric to use in the index. - bool multi; // Determines if the index should multi-index or not. - size_t initialCapacity; + VecSimType type; // Datatype to index. + size_t dim; // Vector's dimension. + VecSimMetric metric; // Distance metric to use in the index. + bool multi; // Determines if the index should multi-index or not. + size_t initialCapacity; // Deprecated. 
size_t blockSize; } BFParams; diff --git a/src/VecSim/vec_sim_index.h b/src/VecSim/vec_sim_index.h index e46635ca0..439f820d6 100644 --- a/src/VecSim/vec_sim_index.h +++ b/src/VecSim/vec_sim_index.h @@ -75,7 +75,7 @@ struct VecSimIndexAbstract : public VecSimIndexInterface { size_t blockSize; // Index's vector block size (determines by how many vectors to resize when // resizing) IndexCalculatorInterface *indexCalculator; // Distance calculator. - PreprocessorsContainerAbstract *preprocessors; // Storage and query preprocessors. + PreprocessorsContainerAbstract *preprocessors; // Stroage and query preprocessors. mutable VecSearchMode lastMode; // The last search mode in RediSearch (used for debug/testing). bool isMulti; // Determines if the index should multi-index or not. void *logCallbackCtx; // Context for the log callback. From 12a76881d88a5e66c0558c5cdcc500c1a7b4ec25 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sun, 31 Aug 2025 13:52:57 +0000 Subject: [PATCH 08/11] remove --- src/VecSim/algorithms/brute_force/brute_force.h | 7 ------- 1 file changed, 7 deletions(-) diff --git a/src/VecSim/algorithms/brute_force/brute_force.h b/src/VecSim/algorithms/brute_force/brute_force.h index 1a9a738fc..e70edb88c 100644 --- a/src/VecSim/algorithms/brute_force/brute_force.h +++ b/src/VecSim/algorithms/brute_force/brute_force.h @@ -112,13 +112,6 @@ class BruteForceIndex : public VecSimIndexAbstract { } void shrinkByBlock() { - // assert(indexCapacity() > 0); // should not be called when index is empty - - // // remove a block size of labels. - // assert(idToLabelMapping.size() >= this->blockSize); - // idToLabelMapping.resize(idToLabelMapping.size() - this->blockSize); - // idToLabelMapping.shrink_to_fit(); - // resizeLabelLookup(idToLabelMapping.size()); assert(indexCapacity() >= this->blockSize); assert(indexCapacity() % this->blockSize == 0); assert(dynamic_cast(this->vectors)->numBlocks() == From 56e6d6cdf32d54a4b396640af3cb60b030457649 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Thu, 11 Sep 2025 17:54:15 +0300 Subject: [PATCH 09/11] fix resize test for macos --- tests/unit/test_allocator.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 66bf3dce6..abebdc45b 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -324,8 +324,17 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { expectedAllocationDelta -= 2 * sizeof(labelType) + vecsimAllocationOverhead; // remove two idToLabelMapping and free the container + auto calcReserveHashTableToZero = [](size_t initial_buckets) { + std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); + auto dummy_lookup = + vecsim_stl::unordered_map(initial_buckets, allocator); + size_t memory_before = allocator->getAllocationSize(); + dummy_lookup.reserve(0); + size_t memory_after = allocator->getAllocationSize(); + return memory_before - memory_after; + }; expectedAllocationDelta -= - (buckets_num_before - bfIndex->labelToIdLookup.bucket_count()) * sizeof(size_t); + calcReserveHashTableToZero(buckets_num_before); // resizing labelToIdLookup to 0 ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize + deleteCommandAllocationDelta); ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); From 0c49f5f08917858ef9b16a38d78d30f6e9f98974 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Fri, 12 Sep 2025 07:40:19 +0300 Subject: [PATCH 10/11] change to estimation --- 
tests/unit/test_allocator.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index abebdc45b..101505d3c 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -337,8 +337,8 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { calcReserveHashTableToZero(buckets_num_before); // resizing labelToIdLookup to 0 ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize + deleteCommandAllocationDelta); - ASSERT_EQ(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_EQ(expectedAllocationDelta, deleteCommandAllocationDelta); + ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); + ASSERT_GE(expectedAllocationDelta, deleteCommandAllocationDelta); memory = VecSimIndex_StatsInfo(bfIndex).memory; ASSERT_EQ(allocator->getAllocationSize(), memory); From b4f316a1433a42dff4f237250237fd02c604bea9 Mon Sep 17 00:00:00 2001 From: meiravgri Date: Sat, 13 Sep 2025 18:30:47 +0300 Subject: [PATCH 11/11] generalize test --- tests/unit/test_allocator.cpp | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/tests/unit/test_allocator.cpp b/tests/unit/test_allocator.cpp index 101505d3c..c8f3bbb3b 100644 --- a/tests/unit/test_allocator.cpp +++ b/tests/unit/test_allocator.cpp @@ -324,21 +324,15 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) { expectedAllocationDelta -= 2 * sizeof(labelType) + vecsimAllocationOverhead; // remove two idToLabelMapping and free the container - auto calcReserveHashTableToZero = [](size_t initial_buckets) { - std::shared_ptr allocator = VecSimAllocator::newVecsimAllocator(); - auto dummy_lookup = - vecsim_stl::unordered_map(initial_buckets, allocator); - size_t memory_before = allocator->getAllocationSize(); - dummy_lookup.reserve(0); - size_t memory_after = allocator->getAllocationSize(); - return memory_before - memory_after; - }; - expectedAllocationDelta -= - calcReserveHashTableToZero(buckets_num_before); // resizing labelToIdLookup to 0 + // resizing labelToIdLookup to 0 + size_t buckets_after = bfIndex->labelToIdLookup.bucket_count(); + ASSERT_EQ(bfIndex->labelToIdLookup.size(), 0); + ASSERT_LE(buckets_after, buckets_num_before); + expectedAllocationDelta -= (buckets_num_before - buckets_after) * sizeof(size_t); ASSERT_EQ(allocator->getAllocationSize(), expectedAllocationSize + deleteCommandAllocationDelta); - ASSERT_LE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); - ASSERT_GE(expectedAllocationDelta, deleteCommandAllocationDelta); + ASSERT_LE(abs(expectedAllocationDelta), abs(deleteCommandAllocationDelta)); + ASSERT_GE(expectedAllocationSize + expectedAllocationDelta, allocator->getAllocationSize()); memory = VecSimIndex_StatsInfo(bfIndex).memory; ASSERT_EQ(allocator->getAllocationSize(), memory);