Skip to content

Commit 74fa6a8

Browse files
committed
Fix incorrect assertion in BM_VecSimBasics::UpdateAtBlockSize benchmark (#819)
* VecSimIndexInterface: introduce indexMetaDataCapacity for testing returns metadata containers capacity * cover for tiered index (cherry picked from commit f24b65a)
1 parent 4390716 commit 74fa6a8

File tree

10 files changed

+65
-5
lines changed

10 files changed

+65
-5
lines changed

src/VecSim/algorithms/brute_force/brute_force.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,8 @@ class BruteForceIndex : public VecSimIndexAbstract<DataType, DistType> {
8080
idToLabelMapping.shrink_to_fit();
8181
resizeLabelLookup(idToLabelMapping.size());
8282
}
83+
84+
size_t indexMetaDataCapacity() const override { return idToLabelMapping.capacity(); }
8385
#endif
8486

8587
protected:

src/VecSim/algorithms/hnsw/hnsw.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,8 @@ class HNSWIndex : public VecSimIndexAbstract<DataType, DistType>,
310310
resizeLabelLookup(idToMetaData.size());
311311
}
312312
}
313+
314+
size_t indexMetaDataCapacity() const override { return idToMetaData.capacity(); }
313315
#endif
314316

315317
protected:

src/VecSim/algorithms/hnsw/hnsw_tiered.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ class TieredHNSWIndex : public VecSimTieredIndex<DataType, DistType> {
248248

249249
#ifdef BUILD_TESTS
250250
void getDataByLabel(labelType label, std::vector<std::vector<DataType>> &vectors_output) const;
251+
size_t indexMetaDataCapacity() const override {
252+
return this->backendIndex->indexMetaDataCapacity() +
253+
this->frontendIndex->indexMetaDataCapacity();
254+
}
251255
#endif
252256
};
253257

src/VecSim/algorithms/svs/svs.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
705705

706706
public:
707707
void fitMemory() override {}
708+
size_t indexMetaDataCapacity() const override { return this->indexCapacity(); }
708709
std::vector<std::vector<char>> getStoredVectorDataByLabel(labelType label) const override {
709710

710711
// For compressed/quantized indices, this function is not meaningful

src/VecSim/vec_sim_interface.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,5 +217,15 @@ struct VecSimIndexInterface : public VecsimBaseObject {
217217
}
218218
#ifdef BUILD_TESTS
219219
virtual void fitMemory() = 0;
220+
/**
221+
* @brief get the capacity of the meta data containers.
222+
*
223+
* @return The capacity of the meta data containers in number of elements.
224+
* The value returned from this function may differ from the indexCapacity() function. For
225+
* example, in HNSW, the capacity of the meta data containers is the capacity of the labels
226+
* lookup table, while the capacity of the data containers is the capacity of the vectors
227+
* container.
228+
*/
229+
virtual size_t indexMetaDataCapacity() const = 0;
220230
#endif
221231
};

tests/benchmark/bm_vecsim_basics.h

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,9 @@ void BM_VecSimBasics<index_type_t>::UpdateAtBlockSize(benchmark::State &st) {
318318
// Calculate vectors needed to reach next block boundary
319319
size_t vecs_to_blocksize =
320320
BM_VecSimGeneral::block_size - (initial_index_size % BM_VecSimGeneral::block_size);
321+
size_t initial_index_cap = index->indexMetaDataCapacity();
322+
assert(initial_index_cap == N_VECTORS + vecs_to_blocksize);
323+
321324
assert(vecs_to_blocksize < BM_VecSimGeneral::block_size);
322325
labelType initial_label_count = index->indexLabelCount();
323326
labelType curr_label = initial_label_count;
@@ -342,24 +345,29 @@ void BM_VecSimBasics<index_type_t>::UpdateAtBlockSize(benchmark::State &st) {
342345

343346
// Benchmark loop: repeatedly delete/add same vector to trigger grow-shrink cycles
344347
labelType label_to_update = curr_label - 1;
345-
size_t index_cap = index->indexCapacity();
348+
size_t index_cap = index->indexMetaDataCapacity();
349+
std::cout << "index_cap after adding vectors " << index_cap << std::endl;
350+
assert(index_cap == initial_index_cap + BM_VecSimGeneral::block_size);
351+
346352
for (auto _ : st) {
347353
// Remove the vector directly from hnsw
348354
size_t ret = VecSimIndex_DeleteVector(
349355
GET_INDEX(st.range(0) == INDEX_TIERED_HNSW ? INDEX_HNSW : st.range(0)),
350356
label_to_update);
351357
assert(ret == 1);
352-
assert(index->indexCapacity() == index_cap - BM_VecSimGeneral::block_size);
353-
// Capacity should shrink by one block after deletion
358+
359+
// Capacity should not change
360+
size_t curr_cap = index->indexMetaDataCapacity();
361+
assert(curr_cap == index_cap);
354362
ret = VecSimIndex_AddVector(index, QUERIES[(added_vec_count - 1) % N_QUERIES].data(),
355363
label_to_update);
356364
assert(ret == 1);
357365
BM_VecSimGeneral::mock_thread_pool->thread_pool_wait();
358366
assert(VecSimIndex_IndexSize(
359367
GET_INDEX(st.range(0) == INDEX_TIERED_HNSW ? INDEX_HNSW : st.range(0))) ==
360368
N_VECTORS + added_vec_count);
361-
// Capacity should grow back to original size after addition
362-
assert(index->indexCapacity() == index_cap);
369+
// Capacity should not change
370+
assert(index->indexMetaDataCapacity() == index_cap);
363371
}
364372
assert(VecSimIndex_IndexSize(index) == N_VECTORS + added_vec_count);
365373

tests/unit/test_allocator.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ TYPED_TEST(IndexAllocatorTest, test_bf_index_block_size_1) {
126126

127127
ASSERT_EQ(bfIndex->indexCapacity(), expected_map_containers_size);
128128
ASSERT_EQ(bfIndex->idToLabelMapping.capacity(), expected_map_containers_size);
129+
ASSERT_EQ(bfIndex->indexMetaDataCapacity(), expected_map_containers_size);
129130
ASSERT_EQ(bfIndex->idToLabelMapping.size(), expected_map_containers_size);
130131
ASSERT_GE(bfIndex->labelToIdLookup.bucket_count(), expected_map_containers_size);
131132
};
@@ -536,6 +537,7 @@ TYPED_TEST(IndexAllocatorTest, test_hnsw_reclaim_memory) {
536537
ASSERT_EQ(hnswIndex->vectors->size(), expected_size);
537538

538539
ASSERT_EQ(hnswIndex->idToMetaData.capacity(), expected_map_containers_size);
540+
ASSERT_EQ(hnswIndex->indexMetaDataCapacity(), expected_map_containers_size);
539541
ASSERT_EQ(hnswIndex->idToMetaData.size(), expected_map_containers_size);
540542
ASSERT_GE(hnswIndex->labelLookup.bucket_count(), expected_map_containers_size);
541543
// Also validate that there are no unidirectional connections (these add memory to the

tests/unit/test_hnsw_tiered.cpp

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4364,6 +4364,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
43644364
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
43654365
ASSERT_EQ(hnsw_index->indexSize(), 1);
43664366
ASSERT_EQ(hnsw_index->indexCapacity(), blockSize);
4367+
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize);
4368+
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
4369+
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
4370+
hnsw_index->indexMetaDataCapacity() +
4371+
tiered_index->frontendIndex->indexMetaDataCapacity());
43674372

43684373
// add up to block size
43694374
for (size_t i = 1; i < blockSize; i++) {
@@ -4374,6 +4379,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
43744379
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
43754380
ASSERT_EQ(hnsw_index->indexSize(), blockSize);
43764381
ASSERT_EQ(hnsw_index->indexCapacity(), blockSize);
4382+
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), blockSize);
4383+
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
4384+
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
4385+
hnsw_index->indexMetaDataCapacity() +
4386+
tiered_index->frontendIndex->indexMetaDataCapacity());
43774387

43784388
// add one more vector to trigger another resize
43794389
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, blockSize);
@@ -4383,6 +4393,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
43834393
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
43844394
ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1);
43854395
ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize);
4396+
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);
4397+
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
4398+
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
4399+
hnsw_index->indexMetaDataCapacity() +
4400+
tiered_index->frontendIndex->indexMetaDataCapacity());
43864401

43874402
// delete a vector to shrink data blocks
43884403
ASSERT_EQ(VecSimIndex_DeleteVector(tiered_index, 0), 1) << "Failed to delete vector 0";
@@ -4394,6 +4409,8 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
43944409
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
43954410
ASSERT_EQ(hnsw_index->indexSize(), blockSize);
43964411
ASSERT_EQ(hnsw_index->indexCapacity(), blockSize);
4412+
// meta data capacity should not shrink
4413+
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);
43974414

43984415
// add this vector again and verify lock was acquired to resize
43994416
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, 0);
@@ -4402,6 +4419,11 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
44024419
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
44034420
ASSERT_EQ(hnsw_index->indexSize(), blockSize + 1);
44044421
ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize);
4422+
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);
4423+
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
4424+
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
4425+
hnsw_index->indexMetaDataCapacity() +
4426+
tiered_index->frontendIndex->indexMetaDataCapacity());
44054427

44064428
// add up to block size (count = 2 blockSize), the lock shouldn't be acquired because no resize
44074429
// is required
@@ -4412,4 +4434,9 @@ TYPED_TEST(HNSWTieredIndexTestBasic, HNSWResize) {
44124434
ASSERT_EQ(tiered_index->getMainIndexGuardWriteLockCount(), resize_operations);
44134435
ASSERT_EQ(hnsw_index->indexSize(), 2 * blockSize);
44144436
ASSERT_EQ(hnsw_index->indexCapacity(), 2 * blockSize);
4437+
ASSERT_EQ(hnsw_index->indexMetaDataCapacity(), 2 * blockSize);
4438+
ASSERT_EQ(tiered_index->frontendIndex->indexMetaDataCapacity(), 0);
4439+
ASSERT_EQ(tiered_index->indexMetaDataCapacity(),
4440+
hnsw_index->indexMetaDataCapacity() +
4441+
tiered_index->frontendIndex->indexMetaDataCapacity());
44154442
}

tests/unit/test_svs.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -833,6 +833,7 @@ TYPED_TEST(SVSTest, resizeIndex) {
833833
}
834834
// The size (+extra) and the capacity should be equal.
835835
ASSERT_EQ(index->indexCapacity(), VecSimIndex_IndexSize(index) + extra_cap);
836+
ASSERT_EQ(index->indexMetaDataCapacity(), index->indexCapacity());
836837
// The capacity shouldn't be changed.
837838
ASSERT_EQ(index->indexCapacity(), n + extra_cap);
838839

@@ -878,6 +879,7 @@ TYPED_TEST(SVSTest, svs_empty_index) {
878879

879880
// The expected capacity should be 0 for empty index.
880881
ASSERT_EQ(index->indexCapacity(), 0);
882+
ASSERT_EQ(index->indexMetaDataCapacity(), index->indexCapacity());
881883

882884
// Try to remove it again.
883885
VecSimIndex_DeleteVector(index, 1);

tests/unit/test_svs_tiered.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,7 @@ TYPED_TEST(SVSTieredIndexTest, addVector) {
523523
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 0);
524524
ASSERT_EQ(tiered_index->GetFlatIndex()->indexCapacity(), DEFAULT_BLOCK_SIZE);
525525
ASSERT_EQ(tiered_index->indexCapacity(), DEFAULT_BLOCK_SIZE);
526+
ASSERT_EQ(tiered_index->indexMetaDataCapacity(), tiered_index->indexCapacity());
526527
ASSERT_EQ(tiered_index->GetFlatIndex()->getDistanceFrom_Unsafe(vec_label, vector), 0);
527528
ASSERT_EQ(mock_thread_pool.jobQ.size(), mock_thread_pool.thread_pool_size);
528529

@@ -624,6 +625,7 @@ TYPED_TEST(SVSTieredIndexTest, insertJob) {
624625
? DEFAULT_BLOCK_SIZE
625626
: tiered_index->GetBackendIndex()->indexCapacity();
626627
ASSERT_EQ(tiered_index->indexCapacity(), expected_capacity);
628+
ASSERT_EQ(tiered_index->indexMetaDataCapacity(), tiered_index->indexCapacity());
627629
ASSERT_EQ(tiered_index->GetFlatIndex()->indexCapacity(), 0);
628630
ASSERT_EQ(tiered_index->getDistanceFrom_Unsafe(vec_label, vector), 0);
629631
}

0 commit comments

Comments
 (0)