Skip to content
25 changes: 8 additions & 17 deletions src/VecSim/algorithms/svs/svs.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
}

int deleteVectorsImpl(const labelType *labels, size_t n) {
if (indexSize() == 0) {
if (indexLabelCount() == 0) {
return 0;
}

Expand Down Expand Up @@ -280,22 +280,13 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
return;

// SVS index instance should not be empty
if (indexSize() == 0) {
if (indexLabelCount() == 0) {
this->impl_.reset();
num_marked_deleted = 0;
return;
}

num_marked_deleted += n;
// consolidate index if number of changes bigger than 50% of index size
const float consolidation_threshold = .5f;
// indexSize() should not be 0 see above lines
assert(indexSize() > 0);
// Note: if this function is called after deleteVectorsImpl, indexSize is already updated
if (static_cast<float>(num_marked_deleted) / indexSize() > consolidation_threshold) {
impl_->consolidate();
num_marked_deleted = 0;
}
}

bool isTwoLevelLVQ(const VecSimSvsQuantBits &qbits) {
Expand Down Expand Up @@ -330,7 +321,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl

// Defaulted destructor: no custom teardown logic is defined for SVSIndex here.
~SVSIndex() = default;

// Reports impl_->size(), or 0 when impl_ is unset.
size_t indexSize() const override { return impl_ ? impl_->size() : 0; }
// Index size is defined as the storage size: forwards to indexStorageSize().
size_t indexSize() const override { return indexStorageSize(); }

// Reports the size of impl_'s data view (view_data().size()), or 0 when impl_ is unset.
// NOTE(review): presumably this still counts entries that were marked deleted but not yet
// reclaimed (the GC test expects it to shrink only after VecSimTieredIndex_GC) -- confirm.
size_t indexStorageSize() const override { return impl_ ? impl_->view_data().size() : 0; }

Expand All @@ -342,7 +333,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
if constexpr (isMulti) {
return impl_ ? impl_->labelcount() : 0;
} else {
return indexSize();
return impl_ ? impl_->size() : 0;
}
}

Expand Down Expand Up @@ -524,7 +515,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
VecSimQueryParams *queryParams) const override {
auto rep = new VecSimQueryReply(this->allocator);
this->lastMode = STANDARD_KNN;
if (k == 0 || this->indexSize() == 0) {
if (k == 0 || this->indexLabelCount() == 0) {
return rep;
}

Expand Down Expand Up @@ -569,7 +560,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
VecSimQueryParams *queryParams) const override {
auto rep = new VecSimQueryReply(this->allocator);
this->lastMode = RANGE_QUERY;
if (radius == 0 || this->indexSize() == 0) {
if (radius == 0 || this->indexLabelCount() == 0) {
return rep;
}

Expand Down Expand Up @@ -642,7 +633,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
// take ownership of the blob copy and pass it to the batch iterator.
auto *queryBlobCopyPtr = queryBlobCopy.release();
// Ownership of queryBlobCopy moves to VecSimBatchIterator that will free it at the end.
if (indexSize() == 0) {
if (indexLabelCount() == 0) {
return new (this->getAllocator())
NullSVS_BatchIterator(queryBlobCopyPtr, queryParams, this->getAllocator());
} else {
Expand All @@ -652,7 +643,7 @@ class SVSIndex : public VecSimIndexAbstract<svs_details::vecsim_dt<DataType>, fl
}

bool preferAdHocSearch(size_t subsetSize, size_t k, bool initial_check) const override {
size_t index_size = this->indexSize();
size_t index_size = this->indexLabelCount();

// Calculate the ratio of the subset size to the total index size.
double subsetRatio = (index_size == 0) ? 0.f : static_cast<double>(subsetSize) / index_size;
Expand Down
2 changes: 1 addition & 1 deletion tests/flow/test_svs_tiered.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def test_recall_after_deletion(test_logger):
test_logger.info(f"Done deleting half of the index")
assert index.svs_label_count() >= (num_elements // 2) - indices_ctx.tiered_svs_params.updateTriggerThreshold
assert index.svs_label_count() <= (num_elements // 2) + indices_ctx.tiered_svs_params.updateTriggerThreshold
assert svs_index.index_size() == (num_elements // 2)
assert svs_index.index_size() == num_elements

# Create a list of tuples of the vectors that left.
vectors = [vectors[i] for i in range(1, num_elements, 2)]
Expand Down
44 changes: 26 additions & 18 deletions tests/unit/test_svs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -262,20 +262,18 @@ TYPED_TEST(SVSTest, svs_bulk_vectors_add_delete_test) {
runTopKSearchTest(index, query, k, verify_res, nullptr, BY_ID);

// Delete almost all vectors
// First delete small amount of vector to prevent consolidation.
const size_t first_batch_deletion = 10;
ASSERT_EQ(svs_index->deleteVectors(ids.data(), first_batch_deletion), first_batch_deletion);
ASSERT_EQ(VecSimIndex_IndexSize(index), n - first_batch_deletion);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), first_batch_deletion);

// Now delete enough vectors to trigger consolidation.
const size_t keep_num = 1;
ASSERT_EQ(svs_index->deleteVectors(ids.data() + first_batch_deletion,
n - keep_num - first_batch_deletion),
n - keep_num - first_batch_deletion);
ASSERT_EQ(VecSimIndex_IndexSize(index), keep_num);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
ASSERT_EQ(svs_index->deleteVectors(ids.data(), n - keep_num), n - keep_num);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), keep_num);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - keep_num);

// Delete rest of the vectors
// num_marked_deleted should reset.
ASSERT_EQ(svs_index->deleteVectors(ids.data() + n - keep_num, keep_num), keep_num);
ASSERT_EQ(VecSimIndex_IndexSize(index), 0);
ASSERT_EQ(index->indexLabelCount(), 0);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 0);
VecSimIndex_Free(index);
}

Expand Down Expand Up @@ -453,14 +451,18 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector) {
for (size_t i = 0; i < n - 1; i++) {
VecSimIndex_DeleteVector(index, i);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), 1);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);

// Reinsert the same vectors under the same ids.
for (size_t i = 0; i < n; i++) {
// i / 10 is integer division (takes the "floor" value).
GenerateAndAddVector<TEST_DATA_T>(index, dim, i, i / 10);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
ASSERT_EQ(index->indexLabelCount(), n);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);

// Run the same query again.
runTopKSearchTest(index, query, k, verify_res);
Expand Down Expand Up @@ -513,14 +515,18 @@ TYPED_TEST(SVSTest, svs_reindexing_same_vector_different_id) {
for (size_t i = 0; i < n - 1; i++) {
VecSimIndex_DeleteVector(index, i);
}
ASSERT_EQ(VecSimIndex_IndexSize(index), 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), 1);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n - 1);

// Reinsert the same vectors under different ids than before.
for (size_t i = 0; i < n; i++) {
GenerateAndAddVector<TEST_DATA_T>(index, dim, i + 10,
i / 10); // i / 10 is in integer (take the "floor" value).
}
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), 2 * n);
ASSERT_EQ(index->indexLabelCount(), n);
ASSERT_EQ(svs_index->getNumMarkedDeleted(), n);

// Run the same query again.
auto verify_res_different_id = [&](size_t id, double score, size_t index) {
Expand Down Expand Up @@ -922,7 +928,8 @@ TYPED_TEST(SVSTest, test_delete_vector) {

// Here the shift should happen.
VecSimIndex_DeleteVector(index, 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n - 1);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(index->indexLabelCount(), n - 1);

TEST_DATA_T query[] = {0.0, 0.0};
auto verify_res = [&](size_t id, double score, size_t index) {
Expand Down Expand Up @@ -3026,7 +3033,8 @@ TYPED_TEST(SVSTest, logging_runtime_params) {
index->addVector(v[i].data(), ids[i]);
}
ASSERT_EQ(svs_index->getNumMarkedDeleted(), 10);
ASSERT_EQ(VecSimIndex_IndexSize(index), n);
ASSERT_EQ(VecSimIndex_IndexSize(index), n + 10);
ASSERT_EQ(index->indexLabelCount(), n);

float query[] = {50, 50, 50, 50};
auto verify_res = [&](size_t id, double score, size_t index) { EXPECT_EQ(id, (index + 45)); };
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/test_svs_multi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -536,7 +536,7 @@ TYPED_TEST(SVSMultiTest, test_dynamic_svs_info_iterator) {
VecSimIndex_DeleteVector(index, 0);
info = VecSimIndex_DebugInfo(index);
infoIter = VecSimIndex_DebugInfoIterator(index);
ASSERT_EQ(2, info.commonInfo.indexSize);
ASSERT_EQ(4, info.commonInfo.indexSize);
ASSERT_EQ(1, info.commonInfo.indexLabelCount);
compareSVSIndexInfoToIterator(info, infoIter);
VecSimDebugInfoIterator_Free(infoIter);
Expand Down
76 changes: 48 additions & 28 deletions tests/unit/test_svs_tiered.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -894,7 +894,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, KNNSearch) {
VecSimIndex_DeleteVector(svs_index, i);
}
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
ASSERT_EQ(svs_index->indexSize(), n / 2);
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
k = n * 2 / 3;
cur_memory_usage = allocator->getAllocationSize();
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
Expand All @@ -909,7 +909,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, KNNSearch) {
VecSimIndex_DeleteVector(flat_index, i);
}
ASSERT_EQ(flat_index->indexSize(), n / 6);
ASSERT_EQ(svs_index->indexSize(), n / 2);
ASSERT_EQ(svs_index->indexLabelCount(), n / 2);
k = n / 4;
cur_memory_usage = allocator->getAllocationSize();
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
Expand All @@ -923,7 +923,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, KNNSearch) {
GenerateAndAddVector<TEST_DATA_T>(flat_index, dim, i, i);
}
ASSERT_EQ(flat_index->indexSize(), n * 2 / 3);
ASSERT_EQ(svs_index->indexSize(), 0);
ASSERT_EQ(svs_index->indexLabelCount(), 0);
k = n / 3;
cur_memory_usage = allocator->getAllocationSize();
runTopKSearchTest(tiered_index, query_0, k, ver_res_0);
Expand Down Expand Up @@ -1133,27 +1133,33 @@ TYPED_TEST(SVSTieredIndexTestBasic, markedDeleted) {

// Override a vector while in the backend
GenerateAndAddVector<TEST_DATA_T>(tiered_index, dim, 1);
ASSERT_EQ(tiered_index->indexSize(), n);
ASSERT_EQ(tiered_index->indexSize(), n + 1);
ASSERT_EQ(tiered_index->indexLabelCount(), n);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 1);
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 1);
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 1);

ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 1);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 1);

// Delete the overridden vector
VecSimIndex_DeleteVector(tiered_index, 1);
ASSERT_EQ(tiered_index->indexSize(), n - 1);
ASSERT_EQ(tiered_index->indexSize(), n);
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 1);
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 1);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 1);
ASSERT_EQ(tiered_index->indexLabelCount(), n - 1);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 1);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);

// Delete another arbirtrary vector
// Delete another arbitrary vector
VecSimIndex_DeleteVector(tiered_index, 0);
ASSERT_EQ(tiered_index->indexSize(), n - 2);
ASSERT_EQ(tiered_index->indexSize(), n);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 2);
ASSERT_EQ(tiered_index->indexLabelCount(), n - 2);
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 2);
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 2);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 2);
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);

// Empty Index
Expand All @@ -1167,6 +1173,8 @@ TYPED_TEST(SVSTieredIndexTestBasic, markedDeleted) {
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 0);
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), 0);
ASSERT_EQ(tiered_index->indexLabelCount(), 0);
}

TYPED_TEST(SVSTieredIndexTestBasic, deleteVectorMulti) {
Expand Down Expand Up @@ -2917,18 +2925,26 @@ TYPED_TEST(SVSTieredIndexTest, writeInPlaceMode) {
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);

// Overwrite inplace - only in single-value mode
size_t expected_marked_deleted = 0;
if (!TypeParam::isMulti()) {
TEST_DATA_T overwritten_vec[] = {1, 1, 1, 1};
tiered_index->addVector(overwritten_vec, vec_label);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 2);
expected_marked_deleted++;
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 3);
ASSERT_EQ(tiered_index->indexSize(), 3);
ASSERT_EQ(tiered_index->indexLabelCount(), 2);
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 0);
ASSERT_EQ(tiered_index->getDistanceFrom_Unsafe(vec_label, overwritten_vec), 0);
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), expected_marked_deleted);
}
// Validate that the vector is removed in place.
// Validate that the vector is marked as deleted.
tiered_index->deleteVector(vec_label);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), 1);
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), 0);
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, 0);
expected_marked_deleted++;
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), TypeParam::isMulti() ? 2 : 3);
ASSERT_EQ(tiered_index->indexLabelCount(), 1);

ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), expected_marked_deleted);
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, expected_marked_deleted);
}

TYPED_TEST(SVSTieredIndexTest, switchWriteModes) {
Expand Down Expand Up @@ -3024,7 +3040,7 @@ TYPED_TEST(SVSTieredIndexTest, switchWriteModes) {
mock_thread_pool.thread_pool_join();
// Verify that vectors were moved to SVS as expected
auto sz_f = tiered_index->GetFlatIndex()->indexSize();
auto sz_b = tiered_index->GetBackendIndex()->indexSize();
auto sz_b = tiered_index->GetBackendIndex()->indexLabelCount();
EXPECT_LE(sz_f, this->getUpdateThreshold());
if (TypeParam::isMulti()) {
ASSERT_EQ(tiered_index->indexLabelCount(), 2 * n_labels);
Expand Down Expand Up @@ -3097,25 +3113,23 @@ TYPED_TEST(SVSTieredIndexTestBasic, runGCAPI) {
ASSERT_EQ(tiered_index->indexSize(), n);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);

// Delete all the vectors and wait for the thread pool to finish running the update jobs.
for (size_t i = 0; i < threshold; i++) {
tiered_index->deleteVector(i);
}
ASSERT_EQ(tiered_index->GetSVSIndex()->getNumMarkedDeleted(), threshold);
EXPECT_EQ(tiered_index->statisticInfo().numberOfMarkedDeleted, threshold);

// Launch the BG threads loop that takes jobs from the queue and executes them.
mock_thread_pool.init_threads();
mock_thread_pool.thread_pool_join();

ASSERT_EQ(tiered_index->indexSize(), n - threshold);
ASSERT_EQ(tiered_index->indexSize(), n);
ASSERT_EQ(tiered_index->indexLabelCount(), n - threshold);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - threshold);
ASSERT_EQ(tiered_index->GetSVSIndex()->indexStorageSize(), n);
ASSERT_EQ(mock_thread_pool.jobQ.size(), 0);
auto size_before_gc = tiered_index->getAllocationSize();

// Run the GC API call, expect that we will clean up the SVS index.
VecSimTieredIndex_GC(tiered_index);
ASSERT_EQ(tiered_index->indexSize(), n - threshold);
ASSERT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - threshold);
ASSERT_EQ(tiered_index->GetSVSIndex()->indexStorageSize(), n - threshold);
auto size_after_gc = tiered_index->getAllocationSize();
// Expect that the size of the index was reduced.
Expand Down Expand Up @@ -3177,7 +3191,7 @@ TYPED_TEST(SVSTieredIndexTestBasic, switchDeleteModes) {
mock_thread_pool.thread_pool_join();
// Verify that vectors were moved to SVS as expected
auto sz_f = tiered_index->GetFlatIndex()->indexSize();
auto sz_b = tiered_index->GetBackendIndex()->indexSize();
auto sz_b = tiered_index->GetBackendIndex()->indexLabelCount();
EXPECT_LE(sz_f, update_threshold);
EXPECT_EQ(sz_f + sz_b, n);
}
Expand Down Expand Up @@ -3270,14 +3284,17 @@ TYPED_TEST(SVSTieredIndexTestBasic, testSwapJournalSingle) {
// For single-value index, following vectors should be in the index:
// 0:deleted, 1: 10, 2: deleted, 3:3, ..., n-2:deleted n-1: 10(n-1), n+1: n+1;
// total: n-2 vectors and labels
ASSERT_EQ(tiered_index->indexSize(), n - 2);
ASSERT_EQ(tiered_index->indexLabelCount(), n - 2);
EXPECT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 5);

// We added 3 vectors to the flat index and removed 5 vectors from the backend index.
// Backend index: 0:deleted, 1:deleted, 2:deleted, 3:3, ..., n-2:deleted, n-1:deleted;
// total: n-5
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 5);
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 5);
// Frontend index: 1:10, n-1:10(n-1), n+1:n+1
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 3);
ASSERT_EQ(tiered_index->indexSize(), n + tiered_index->GetFlatIndex()->indexSize());

double abs_err = 1e-2; // Allow a larger relative error for quantization.
TEST_DATA_T expected_vector[dim];
Expand Down Expand Up @@ -3401,13 +3418,16 @@ TYPED_TEST(SVSTieredIndexTestBasic, testSwapJournalMulti) {
// For multi-value index, following vectors should be in the index:
// 0: deleted, 1: (1,10), 2: deleted, 3:3, ..., n-2: deleted n-1: 10(n-1), n+1: n+1;
// total: n-2 labels, n-1 vectors
ASSERT_EQ(tiered_index->indexSize(), n - 1);
ASSERT_EQ(tiered_index->indexLabelCount(), n - 2);
EXPECT_EQ(tiered_index->GetBackendIndex()->indexLabelCount(), n - 4);

// We added 3 vectors to the flat index and removed 4 vectors from the backend index.
// Backend index: 0:deleted, 1:1, 2:deleted, 3:3, ..., n-2:deleted, n-1:deleted; total: n-4
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n - 4);
EXPECT_EQ(tiered_index->GetBackendIndex()->indexSize(), n);
ASSERT_EQ(tiered_index->getNumMarkedDeleted(), 4);
// Frontend index: 1:10, n-1:10(n-1), n+1:n+1
ASSERT_EQ(tiered_index->GetFlatIndex()->indexSize(), 3);
ASSERT_EQ(tiered_index->indexSize(), n + tiered_index->GetFlatIndex()->indexSize());

double abs_err = 1e-2; // Allow a larger relative error for quantization.
TEST_DATA_T expected_vector[dim];
Expand Down
Loading