From fdaf51479b19b2c3243cc25b1c159ec15f9e6fed Mon Sep 17 00:00:00 2001 From: edward_xu Date: Thu, 26 Sep 2024 21:06:35 +0800 Subject: [PATCH 1/2] use fixed seed to improve reproducibility. --- src/search/hnsw_indexer.cc | 9 ++++----- src/search/hnsw_indexer.h | 5 ++++- tests/cppunit/hnsw_index_test.cc | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/src/search/hnsw_indexer.cc b/src/search/hnsw_indexer.cc index f03e4c9580c..cb518df08da 100644 --- a/src/search/hnsw_indexer.cc +++ b/src/search/hnsw_indexer.cc @@ -166,14 +166,13 @@ StatusOr ComputeSimilarity(const VectorItem& left, const VectorItem& rig } } -HnswIndex::HnswIndex(const SearchKey& search_key, HnswVectorFieldMetadata* vector, engine::Storage* storage) +HnswIndex::HnswIndex(const SearchKey& search_key, HnswVectorFieldMetadata* vector, engine::Storage* storage, + std::random_device::result_type seed) : search_key(search_key), metadata(vector), storage(storage), - m_level_normalization_factor(1.0 / std::log(metadata->m)) { - std::random_device rand_dev; - generator = std::mt19937(rand_dev()); -} + generator(std::mt19937(seed)), + m_level_normalization_factor(1.0 / std::log(metadata->m)) {} uint16_t HnswIndex::RandomizeLayer() { std::uniform_real_distribution level_dist(0.0, 1.0); diff --git a/src/search/hnsw_indexer.h b/src/search/hnsw_indexer.h index 30bdf94ac46..aee42e0a13b 100644 --- a/src/search/hnsw_indexer.h +++ b/src/search/hnsw_indexer.h @@ -88,7 +88,10 @@ struct HnswIndex { std::mt19937 generator; double m_level_normalization_factor; - HnswIndex(const SearchKey& search_key, HnswVectorFieldMetadata* vector, engine::Storage* storage); + HnswIndex(const SearchKey& search_key, HnswVectorFieldMetadata* vector, engine::Storage* storage, + std::random_device::result_type seed); + HnswIndex(const SearchKey& search_key, HnswVectorFieldMetadata* vector, engine::Storage* storage) + : HnswIndex(search_key, vector, storage, std::random_device()()) {} static StatusOr> DecodeNodesToVectorItems(const std::vector& node_key, uint16_t level, const SearchKey& search_key, diff --git a/tests/cppunit/hnsw_index_test.cc b/tests/cppunit/hnsw_index_test.cc index e09e9830262..5934ae211dc 100644 --- a/tests/cppunit/hnsw_index_test.cc +++ b/tests/cppunit/hnsw_index_test.cc @@ -37,6 +37,7 @@ struct HnswIndexTest : TestBase { std::string idx_name = "hnsw_test_idx"; std::string key = "vector"; std::unique_ptr hnsw_index; + const std::random_device::result_type seed = 14863; // fixed seed for reproducibility HnswIndexTest() { metadata.vector_type = redis::VectorType::FLOAT64; @@ -44,7 +45,7 @@ struct HnswIndexTest : TestBase { metadata.m = 3; metadata.distance_metric = redis::DistanceMetric::L2; auto search_key = redis::SearchKey(ns, idx_name, key); - hnsw_index = std::make_unique(search_key, &metadata, storage_.get()); + hnsw_index = std::make_unique(search_key, &metadata, storage_.get(), seed); } void TearDown() override { hnsw_index.reset(); } From 05b821d2a8221b1c37ba7ccd3907582426234e2e Mon Sep 17 00:00:00 2001 From: edward_xu Date: Fri, 27 Sep 2024 08:09:28 +0800 Subject: [PATCH 2/2] refactor `HnswIndex` ctor with default argument. --- src/search/hnsw_indexer.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/search/hnsw_indexer.h b/src/search/hnsw_indexer.h index d4cb22269a2..579352a8b22 100644 --- a/src/search/hnsw_indexer.h +++ b/src/search/hnsw_indexer.h @@ -93,9 +93,7 @@ struct HnswIndex { double m_level_normalization_factor; HnswIndex(const SearchKey& search_key, HnswVectorFieldMetadata* vector, engine::Storage* storage, - std::random_device::result_type seed); - HnswIndex(const SearchKey& search_key, HnswVectorFieldMetadata* vector, engine::Storage* storage) - : HnswIndex(search_key, vector, storage, std::random_device()()) {} + std::random_device::result_type seed = std::random_device()()); static StatusOr> DecodeNodesToVectorItems(engine::Context& ctx, const std::vector& node_key,