@@ -235,6 +235,11 @@ class HNSWIndex : public VecSimIndexAbstract<DataType, DistType>,
235235 double getEpsilon () const ;
236236 size_t indexSize () const override ;
237237 size_t indexCapacity () const override ;
238+ /**
239+ * Checks whether the index capacity is full, hinting to the caller that a resize is needed.
240+ * @note Must be called with indexDataGuard locked.
241+ * @return Non-zero if the index is full, 0 otherwise (a boolean result carried in a size_t).
     */
242+ size_t isCapacityFull () const ;
238243 size_t getEfConstruction () const ;
239244 size_t getM () const ;
240245 size_t getMaxLevel () const ;
@@ -349,6 +354,11 @@ size_t HNSWIndex<DataType, DistType>::indexCapacity() const {
349354 return this ->maxElements ;
350355}
351356
// Returns the result of comparing indexSize() with maxElements: 1 when they are equal
// (the index is full), 0 otherwise.
// NOTE(review): the declared return type is size_t although the value is a boolean
// comparison; consider `bool` (declaration and definition would have to change together).
357+ template <typename DataType, typename DistType>
358+ size_t HNSWIndex<DataType, DistType>::isCapacityFull() const {
359+ return indexSize () == this ->maxElements ;
360+ }
361+
352362template <typename DataType, typename DistType>
353363size_t HNSWIndex<DataType, DistType>::getEfConstruction() const {
354364 return this ->efConstruction ;
@@ -1281,31 +1291,59 @@ template <typename DataType, typename DistType>
// Resizes the per-element bookkeeping containers (label lookup, visited-nodes handler pool and
// idToMetaData) to hold `new_max_elements` entries.
// `new_max_elements` must be a multiple of blockSize (asserted below).
// In this diff, lines marked '-' are removed and lines marked '+' are added: after the change
// this function no longer assigns maxElements, and it logs idToMetaData.capacity() as the old
// size instead of maxElements.
12811291void HNSWIndex<DataType, DistType>::resizeIndexCommon(size_t new_max_elements) {
12821292 assert (new_max_elements % this ->blockSize == 0 &&
12831293 " new_max_elements must be a multiple of blockSize" );
1284- this ->log (VecSimCommonStrings::LOG_VERBOSE_STRING,
1285- " Updating HNSW index capacity from %zu to %zu " , this -> maxElements , new_max_elements);
1294+ this ->log (VecSimCommonStrings::LOG_VERBOSE_STRING, " Resizing HNSW index from %zu to %zu " ,
1295+ idToMetaData. capacity () , new_max_elements);
12861296 resizeLabelLookup (new_max_elements);
12871297 visitedNodesHandlerPool.resize (new_max_elements);
// idToMetaData is expected to carry no slack on entry: its capacity must equal its size.
1298+ assert (idToMetaData.capacity () == idToMetaData.size ());
12881299 idToMetaData.resize (new_max_elements);
12891300 idToMetaData.shrink_to_fit ();
1290-
1291- maxElements = new_max_elements;
// The same no-slack invariant is expected to hold after the resize.
// NOTE(review): if idToMetaData is a std::vector, shrink_to_fit() is a non-binding request,
// so this assert relies on the implementation/allocator honoring it - confirm.
1301+ assert (idToMetaData.capacity () == idToMetaData.size ());
12921302}
12931303
// Grows the index capacity (maxElements) by exactly one block and appends a new graph data
// block. The common containers are resized through resizeIndexCommon() only when
// idToMetaData has no spare capacity left beyond the current index size.
12941304template <typename DataType, typename DistType>
12951305void HNSWIndex<DataType, DistType>::growByBlock() {
1296- size_t new_max_elements = maxElements + this ->blockSize ;
// Preconditions (checked only when asserts are enabled):
//  - maxElements is block-aligned,
//  - the index is full (maxElements == indexSize()),
//  - graphDataBlocks holds exactly maxElements / blockSize blocks,
//  - idToMetaData's capacity is either maxElements or one block ahead of it.
1306+ assert (this ->maxElements % this ->blockSize == 0 );
1307+ assert (this ->maxElements == indexSize ());
1308+ assert (graphDataBlocks.size () == this ->maxElements / this ->blockSize );
1309+ assert (idToMetaData.capacity () == maxElements ||
1310+ idToMetaData.capacity () == maxElements + this ->blockSize );
1311+
1312+ this ->log (VecSimCommonStrings::LOG_VERBOSE_STRING,
1313+ " Updating HNSW index capacity from %zu to %zu" , maxElements,
1314+ maxElements + this ->blockSize );
1315+ maxElements += this ->blockSize ;
1316+
12971317 graphDataBlocks.emplace_back (this ->blockSize , this ->elementGraphDataSize , this ->allocator );
12981318
1299- resizeIndexCommon (new_max_elements);
// Resize only if idToMetaData's capacity equals the current element count; when it is
// already one block ahead (the second alternative asserted above) the resize is skipped.
1319+ if (idToMetaData.capacity () == indexSize ()) {
1320+ resizeIndexCommon (maxElements);
1321+ }
13001322}
13011323
// Shrinks the index capacity (maxElements) by one block, but only when the current index size
// is a multiple of blockSize; otherwise this is a no-op (apart from the asserts). In the added
// ('+') version the block-alignment check lives here rather than at the call site.
13021324template <typename DataType, typename DistType>
13031325void HNSWIndex<DataType, DistType>::shrinkByBlock() {
1304- assert (maxElements >= this ->blockSize );
1305- size_t new_max_elements = maxElements - this ->blockSize ;
1306- graphDataBlocks.pop_back ();
// maxElements must be block-aligned and at least one block, so the subtraction at the end of
// the function cannot wrap around.
1326+ assert (this ->maxElements >= this ->blockSize );
1327+ assert (this ->maxElements % this ->blockSize == 0 );
1328+
1329+ if (indexSize () % this ->blockSize == 0 ) {
1330+ this ->log (VecSimCommonStrings::LOG_VERBOSE_STRING,
1331+ " Updating HNSW index capacity from %zu to %zu" , maxElements,
1332+ maxElements - this ->blockSize );
// Drop the last graph data block; afterwards there must be exactly one block per blockSize
// stored elements.
1333+ graphDataBlocks.pop_back ();
1334+ assert (graphDataBlocks.size () == indexSize () / this ->blockSize );
1335+
1336+ // assuming idToMetaData reflects the capacity of the heavy reallocation containers.
// Three cases for the containers handled by resizeIndexCommon():
//  - empty index: resize them to 0;
//  - idToMetaData's capacity is at least two blocks above the index size: shrink them by one
//    block;
//  - otherwise: leave them untouched.
1337+ if (indexSize () == 0 ) {
1338+ resizeIndexCommon (0 );
1339+ } else if (idToMetaData.capacity () >= (indexSize () + 2 * this ->blockSize )) {
1340+ assert (this ->maxElements + this ->blockSize == idToMetaData.capacity ());
1341+ resizeIndexCommon (idToMetaData.capacity () - this ->blockSize );
1342+ }
13071343
1308- resizeIndexCommon (new_max_elements);
1344+ // Take the lower bound into account.
// maxElements is reduced by one block in every case above, including the one where the
// containers were not resized.
1345+ maxElements -= this ->blockSize ;
1346+ }
13091347}
13101348
13111349template <typename DataType, typename DistType>
@@ -1660,9 +1698,7 @@ void HNSWIndex<DataType, DistType>::removeAndSwap(idType internalId) {
16601698 // Remove the vector; shrinkByBlock() checks by itself whether a complete block can be freed
16611699 // and whether there is at least one block between the capacity and the size.
16621700 this ->vectors ->removeElement (curElementCount);
1663- if (curElementCount % this ->blockSize == 0 ) {
1664- shrinkByBlock ();
1665- }
1701+ shrinkByBlock ();
16661702}
16671703
16681704template <typename DataType, typename DistType>
@@ -1738,6 +1774,9 @@ void HNSWIndex<DataType, DistType>::removeVectorInPlace(const idType element_int
17381774template <typename DataType, typename DistType>
17391775HNSWAddVectorState HNSWIndex<DataType, DistType>::storeNewElement(labelType label,
17401776 const void *vector_data) {
1777+ if (isCapacityFull ()) {
1778+ growByBlock ();
1779+ }
17411780 HNSWAddVectorState state{};
17421781
17431782 // Choose randomly the maximum level in which the new element will be in the index.
@@ -1765,14 +1804,6 @@ HNSWAddVectorState HNSWIndex<DataType, DistType>::storeNewElement(labelType labe
17651804 throw e;
17661805 }
17671806
1768- if (indexSize () > indexCapacity ()) {
1769- growByBlock ();
1770- } else if (state.newElementId % this ->blockSize == 0 ) {
1771- // If we had an initial capacity, we might have to allocate new blocks for the graph data.
1772- this ->graphDataBlocks .emplace_back (this ->blockSize , this ->elementGraphDataSize ,
1773- this ->allocator );
1774- }
1775-
17761807 // Insert the new element to the data block
17771808 this ->vectors ->addElement (vector_data, state.newElementId );
17781809 this ->graphDataBlocks .back ().addElement (cur_egd);
0 commit comments