Skip to content

Commit

Permalink
Fix recover hnsw (#2434)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Fix recovery of the cosine HNSW index when column data has been dumped.

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
- [x] Test cases
  • Loading branch information
small-turtle-1 authored Jan 8, 2025
1 parent c4d2fd6 commit ce1bd1a
Show file tree
Hide file tree
Showing 3 changed files with 81 additions and 4 deletions.
53 changes: 53 additions & 0 deletions python/restart_test/test_memidx.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,59 @@ def check():
# select count(*) from test_memidx1;
# # result: 13

# recover cosine hnsw from mmap column
def test_mem_hnsw_cos(self, infinity_runner: InfinityRunner):
    """Restart test: recover a cosine-metric, LVQ-encoded HNSW index.

    Phase 1 creates table ``test_memidx1``, builds HNSW index ``idx1`` on the
    vector column ``c2`` and inserts ``row_n`` identical rows. The test then
    checks that exactly two ``*.col`` files exist under the data directory,
    and finally runs phase 2 under the same config.

    NOTE(review): the decorator from ``infinity_runner_decorator_factory``
    presumably starts and stops the server around each phase, so phase 2
    would exercise recovery on startup; confirm against the helper.
    """
    # 100M quota in 7.toml not dump index when insert 8192 rows
    row_n = 8192
    config1 = "test/data/config/restart_test/test_memidx/7.toml"
    uri = common_values.TEST_LOCAL_HOST
    infinity_runner.clear()

    decorator1 = infinity_runner_decorator_factory(config1, uri, infinity_runner)

    @decorator1
    def part1(infinity_obj):
        db_obj = infinity_obj.get_database("default_db")
        table_obj = db_obj.create_table(
            "test_memidx1",
            {"c1": {"type": "int"}, "c2": {"type": "vector,4,float"}},
        )
        table_obj.create_index(
            "idx1",
            index.IndexInfo(
                "c2",
                index.IndexType.Hnsw,
                {
                    "M": "16",
                    "ef_construction": "20",
                    "metric": "cosine",
                    "block_size": "1",
                    "encode": "lvq",
                },
            ),
        )
        table_obj.insert(
            [{"c1": 2, "c2": [0.1, 0.2, 0.3, -0.2]} for _ in range(row_n)]
        )
        # wait for 8192 lines to dump
        time.sleep(3)

    @decorator1
    def part2(infinity_obj):
        # No queries are issued here; the phase only keeps the server up briefly.
        time.sleep(2)

    part1()

    data_dir = "/var/infinity/data"
    cnt = 0
    for path in pathlib.Path(data_dir).rglob("*.col"):
        print(path)
        cnt += 1
    # The original followed this assert with an `if cnt != 2: ... return` skip
    # branch that could never run; the assert alone carries the check.
    assert cnt == 2, f"expected 2 dumped column files (*.col) under {data_dir}, found {cnt}"

    part2()

def test_mem_ivf(self, infinity_runner: InfinityRunner):
config1 = "test/data/config/restart_test/test_memidx/1.toml"
config2 = "test/data/config/restart_test/test_memidx/2.toml"
Expand Down
10 changes: 6 additions & 4 deletions src/storage/knn_index/knn_hnsw/data_store/lvq_vec_store.cppm
Original file line number Diff line number Diff line change
Expand Up @@ -110,20 +110,22 @@ public:
}

void CompressTo(const DataType *src, LVQData *dest) const {
UniquePtr<DataType[]> normalized;
if (normalize_) {
normalized = MakeUniqueForOverwrite<DataType[]>(this->dim_);
DataType norm = 0;
DataType *src_without_const = const_cast<DataType *>(src);
for (SizeT j = 0; j < this->dim_; ++j) {
norm += src_without_const[j] * src_without_const[j];
norm += src[j] * src[j];
}
norm = std::sqrt(norm);
if (norm == 0) {
std::fill(dest->compress_vec_, dest->compress_vec_ + this->dim_, 0);
std::fill(normalized.get(), normalized.get() + this->dim_, 0);
} else {
for (SizeT j = 0; j < this->dim_; ++j) {
src_without_const[j] /= norm;
normalized[j] = src[j] / norm;
}
}
src = normalized.get();
}

CompressType *compress = dest->compress_vec_;
Expand Down
22 changes: 22 additions & 0 deletions test/data/config/restart_test/test_memidx/7.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Server configuration loaded by the memidx restart test test_mem_hnsw_cos.
[general]
version = "0.5.0"
time_zone = "utc-8"

# No overrides are set in the [network] section.
[network]
[log]
log_to_stdout = true
log_level = "trace"

[storage]
# NOTE(review): an empty persistence_dir presumably turns the persistence
# manager off; confirm against the configuration reference.
persistence_dir = ""
# NOTE(review): "0s" intervals presumably disable the corresponding
# background task; confirm.
optimize_interval = "0s"
cleanup_interval = "0s"
compact_interval = "0s"

[buffer]
# The restart test's own comment says this 100M quota is meant to keep the
# in-memory index from being dumped while 8192 rows are inserted.
memindex_memory_quota = "100MB"
[wal]
delta_checkpoint_interval = "1s"
full_checkpoint_interval = "0s"

# No overrides are set in the [resource] section.
[resource]

0 comments on commit ce1bd1a

Please sign in to comment.