Skip to content

Commit

Permalink
Merge #137319
Browse files Browse the repository at this point in the history
137319: vecstore: implement partition manipulation functions r=mw5h a=mw5h

Stub out the persistent storage vecstore implementation. Implement the methods for partition creation, retrieval and deletion. Add a simple test to ensure that these functions work properly.

Co-authored-by: Matt White <matt.white@cockroachlabs.com>
  • Loading branch information
craig[bot] and mw5h committed Jan 14, 2025
2 parents 30a6267 + 780f5a6 commit 7aa395d
Show file tree
Hide file tree
Showing 8 changed files with 854 additions and 176 deletions.
17 changes: 17 additions & 0 deletions pkg/sql/vecindex/vecstore/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ go_library(
"in_memory_store.go",
"in_memory_txn.go",
"partition.go",
"persistent_store.go",
"persistent_txn.go",
"search_set.go",
"store.go",
"vecstorepb.go",
Expand All @@ -41,6 +43,11 @@ go_library(
importpath = "github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecstore",
visibility = ["//visibility:public"],
deps = [
"//pkg/kv",
"//pkg/kv/kvpb",
"//pkg/kv/kvserver/concurrency/isolation",
"//pkg/roachpb",
"//pkg/sql/sem/builtins",
"//pkg/sql/vecindex/internal",
"//pkg/sql/vecindex/quantize",
"//pkg/util/container/heap",
Expand All @@ -61,16 +68,26 @@ go_test(
"in_memory_store_test.go",
"main_test.go",
"partition_test.go",
"persistent_store_test.go",
"search_set_test.go",
"store_test.go",
"vecstorepb_test.go",
],
embed = [":vecstore"],
deps = [
"//pkg/base",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/security/securityassets",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/sql/randgen",
"//pkg/sql/rowenc",
"//pkg/sql/sem/tree",
"//pkg/sql/types",
"//pkg/sql/vecindex/internal",
"//pkg/sql/vecindex/quantize",
"//pkg/testutils/serverutils",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/num32",
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/vecindex/vecstore/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ func EncodePartitionKey(appendTo []byte, key PartitionKey) []byte {
return encoding.EncodeUvarintAscending(appendTo, uint64(key))
}

// EncodedPartitionKeyLen reports how many bytes the encoded form of the given
// partition key occupies. The key is widened to uint64 and measured with the
// ascending uvarint length helper, so no encoding is actually performed.
func EncodedPartitionKeyLen(key PartitionKey) int {
	rawKey := uint64(key)
	return encoding.EncLenUvarintAscending(rawKey)
}

// EncodeChildKey encodes a child key into the given byte slice. The "appendTo"
// slice is expected to be the prefix shared between all KV entries for a
// partition.
Expand Down
177 changes: 1 addition & 176 deletions pkg/sql/vecindex/vecstore/in_memory_store_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,6 @@ func TestInMemoryStore(t *testing.T) {

ctx := internal.WithWorkspace(context.Background(), &internal.Workspace{})

childKey2 := ChildKey{PartitionKey: 2}
childKey10 := ChildKey{PartitionKey: 10}
childKey20 := ChildKey{PartitionKey: 20}
childKey30 := ChildKey{PartitionKey: 30}
childKey40 := ChildKey{PartitionKey: 40}
childKey50 := ChildKey{PartitionKey: 50}
childKey60 := ChildKey{PartitionKey: 60}

store := NewInMemoryStore(2, 42)
quantizer := quantize.NewUnQuantizer(2)

Expand Down Expand Up @@ -72,174 +64,7 @@ func TestInMemoryStore(t *testing.T) {
}, vectors)
})

t.Run("search empty root partition", func(t *testing.T) {
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

searchSet := SearchSet{MaxResults: 2}
partitionCounts := []int{0}
level, err := txn.SearchPartitions(
ctx, []PartitionKey{RootKey}, vector.T{1, 1}, &searchSet, partitionCounts)
require.NoError(t, err)
require.Equal(t, LeafLevel, level)
require.Nil(t, searchSet.PopResults())
require.Equal(t, 0, partitionCounts[0])
})

t.Run("add to root partition", func(t *testing.T) {
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

// Add to root partition.
count, err := txn.AddToPartition(ctx, RootKey, vector.T{1, 2}, childKey10)
require.NoError(t, err)
require.Equal(t, 1, count)
count, err = txn.AddToPartition(ctx, RootKey, vector.T{7, 4}, childKey20)
require.NoError(t, err)
require.Equal(t, 2, count)
count, err = txn.AddToPartition(ctx, RootKey, vector.T{4, 3}, childKey30)
require.NoError(t, err)
require.Equal(t, 3, count)

// Add duplicate and expect value to be overwritten
count, err = txn.AddToPartition(ctx, RootKey, vector.T{5, 5}, childKey30)
require.NoError(t, err)
require.Equal(t, 3, count)

// Search root partition.
searchSet := SearchSet{MaxResults: 2}
partitionCounts := []int{0}
level, err := txn.SearchPartitions(
ctx, []PartitionKey{RootKey}, vector.T{1, 1}, &searchSet, partitionCounts)
require.NoError(t, err)
require.Equal(t, Level(1), level)
result1 := SearchResult{QuerySquaredDistance: 1, ErrorBound: 0, CentroidDistance: 2.2361, ParentPartitionKey: 1, ChildKey: childKey10}
result2 := SearchResult{QuerySquaredDistance: 32, ErrorBound: 0, CentroidDistance: 7.0711, ParentPartitionKey: 1, ChildKey: childKey30}
results := searchSet.PopResults()
roundResults(results, 4)
require.Equal(t, SearchResults{result1, result2}, results)
require.Equal(t, 3, partitionCounts[0])
})

var root *Partition
t.Run("get root partition", func(t *testing.T) {
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

// Get root partition.
var err error
root, err = txn.GetPartition(ctx, RootKey)
require.NoError(t, err)
require.Equal(t, Level(1), root.Level())
require.Equal(t, []ChildKey{childKey10, childKey20, childKey30}, root.ChildKeys())
require.Equal(t, vector.T{0, 0}, root.Centroid())

// Get partition centroid + full vectors.
results := []VectorWithKey{
{Key: ChildKey{PartitionKey: RootKey}},
{Key: ChildKey{PrimaryKey: PrimaryKey{11}}},
{Key: ChildKey{PrimaryKey: PrimaryKey{0}}},
}
err = txn.GetFullVectors(ctx, results)
require.NoError(t, err)
require.Equal(t, vector.T{0, 0}, results[0].Vector)
require.Equal(t, vector.T{100, 200}, results[1].Vector)
require.Nil(t, results[2].Vector)
})

t.Run("replace root partition", func(t *testing.T) {
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

// Replace root partition.
_, err := txn.GetPartition(ctx, RootKey)
require.NoError(t, err)
vectors := vector.T{4, 3}.AsSet()
quantizedSet := quantizer.Quantize(ctx, &vectors)
newRoot := NewPartition(quantizer, quantizedSet, []ChildKey{childKey2}, 2)
require.NoError(t, txn.SetRootPartition(ctx, newRoot))
newRoot, err = txn.GetPartition(ctx, RootKey)
require.NoError(t, err)
require.Equal(t, Level(2), newRoot.Level())
require.Equal(t, []ChildKey{childKey2}, newRoot.ChildKeys())

searchSet := SearchSet{MaxResults: 2}
partitionCounts := []int{0}
level, err := txn.SearchPartitions(
ctx, []PartitionKey{RootKey}, vector.T{2, 2}, &searchSet, partitionCounts)
require.NoError(t, err)
require.Equal(t, Level(2), level)
result3 := SearchResult{QuerySquaredDistance: 5, ErrorBound: 0, CentroidDistance: 0, ParentPartitionKey: 1, ChildKey: childKey2}
require.Equal(t, SearchResults{result3}, searchSet.PopResults())
require.Equal(t, 1, partitionCounts[0])
})

var partitionKey1 PartitionKey
t.Run("insert another partition and update it", func(t *testing.T) {
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

_, err := txn.GetPartition(ctx, RootKey)
require.NoError(t, err)
partitionKey1, err = txn.InsertPartition(ctx, root)
require.NoError(t, err)
require.Equal(t, PartitionKey(2), partitionKey1)
count, err := txn.RemoveFromPartition(ctx, partitionKey1, childKey20)
require.NoError(t, err)
require.Equal(t, 2, count)

// Try to remove the same key again.
count, err = txn.RemoveFromPartition(ctx, partitionKey1, childKey20)
require.NoError(t, err)
require.Equal(t, 2, count)

// Add an alternate element and add duplicate, expecting value to be overwritten.
count, err = txn.AddToPartition(ctx, partitionKey1, vector.T{-1, 0}, childKey40)
require.NoError(t, err)
require.Equal(t, 3, count)
count, err = txn.AddToPartition(ctx, partitionKey1, vector.T{1, 1}, childKey40)
require.NoError(t, err)
require.Equal(t, 3, count)

searchSet := SearchSet{MaxResults: 2}
partitionCounts := []int{0}
level, err := txn.SearchPartitions(
ctx, []PartitionKey{partitionKey1}, vector.T{1, 1}, &searchSet, partitionCounts)
require.NoError(t, err)
require.Equal(t, Level(1), level)
result4 := SearchResult{QuerySquaredDistance: 0, ErrorBound: 0, CentroidDistance: 1.4142, ParentPartitionKey: 2, ChildKey: childKey40}
result5 := SearchResult{QuerySquaredDistance: 1, ErrorBound: 0, CentroidDistance: 2.2361, ParentPartitionKey: 2, ChildKey: childKey10}
require.Equal(t, SearchResults{result4, result5}, roundResults(searchSet.PopResults(), 4))
require.Equal(t, 3, partitionCounts[0])
})

t.Run("search multiple partitions at leaf level", func(t *testing.T) {
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

_, err := txn.GetPartition(ctx, RootKey)
require.NoError(t, err)

vectors := vector.MakeSet(2)
vectors.Add(vector.T{4, -1})
vectors.Add(vector.T{2, 8})
quantizedSet := quantizer.Quantize(ctx, &vectors)
partition := NewPartition(quantizer, quantizedSet, []ChildKey{childKey50, childKey60}, LeafLevel)
partitionKey2, err := txn.InsertPartition(ctx, partition)
require.NoError(t, err)
require.Equal(t, PartitionKey(3), partitionKey2)

searchSet := SearchSet{MaxResults: 2}
partitionCounts := []int{0, 0}
level, err := txn.SearchPartitions(
ctx, []PartitionKey{partitionKey1, partitionKey2}, vector.T{3, 1}, &searchSet, partitionCounts)
require.NoError(t, err)
require.Equal(t, Level(1), level)
result4 := SearchResult{QuerySquaredDistance: 4, ErrorBound: 0, CentroidDistance: 1.41, ParentPartitionKey: 2, ChildKey: childKey40}
result5 := SearchResult{QuerySquaredDistance: 5, ErrorBound: 0, CentroidDistance: 2.24, ParentPartitionKey: 2, ChildKey: childKey10}
require.Equal(t, SearchResults{result4, result5}, roundResults(searchSet.PopResults(), 2))
require.Equal(t, []int{3, 2}, partitionCounts)
})
commonStoreTests(ctx, t, store, quantizer)

t.Run("delete full vector", func(t *testing.T) {
txn := beginTransaction(ctx, t, store)
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/vecindex/vecstore/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@ import (
"os"
"testing"

"github.com/cockroachdb/cockroach/pkg/security/securityassets"
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

//go:generate ../../../util/leaktest/add-leaktest.sh *_test.go

// TestMain performs package-wide test setup before running the tests: it
// installs the embedded security assets loader, seeds the random number
// generator for tests, and registers the test server factory. The process
// then exits with the status code returned by the test run.
func TestMain(m *testing.M) {
	securityassets.SetLoader(securitytest.EmbeddedAssets)
	randutil.SeedForTests()
	serverutils.InitTestServerFactory(server.TestServerFactory)

	exitCode := m.Run()
	os.Exit(exitCode)
}
64 changes: 64 additions & 0 deletions pkg/sql/vecindex/vecstore/persistent_store.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package vecstore

import (
"context"

"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/quantize"
)

// PersistentStore implements the Store interface for KV backed vector indices.
// Instances are created with NewPersistentStore, which populates every field.
type PersistentStore struct {
	// db is the KV database handle. It is used to start new KV transactions
	// and is needed for index maintenance functions.
	db *kv.DB // Needed for index maintenance functions
	// quantizer is the quantizer supplied by the caller at construction time.
	quantizer quantize.Quantizer
	// rootQuantizer is an UnQuantizer built with the same original dimensions
	// as quantizer. NOTE(review): presumably used for the root partition, which
	// would then hold unquantized vectors; confirm against the txn code.
	rootQuantizer quantize.Quantizer

	// prefix is the KV key supplied at construction time. NOTE(review):
	// presumably the key prefix shared by all of this index's KV entries;
	// confirm against the txn code.
	prefix roachpb.Key
}

// Compile-time assertion that *PersistentStore satisfies the Store interface.
var _ Store = (*PersistentStore)(nil)

// NewPersistentStore builds a KV-backed vecstore.Store for a single vector
// index. The given quantizer is stored as-is, and a second, un-quantizing
// quantizer with the same original dimensionality is created alongside it as
// the root quantizer. The db handle and key prefix are retained unchanged.
func NewPersistentStore(
	db *kv.DB, quantizer quantize.Quantizer, prefix roachpb.Key,
) *PersistentStore {
	dims := quantizer.GetOriginalDims()
	return &PersistentStore{
		db:            db,
		quantizer:     quantizer,
		rootQuantizer: quantize.NewUnQuantizer(dims),
		prefix:        prefix,
	}
}

// Begin is part of the vecstore.Store interface. It opens a fresh KV
// transaction on the store's database and wraps it in a persistent store
// transaction that operates on this store. The returned error is always nil.
func (s *PersistentStore) Begin(ctx context.Context) (Txn, error) {
	kvTxn := s.db.NewTxn(ctx, "vecstore.PersistentStore begin transaction")
	return NewPersistentStoreTxn(s, kvTxn), nil
}

// Commit is part of the vecstore.Store interface. It commits the KV
// transaction wrapped by the given vecstore.Txn and returns the result of
// that commit. The txn must be a *persistentStoreTxn; any other concrete
// type causes a panic from the type assertion.
func (s *PersistentStore) Commit(ctx context.Context, txn Txn) error {
	psTxn := txn.(*persistentStoreTxn)
	return psTxn.kv.Commit(ctx)
}

// Abort is part of the vecstore.Store interface. It rolls back the KV
// transaction wrapped by the given vecstore.Txn and returns the result of
// that rollback. The txn must be a *persistentStoreTxn; any other concrete
// type causes a panic from the type assertion.
func (s *PersistentStore) Abort(ctx context.Context, txn Txn) error {
	psTxn := txn.(*persistentStoreTxn)
	return psTxn.kv.Rollback(ctx)
}

// MergeStats is part of the vecstore.Store interface. It is not implemented
// for the persistent store yet: every call panics and no value is returned.
func (s *PersistentStore) MergeStats(
	ctx context.Context, stats *IndexStats, skipMerge bool,
) error {
	panic("MergeStats() unimplemented")
}
Loading

0 comments on commit 7aa395d

Please sign in to comment.