Skip to content

Commit

Permalink
vecstore: implement partition manipulation functions
Browse files Browse the repository at this point in the history
Add persistent storage to vecstore. Implement the methods for partition
creation, retrieval and deletion. Add a simple test to ensure these
methods are functioning.
  • Loading branch information
mw5h committed Dec 18, 2024
1 parent 4b3782a commit 1c10b1f
Show file tree
Hide file tree
Showing 9 changed files with 430 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pkg/sql/vecindex/quantize/quantizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ type Quantizer interface {

// NewQuantizedVectorSet returns a new empty vector set preallocated to the
// number of vectors specified.
NewQuantizedVectorSet(size int) QuantizedVectorSet
NewQuantizedVectorSet(size int, centroid vector.T) QuantizedVectorSet

// EstimateSquaredDistances returns the estimated squared distances of the
// query vector from each data vector represented in the given quantized
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/vecindex/quantize/rabitq.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,11 @@ func (q *raBitQuantizer) QuantizeInSet(
}

// NewQuantizedVectorSet implements the Quantizer interface
func (q *raBitQuantizer) NewQuantizedVectorSet(size int) QuantizedVectorSet {
dataBuffer := make([]uint64, 0, size*RaBitQCodeSetWidth(q.GetOriginalDims()))
func (q *raBitQuantizer) NewQuantizedVectorSet(size int, centroid vector.T) QuantizedVectorSet {
dataBuffer := make([]uint64, 0, size*RaBitQCodeSetWidth(q.GetRandomDims()))
raBitQuantizedVectorSet := &RaBitQuantizedVectorSet{
Centroid: make([]float32, 0, q.GetOriginalDims()),
Codes: MakeRaBitQCodeSetFromRawData(dataBuffer, q.GetOriginalDims()),
Centroid: centroid,
Codes: MakeRaBitQCodeSetFromRawData(dataBuffer, q.GetRandomDims()),
CodeCounts: make([]uint32, 0, size),
CentroidDistances: make([]float32, 0, size),
DotProducts: make([]float32, 0, size),
Expand Down
8 changes: 4 additions & 4 deletions pkg/sql/vecindex/quantize/unquantizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -76,11 +76,11 @@ func (q *unQuantizer) QuantizeInSet(
}

// NewQuantizedVectorSet implements the Quantizer interface
func (q *unQuantizer) NewQuantizedVectorSet(size int) QuantizedVectorSet {
dataBuffer := make([]float32, 0, size*q.GetOriginalDims())
func (q *unQuantizer) NewQuantizedVectorSet(size int, centroid vector.T) QuantizedVectorSet {
dataBuffer := make([]float32, 0, size*q.GetRandomDims())
unquantizedSet := &UnQuantizedVectorSet{
Centroid: make([]float32, q.GetRandomDims()),
Vectors: vector.MakeSetFromRawData(dataBuffer, q.GetOriginalDims()),
Centroid: centroid,
Vectors: vector.MakeSetFromRawData(dataBuffer, q.GetRandomDims()),
}
return unquantizedSet
}
Expand Down
16 changes: 16 additions & 0 deletions pkg/sql/vecindex/vecstore/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ go_library(
"in_memory_store.go",
"in_memory_txn.go",
"partition.go",
"persistent_store.go",
"persistent_txn.go",
"search_set.go",
"store.go",
"vecstorepb.go",
Expand All @@ -41,6 +43,11 @@ go_library(
importpath = "github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecstore",
visibility = ["//visibility:public"],
deps = [
"//pkg/kv",
"//pkg/kv/kvpb",
"//pkg/kv/kvserver/concurrency/isolation",
"//pkg/roachpb",
"//pkg/sql/sem/builtins",
"//pkg/sql/vecindex/internal",
"//pkg/sql/vecindex/quantize",
"//pkg/util/container/heap",
Expand All @@ -61,16 +68,25 @@ go_test(
"in_memory_store_test.go",
"main_test.go",
"partition_test.go",
"persistent_store_test.go",
"search_set_test.go",
"vecstorepb_test.go",
],
embed = [":vecstore"],
deps = [
"//pkg/base",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/security/securityassets",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/sql/randgen",
"//pkg/sql/rowenc",
"//pkg/sql/sem/tree",
"//pkg/sql/types",
"//pkg/sql/vecindex/internal",
"//pkg/sql/vecindex/quantize",
"//pkg/testutils/serverutils",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/num32",
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/vecindex/vecstore/encoding.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ func EncodePartitionKey(appendTo []byte, key PartitionKey) []byte {
return encoding.EncodeUvarintAscending(appendTo, uint64(key))
}

// EncodedPartitionKeyLen reports how many bytes the ascending uvarint
// encoding of the given partition key occupies.
func EncodedPartitionKeyLen(key PartitionKey) int {
	raw := uint64(key)
	return encoding.EncLenUvarintAscending(raw)
}

// EncodeChildKey encodes a child key into the given byte slice. The "appendTo"
// slice is expected to be the prefix shared between all KV entries for a
// partition.
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/vecindex/vecstore/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,19 @@ import (
"os"
"testing"

"github.com/cockroachdb/cockroach/pkg/security/securityassets"
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
)

//go:generate ../util/leaktest/add-leaktest.sh *_test.go

// TestMain prepares the package-wide test environment: it installs the
// embedded security assets loader, seeds the random number generator for
// tests, and registers the test server factory. It then runs the tests and
// exits the process with their result code.
func TestMain(m *testing.M) {
	securityassets.SetLoader(securitytest.EmbeddedAssets)
	randutil.SeedForTests()
	serverutils.InitTestServerFactory(server.TestServerFactory)

	exitCode := m.Run()
	os.Exit(exitCode)
}
62 changes: 62 additions & 0 deletions pkg/sql/vecindex/vecstore/persistent_store.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package vecstore

import (
"context"

"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/quantize"
)

// PersistentStore is the Store implementation constructed by
// NewPersistentStore on top of a kv.DB handle.
type PersistentStore struct {
db *kv.DB // Needed for index maintenance functions
// quantizer is the quantizer supplied by the caller of NewPersistentStore.
quantizer quantize.Quantizer
// rootQuantizer is an UnQuantizer built with quantizer's original dimensions.
// NOTE(review): presumably used for the root partition; confirm against the
// transaction implementation.
rootQuantizer quantize.Quantizer
// prefix is the key supplied by the caller of NewPersistentStore.
// NOTE(review): presumably the shared prefix of the index's KV keys; confirm.
prefix roachpb.Key
}

// Compile-time check that *PersistentStore satisfies the Store interface.
var _ Store = (*PersistentStore)(nil)

// NewPersistentStore returns a KV-backed vecstore.Store for one vector index.
// The supplied quantizer is stored as-is, the root quantizer is an UnQuantizer
// sized to the quantizer's original dimensions, and prefix is kept as the
// store's key prefix.
func NewPersistentStore(
	db *kv.DB, quantizer quantize.Quantizer, prefix roachpb.Key,
) *PersistentStore {
	rootQuantizer := quantize.NewUnQuantizer(quantizer.GetOriginalDims())
	return &PersistentStore{
		db:            db,
		quantizer:     quantizer,
		rootQuantizer: rootQuantizer,
		prefix:        prefix,
	}
}

// Begin is part of the vecstore.Store interface. It opens a new KV
// transaction on the store's database and hands it, together with this store,
// to NewPersistentStoreTxn. The returned error is always nil.
func (s *PersistentStore) Begin(ctx context.Context) (Txn, error) {
	kvTxn := s.db.NewTxn(ctx, "vecstore.PersistentStore begin transaction")
	return NewPersistentStoreTxn(s, kvTxn), nil
}

// Commit is part of the vecstore.Store interface. It commits the KV
// transaction held by the given vecstore.Txn and returns that commit's error.
// The txn must be a *PersistentStoreTxn; the type assertion is unchecked and
// panics for any other implementation.
func (s *PersistentStore) Commit(ctx context.Context, txn Txn) error {
	persistentTxn := txn.(*PersistentStoreTxn)
	return persistentTxn.kv.Commit(ctx)
}

// Abort is part of the vecstore.Store interface. It rolls back the KV
// transaction held by the given vecstore.Txn and returns the rollback's error.
// The txn must be a *PersistentStoreTxn; the type assertion is unchecked and
// panics for any other implementation.
func (s *PersistentStore) Abort(ctx context.Context, txn Txn) error {
	persistentTxn := txn.(*PersistentStoreTxn)
	return persistentTxn.kv.Rollback(ctx)
}

// MergeStats is part of the vecstore.Store interface. It is not implemented
// for the persistent store yet: every call panics and no error is returned.
func (s *PersistentStore) MergeStats(ctx context.Context, stats *IndexStats, skipMerge bool) error {
	const unimplemented = "MergeStats() unimplemented"
	panic(unimplemented)
}
93 changes: 93 additions & 0 deletions pkg/sql/vecindex/vecstore/persistent_store_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package vecstore

import (
"context"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/internal"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/quantize"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/cockroachdb/cockroach/pkg/util/vector"
"github.com/stretchr/testify/require"
)

// TestPersistentStore exercises a PersistentStore backed by a test server's
// KV database. The first subtest writes the root partition three times and
// reads it back after each write; the second inserts a non-root partition,
// reads it back, deletes it, and expects a subsequent read to fail.
//
// beginTransaction, commitTransaction and testingAssertPartitionsEqual are
// helpers that are not defined in this file.
func TestPersistentStore(t *testing.T) {
defer leaktest.AfterTest(t)()

// NOTE(review): the workspace is attached to ctx presumably because the
// quantizer calls below expect one; confirm against internal.WithWorkspace.
ctx := internal.WithWorkspace(context.Background(), &internal.Workspace{})
// NOTE(review): the option name indicates this test does not yet work with
// secondary tenants; 42 is presumably the tracking issue number. Confirm.
s, _, kvDB := serverutils.StartServer(t, base.TestServerArgs{DefaultTestTenant: base.TestIsForStuffThatShouldWorkWithSecondaryTenantsButDoesntYet(42)})
defer s.Stopper().Stop(ctx)

// Child keys used as partition contents: three that reference other
// partitions and three that carry a two-element primary key (the literal 00
// is simply the integer zero).
childKey2 := ChildKey{PartitionKey: 2}
childKey10 := ChildKey{PartitionKey: 10}
childKey20 := ChildKey{PartitionKey: 20}
primaryKey200 := ChildKey{PrimaryKey: PrimaryKey{2, 00}}
primaryKey300 := ChildKey{PrimaryKey: PrimaryKey{3, 00}}
primaryKey400 := ChildKey{PrimaryKey: PrimaryKey{4, 00}}

// The store is keyed under an index key prefix built with tenant 5's SQL
// codec and the IDs 500 and 42, and uses a 2-dimensional UnQuantizer.
ten5Codec := keys.MakeSQLCodec(roachpb.MustMakeTenantID(5))
prefix := rowenc.MakeIndexKeyPrefix(ten5Codec, 500, 42)
quantizer := quantize.NewUnQuantizer(2)
store := NewPersistentStore(kvDB, quantizer, prefix)

t.Run("insert a root partition into the store and read it back", func(t *testing.T) {
// All writes and reads in this subtest share one transaction, which is
// committed when the subtest returns.
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

// Round 1: a level-2 root with one vector and one child partition.
vectors := vector.T{4, 3}.AsSet()
quantizedSet := quantizer.Quantize(ctx, &vectors)
root := NewPartition(quantizer, quantizedSet, []ChildKey{childKey2}, Level(2))
require.NoError(t, txn.SetRootPartition(ctx, root))
readRoot, err := txn.GetPartition(ctx, RootKey)
require.NoError(t, err)
testingAssertPartitionsEqual(t, root, readRoot)

// Round 2: set the root again with two vectors and two different child
// partitions; the read must reflect this latest write.
vectors = vector.T{4, 3}.AsSet()
vectors.Add(vector.T{2, 1})
quantizedSet = quantizer.Quantize(ctx, &vectors)
root = NewPartition(quantizer, quantizedSet, []ChildKey{childKey10, childKey20}, Level(2))
require.NoError(t, txn.SetRootPartition(ctx, root))
readRoot, err = txn.GetPartition(ctx, RootKey)
require.NoError(t, err)
testingAssertPartitionsEqual(t, root, readRoot)

// Round 3: set the root as a leaf-level partition whose three children are
// primary keys rather than partition keys.
vectors = vector.T{4, 3}.AsSet()
vectors.Add(vector.T{2, 1})
vectors.Add(vector.T{5, 6})
quantizedSet = quantizer.Quantize(ctx, &vectors)
root = NewPartition(quantizer, quantizedSet, []ChildKey{primaryKey200, primaryKey300, primaryKey400}, LeafLevel)
require.NoError(t, txn.SetRootPartition(ctx, root))
readRoot, err = txn.GetPartition(ctx, RootKey)
require.NoError(t, err)
testingAssertPartitionsEqual(t, root, readRoot)
})

t.Run("insert a partition and then delete it", func(t *testing.T) {
// As above, one transaction spans the subtest and is committed on return.
txn := beginTransaction(ctx, t, store)
defer commitTransaction(ctx, t, store, txn)

// Insert a non-root partition and read it back under the key returned by
// InsertPartition.
vectors := vector.T{4, 3}.AsSet()
quantizedSet := quantizer.Quantize(ctx, &vectors)
testPartition := NewPartition(quantizer, quantizedSet, []ChildKey{childKey2}, Level(2))
partitionKey, err := txn.InsertPartition(ctx, testPartition)
require.NoError(t, err)
newPartition, err := txn.GetPartition(ctx, partitionKey)
require.NoError(t, err)
testingAssertPartitionsEqual(t, testPartition, newPartition)

// After deletion, reading the same key within the same transaction must
// return an error.
err = txn.DeletePartition(ctx, partitionKey)
require.NoError(t, err)
_, err = txn.GetPartition(ctx, partitionKey)
require.Error(t, err)
})
}
Loading

0 comments on commit 1c10b1f

Please sign in to comment.