Skip to content

Commit

Permalink
vecstore: implement partition manipulation functions
Browse files Browse the repository at this point in the history
Add persistent storage to vecstore. Implement the methods for partition
creation, retrieval and deletion. Add a simple test to ensure these
methods are functioning.
  • Loading branch information
mw5h committed Dec 14, 2024
1 parent 4b3782a commit d1cff40
Show file tree
Hide file tree
Showing 5 changed files with 408 additions and 0 deletions.
16 changes: 16 additions & 0 deletions pkg/sql/vecindex/vecstore/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ go_library(
"in_memory_store.go",
"in_memory_txn.go",
"partition.go",
"persistent_store.go",
"persistent_txn.go",
"search_set.go",
"store.go",
"vecstorepb.go",
Expand All @@ -41,6 +43,11 @@ go_library(
importpath = "github.com/cockroachdb/cockroach/pkg/sql/vecindex/vecstore",
visibility = ["//visibility:public"],
deps = [
"//pkg/kv",
"//pkg/kv/kvpb",
"//pkg/kv/kvserver/concurrency/isolation",
"//pkg/roachpb",
"//pkg/sql/sem/builtins",
"//pkg/sql/vecindex/internal",
"//pkg/sql/vecindex/quantize",
"//pkg/util/container/heap",
Expand All @@ -61,16 +68,25 @@ go_test(
"in_memory_store_test.go",
"main_test.go",
"partition_test.go",
"persistent_store_test.go",
"search_set_test.go",
"vecstorepb_test.go",
],
embed = [":vecstore"],
deps = [
"//pkg/base",
"//pkg/keys",
"//pkg/roachpb",
"//pkg/security/securityassets",
"//pkg/security/securitytest",
"//pkg/server",
"//pkg/sql/randgen",
"//pkg/sql/rowenc",
"//pkg/sql/sem/tree",
"//pkg/sql/types",
"//pkg/sql/vecindex/internal",
"//pkg/sql/vecindex/quantize",
"//pkg/testutils/serverutils",
"//pkg/util/leaktest",
"//pkg/util/log",
"//pkg/util/num32",
Expand Down
6 changes: 6 additions & 0 deletions pkg/sql/vecindex/vecstore/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,10 @@
package vecstore

import (
"github.com/cockroachdb/cockroach/pkg/security/securityassets"
"github.com/cockroachdb/cockroach/pkg/security/securitytest"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"os"
"testing"

Expand All @@ -15,7 +19,9 @@ import (
//go:generate ../util/leaktest/add-leaktest.sh *_test.go

// TestMain performs the package-wide setup shared by every test in this
// package and then runs them. It installs securitytest.EmbeddedAssets as the
// security assets loader, seeds the random number generator for tests, and
// hands server.TestServerFactory to serverutils so tests can start servers.
// The process exits with the status reported by m.Run.
func TestMain(m *testing.M) {
	securityassets.SetLoader(securitytest.EmbeddedAssets)
	randutil.SeedForTests()
	serverutils.InitTestServerFactory(server.TestServerFactory)

	exitCode := m.Run()
	os.Exit(exitCode)
}
60 changes: 60 additions & 0 deletions pkg/sql/vecindex/vecstore/persistent_store.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package vecstore

import (
"context"
"github.com/cockroachdb/cockroach/pkg/kv"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/quantize"
)

// PersistentStore bundles the state used to run vector store operations on top
// of a KV database. Its fields are populated by NewPersistentStore.
type PersistentStore struct {
db *kv.DB // Needed for index maintenance functions
// quantizer is the quantizer handed to NewPersistentStore by the caller.
quantizer quantize.Quantizer
// rootQuantizer is an UnQuantizer created by NewPersistentStore with the same
// number of dimensions that quantizer reports via GetOriginalDims.
// NOTE(review): presumably used for the root partition only -- confirm
// against the transaction implementation.
rootQuantizer quantize.Quantizer
// prefix is the key handed to NewPersistentStore by the caller.
// NOTE(review): presumably the KV key prefix under which this index's
// partitions are stored -- confirm against the transaction implementation.
prefix roachpb.Key
}

// Compile-time assertion that *PersistentStore satisfies the Store interface.
var _ Store = (*PersistentStore)(nil)

// NewPersistentStore returns a PersistentStore that records db, quantizer and
// prefix as given. The store's root quantizer is always an UnQuantizer sized
// to the number of dimensions reported by quantizer.GetOriginalDims, so
// quantizer must be non-nil.
func NewPersistentStore(
	db *kv.DB, quantizer quantize.Quantizer, prefix roachpb.Key,
) *PersistentStore {
	dims := quantizer.GetOriginalDims()
	return &PersistentStore{
		db:            db,
		quantizer:     quantizer,
		rootQuantizer: quantize.NewUnQuantizer(dims),
		prefix:        prefix,
	}
}

// Begin is part of the vecstore.Store interface. It opens a new KV transaction
// on the store's database on behalf of the caller and wraps it, together with
// the store, in a PersistentStoreTxn. The returned error is always nil.
func (s *PersistentStore) Begin(ctx context.Context) (Txn, error) {
	kvTxn := s.db.NewTxn(ctx, "vecstore.PersistentStore begin transaction")
	return NewPersistentStoreTxn(s, kvTxn), nil
}

// Commit is part of the vecstore.Store interface. It commits the KV
// transaction wrapped by txn and returns the result of that commit. txn must
// be a *PersistentStoreTxn; any other implementation makes the type assertion
// panic.
func (s *PersistentStore) Commit(ctx context.Context, txn Txn) error {
	persistentTxn := txn.(*PersistentStoreTxn)
	return persistentTxn.kv.Commit(ctx)
}

// Abort is part of the vecstore.Store interface. It rolls back the KV
// transaction wrapped by txn and returns the result of that rollback. txn must
// be a *PersistentStoreTxn; any other implementation makes the type assertion
// panic.
func (s *PersistentStore) Abort(ctx context.Context, txn Txn) error {
	persistentTxn := txn.(*PersistentStoreTxn)
	return persistentTxn.kv.Rollback(ctx)
}

// MergeStats is not implemented for PersistentStore yet: every call panics
// and no error is ever returned.
// NOTE(review): presumably present so that PersistentStore satisfies the
// Store interface -- confirm against the interface definition.
func (s *PersistentStore) MergeStats(ctx context.Context, stats *IndexStats, skipMerge bool) error {
panic("MergeStats() unimplemented")
}
90 changes: 90 additions & 0 deletions pkg/sql/vecindex/vecstore/persistent_store_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the CockroachDB Software License
// included in the /LICENSE file.

package vecstore

import (
"context"
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/sql/rowenc"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/internal"
"github.com/cockroachdb/cockroach/pkg/sql/vecindex/quantize"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/vector"
"github.com/stretchr/testify/require"
)

// TestPersistentStore starts a test server and runs the partition methods of a
// PersistentStore against its KV database. The first subtest overwrites the
// root partition three times and reads it back after each write; the second
// inserts a partition, reads it back, deletes it, and checks that a further
// read returns an error.
func TestPersistentStore(t *testing.T) {
	ctx := internal.WithWorkspace(context.Background(), &internal.Workspace{})
	srv, _, db := serverutils.StartServer(t, base.TestServerArgs{
		DefaultTestTenant: base.TestIsForStuffThatShouldWorkWithSecondaryTenantsButDoesntYet(42),
	})
	defer srv.Stopper().Stop(ctx)

	// Child keys that point at other partitions.
	var (
		partitionChild2  = ChildKey{PartitionKey: 2}
		partitionChild10 = ChildKey{PartitionKey: 10}
		partitionChild20 = ChildKey{PartitionKey: 20}
	)
	// Child keys that carry primary keys, used for the leaf-level partition.
	var (
		primaryChild200 = ChildKey{PrimaryKey: PrimaryKey{2, 0}}
		primaryChild300 = ChildKey{PrimaryKey: PrimaryKey{3, 0}}
		primaryChild400 = ChildKey{PrimaryKey: PrimaryKey{4, 0}}
	)

	codec := keys.MakeSQLCodec(roachpb.MustMakeTenantID(5))
	indexPrefix := rowenc.MakeIndexKeyPrefix(codec, 500, 42)
	quantizer := quantize.NewUnQuantizer(2)
	store := NewPersistentStore(db, quantizer, indexPrefix)

	t.Run("insert a root partition into the store and read it back", func(t *testing.T) {
		txn := beginTransaction(ctx, t, store)
		defer commitTransaction(ctx, t, store, txn)

		// Stage 1: a root holding one vector and one child partition.
		oneVec := vector.T{4, 3}.AsSet()
		oneVecQuantized := quantizer.Quantize(ctx, &oneVec)
		rootOne := NewPartition(quantizer, oneVecQuantized, []ChildKey{partitionChild2}, Level(2))
		require.NoError(t, txn.SetRootPartition(ctx, rootOne))
		gotOne, err := txn.GetPartition(ctx, RootKey)
		require.NoError(t, err)
		testingAssertPartitionsEqual(t, rootOne, gotOne)

		// Stage 2: overwrite the root with two vectors and two child partitions.
		twoVecs := vector.T{4, 3}.AsSet()
		twoVecs.Add(vector.T{2, 1})
		twoVecsQuantized := quantizer.Quantize(ctx, &twoVecs)
		rootTwo := NewPartition(
			quantizer, twoVecsQuantized, []ChildKey{partitionChild10, partitionChild20}, Level(2),
		)
		require.NoError(t, txn.SetRootPartition(ctx, rootTwo))
		gotTwo, err := txn.GetPartition(ctx, RootKey)
		require.NoError(t, err)
		testingAssertPartitionsEqual(t, rootTwo, gotTwo)

		// Stage 3: overwrite the root with a leaf-level partition whose three
		// children are primary keys.
		threeVecs := vector.T{4, 3}.AsSet()
		threeVecs.Add(vector.T{2, 1})
		threeVecs.Add(vector.T{5, 6})
		threeVecsQuantized := quantizer.Quantize(ctx, &threeVecs)
		rootLeaf := NewPartition(
			quantizer,
			threeVecsQuantized,
			[]ChildKey{primaryChild200, primaryChild300, primaryChild400},
			LeafLevel,
		)
		require.NoError(t, txn.SetRootPartition(ctx, rootLeaf))
		gotLeaf, err := txn.GetPartition(ctx, RootKey)
		require.NoError(t, err)
		testingAssertPartitionsEqual(t, rootLeaf, gotLeaf)
	})

	t.Run("insert a partition and then delete it", func(t *testing.T) {
		txn := beginTransaction(ctx, t, store)
		defer commitTransaction(ctx, t, store, txn)

		// Insert a one-vector partition and confirm it reads back unchanged.
		vecs := vector.T{4, 3}.AsSet()
		quantized := quantizer.Quantize(ctx, &vecs)
		inserted := NewPartition(quantizer, quantized, []ChildKey{partitionChild2}, Level(2))
		insertedKey, err := txn.InsertPartition(ctx, inserted)
		require.NoError(t, err)
		fetched, err := txn.GetPartition(ctx, insertedKey)
		require.NoError(t, err)
		testingAssertPartitionsEqual(t, inserted, fetched)

		// After deletion, reading the same key must fail.
		require.NoError(t, txn.DeletePartition(ctx, insertedKey))
		_, err = txn.GetPartition(ctx, insertedKey)
		require.Error(t, err)
	})
}
Loading

0 comments on commit d1cff40

Please sign in to comment.