Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add composite indexes #2226

Merged
merged 31 commits into from
Jan 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
14e53ec
unique index
islamaliev Dec 15, 2023
f28b785
Adjust indexKeyBuilder to work with composite indexes
islamaliev Dec 18, 2023
4fbeebe
Remove assertion on DocFetched
islamaliev Dec 19, 2023
a44954c
Make base index iterate all fields
islamaliev Dec 20, 2023
18d6eb5
Implement CRUD for composite indexes
islamaliev Dec 20, 2023
0c5794b
Fix after rebase
islamaliev Dec 20, 2023
8d23b1c
Make value matcher index-key-unaware
islamaliev Dec 21, 2023
e06e231
Make SplitFilter work with multiple fields
islamaliev Dec 22, 2023
c3c6896
Swap expected and actual
islamaliev Jan 3, 2024
640e2b9
Pass around index instead of indexed field
islamaliev Jan 3, 2024
d701adf
Make index fetcher use all fields of the index
islamaliev Jan 3, 2024
1c6427b
Pass index and all filter conditions to index iter factory
islamaliev Jan 3, 2024
c40fb39
Fix edge-case in filtering algorithm
islamaliev Jan 4, 2024
8bf5658
Extract matcher creation into another function
islamaliev Jan 5, 2024
b87b9ae
Add multiple matchers
islamaliev Jan 5, 2024
990aa8e
Create multiple value matchers
islamaliev Jan 8, 2024
86a6ee7
Fix edge-case with filter normalization
islamaliev Jan 8, 2024
cb924ef
Execute matching for remaining fields of index
islamaliev Jan 8, 2024
03f3631
Add tests for composite index
islamaliev Jan 8, 2024
8fbd099
Fix after rebase
islamaliev Jan 15, 2024
ad7997a
Query composite index on nil values
islamaliev Jan 15, 2024
770a191
filter on composite index without middle value
islamaliev Jan 15, 2024
b220e6b
Fix lint
islamaliev Jan 15, 2024
e30e51b
Add copyright header
islamaliev Jan 15, 2024
338dd84
Add unique composite index
islamaliev Jan 17, 2024
2059513
Rename method, add comment
islamaliev Jan 19, 2024
c053e7d
Merge 2 errors into 1
islamaliev Jan 19, 2024
2022a93
Add a comment
islamaliev Jan 19, 2024
aaa0bc6
PR fixup
islamaliev Jan 19, 2024
28ec9ad
Write schema string directly in test action
islamaliev Jan 19, 2024
fbb69d2
Fix after rebase
islamaliev Jan 22, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions client/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,3 +58,15 @@ func (d CollectionDescription) CollectIndexedFields(schema *SchemaDescription) [
}
return fields
}

// GetIndexesOnField returns all indexes that are indexing the given field.
// If the field is not the first field of a composite index, the index is not returned.
//
// Index descriptions that declare no fields are skipped. The returned slice is
// never nil; it is empty when no index matches.
func (d CollectionDescription) GetIndexesOnField(fieldName string) []IndexDescription {
	result := []IndexDescription{}
	for _, index := range d.Indexes {
		// A description without fields has no leading field to compare;
		// skip it instead of panicking on Fields[0].
		if len(index.Fields) == 0 {
			continue
		}
		if index.Fields[0].Name == fieldName {
			result = append(result, index)
		}
	}
	return result
}
129 changes: 129 additions & 0 deletions client/index_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
// Copyright 2024 Democratized Data Foundation
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package client

import (
"testing"

"github.com/stretchr/testify/assert"
)

func TestCollectIndexesOnField(t *testing.T) {
tests := []struct {
name string
desc CollectionDescription
field string
expected []IndexDescription
}{
{
name: "no indexes",
desc: CollectionDescription{
Indexes: []IndexDescription{},
},
field: "test",
expected: []IndexDescription{},
},
{
name: "single index on field",
desc: CollectionDescription{
Indexes: []IndexDescription{
{
Name: "index1",
Fields: []IndexedFieldDescription{
{Name: "test", Direction: Ascending},
},
},
},
},
field: "test",
expected: []IndexDescription{
{
Name: "index1",
Fields: []IndexedFieldDescription{
{Name: "test", Direction: Ascending},
},
},
},
},
{
name: "multiple indexes on field",
desc: CollectionDescription{
Indexes: []IndexDescription{
{
Name: "index1",
Fields: []IndexedFieldDescription{
{Name: "test", Direction: Ascending},
},
},
{
Name: "index2",
Fields: []IndexedFieldDescription{
{Name: "test", Direction: Descending},
},
},
},
},
field: "test",
expected: []IndexDescription{
{
Name: "index1",
Fields: []IndexedFieldDescription{
{Name: "test", Direction: Ascending},
},
},
{
Name: "index2",
Fields: []IndexedFieldDescription{
{Name: "test", Direction: Descending},
},
},
},
},
{
name: "no indexes on field",
desc: CollectionDescription{
Indexes: []IndexDescription{
{
Name: "index1",
Fields: []IndexedFieldDescription{
{Name: "other", Direction: Ascending},
},
},
},
},
field: "test",
expected: []IndexDescription{},
},
{
name: "second field in composite index",
desc: CollectionDescription{
Indexes: []IndexDescription{
{
Name: "index1",
Fields: []IndexedFieldDescription{
{Name: "other", Direction: Ascending},
{Name: "test", Direction: Ascending},
},
},
},
},
field: "test",
expected: []IndexDescription{},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
actual := tt.desc.GetIndexesOnField(tt.field)
assert.Equal(t, tt.expected, actual)
})
}
}
3 changes: 2 additions & 1 deletion db/collection_index.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,7 +232,8 @@ func (c *collection) createIndex(
c.indexes = append(c.indexes, colIndex)
err = c.indexExistingDocs(ctx, txn, colIndex)
if err != nil {
return nil, err
removeErr := colIndex.RemoveAll(ctx, txn)
return nil, errors.Join(err, removeErr)
}
return colIndex, nil
}
Expand Down
27 changes: 8 additions & 19 deletions db/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ const (
errInvalidFieldValue string = "invalid field value"
errUnsupportedIndexFieldType string = "unsupported index field type"
errIndexDescriptionHasNoFields string = "index description has no fields"
errIndexDescHasNonExistingField string = "index description has non existing field"
errFieldOrAliasToFieldNotExist string = "The given field or alias to field does not exist"
errCreateFile string = "failed to create file"
errRemoveFile string = "failed to remove file"
Expand All @@ -86,7 +85,7 @@ const (
errExpectedJSONArray string = "expected JSON array"
errOneOneAlreadyLinked string = "target document is already linked to another document"
errIndexDoesNotMatchName string = "the index used does not match the given name"
errCanNotIndexNonUniqueField string = "can not index a doc's field that violates unique index"
errCanNotIndexNonUniqueFields string = "can not index a doc's field(s) that violates unique index"
errInvalidViewQuery string = "the query provided is not valid as a View"
)

Expand All @@ -108,6 +107,7 @@ var (
ErrExpectedJSONObject = errors.New(errExpectedJSONObject)
ErrExpectedJSONArray = errors.New(errExpectedJSONArray)
ErrInvalidViewQuery = errors.New(errInvalidViewQuery)
ErrCanNotIndexNonUniqueFields = errors.New(errCanNotIndexNonUniqueFields)
)

// NewErrFailedToGetHeads returns a new error indicating that the heads of a document
Expand Down Expand Up @@ -468,16 +468,6 @@ func NewErrIndexDescHasNoFields(desc client.IndexDescription) error {
)
}

// NewErrIndexDescHasNonExistingField builds the error reported when an index
// description refers to a field that does not exist. The description and the
// offending field name are attached to the error as key-value pairs.
func NewErrIndexDescHasNonExistingField(desc client.IndexDescription, fieldName string) error {
	descKV := errors.NewKV("Description", desc)
	fieldKV := errors.NewKV("Field name", fieldName)
	return errors.New(errIndexDescHasNonExistingField, descKV, fieldKV)
}

// NewErrCreateFile returns a new error indicating there was a failure in creating a file.
func NewErrCreateFile(inner error, filepath string) error {
return errors.Wrap(errCreateFile, inner, errors.NewKV("Filepath", filepath))
Expand Down Expand Up @@ -566,13 +556,12 @@ func NewErrIndexDoesNotMatchName(index, name string) error {
)
}

func NewErrCanNotIndexNonUniqueField(docID, fieldName string, value any) error {
return errors.New(
errCanNotIndexNonUniqueField,
errors.NewKV("DocID", docID),
errors.NewKV("Field name", fieldName),
errors.NewKV("Field value", value),
)
// NewErrCanNotIndexNonUniqueFields returns a new error indicating that a
// document's field value(s) violate a unique index. The DocID is attached as
// the first key-value pair, followed by the supplied fieldValues in order.
func NewErrCanNotIndexNonUniqueFields(docID string, fieldValues ...errors.KV) error {
	// Start from a fresh one-element slice so the caller's variadic slice is
	// never appended to in place.
	kvPairs := append([]errors.KV{errors.NewKV("DocID", docID)}, fieldValues...)
	return errors.New(errCanNotIndexNonUniqueFields, kvPairs...)
}

func NewErrInvalidViewQueryCastFailed(query string) error {
Expand Down
4 changes: 4 additions & 0 deletions db/fetcher/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ const (
errVFetcherFailedToGetDagLink string = "(version fetcher) failed to get node link from DAG"
errFailedToGetDagNode string = "failed to get DAG Node"
errMissingMapper string = "missing document mapper"
errInvalidInOperatorValue string = "invalid _in/_nin value"
errInvalidIndexFilterCondition string = "invalid index filter condition"
)

var (
Expand All @@ -41,6 +43,8 @@ var (
ErrFailedToGetDagNode = errors.New(errFailedToGetDagNode)
ErrMissingMapper = errors.New(errMissingMapper)
ErrSingleSpanOnly = errors.New("spans must contain only a single entry")
ErrInvalidInOperatorValue = errors.New(errInvalidInOperatorValue)
ErrInvalidIndexFilterCondition = errors.New(errInvalidIndexFilterCondition)
)

// NewErrFieldIdNotFound returns an error indicating that the given FieldId was not found.
Expand Down
72 changes: 39 additions & 33 deletions db/fetcher/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,33 +23,32 @@ import (
// IndexFetcher is a fetcher that fetches documents by index.
// It fetches only the indexed fields; the rest of the fields are fetched by the internal fetcher.
type IndexFetcher struct {
docFetcher Fetcher
col client.Collection
txn datastore.Txn
indexFilter *mapper.Filter
docFilter *mapper.Filter
doc *encodedDocument
mapping *core.DocumentMapping
indexedField client.FieldDescription
docFields []client.FieldDescription
indexDesc client.IndexDescription
indexIter indexIterator
indexDataStoreKey core.IndexDataStoreKey
execInfo ExecInfo
docFetcher Fetcher
col client.Collection
txn datastore.Txn
indexFilter *mapper.Filter
docFilter *mapper.Filter
doc *encodedDocument
mapping *core.DocumentMapping
indexedFields []client.FieldDescription
docFields []client.FieldDescription
indexDesc client.IndexDescription
indexIter indexIterator
execInfo ExecInfo
}

var _ Fetcher = (*IndexFetcher)(nil)

// NewIndexFetcher creates a new IndexFetcher.
func NewIndexFetcher(
docFetcher Fetcher,
indexedFieldDesc client.FieldDescription,
indexDesc client.IndexDescription,
indexFilter *mapper.Filter,
) *IndexFetcher {
return &IndexFetcher{
docFetcher: docFetcher,
indexedField: indexedFieldDesc,
indexFilter: indexFilter,
docFetcher: docFetcher,
indexDesc: indexDesc,
indexFilter: indexFilter,
}
}

Expand All @@ -69,24 +68,27 @@ func (f *IndexFetcher) Init(
f.mapping = docMapper
f.txn = txn

for _, index := range col.Description().Indexes {
if index.Fields[0].Name == f.indexedField.Name {
f.indexDesc = index
f.indexDataStoreKey.IndexID = index.ID
break
for _, indexedField := range f.indexDesc.Fields {
for _, field := range f.col.Schema().Fields {
if field.Name == indexedField.Name {
f.indexedFields = append(f.indexedFields, field)
break
}
}
}

f.indexDataStoreKey.CollectionID = f.col.ID()

f.docFields = make([]client.FieldDescription, 0, len(fields))
outer:
for i := range fields {
if fields[i].Name == f.indexedField.Name {
f.docFields = append(fields[:i], fields[i+1:]...)
break
for j := range f.indexedFields {
if fields[i].Name == f.indexedFields[j].Name {
continue outer
}
}
f.docFields = append(f.docFields, fields[i])
}

iter, err := createIndexIterator(f.indexDataStoreKey, f.indexFilter, &f.execInfo, f.indexDesc.Unique)
iter, err := f.createIndexIterator()
islamaliev marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return err
}
Expand Down Expand Up @@ -123,17 +125,21 @@ func (f *IndexFetcher) FetchNext(ctx context.Context) (EncodedDocument, ExecInfo
return nil, f.execInfo, nil
}

property := &encProperty{
Desc: f.indexedField,
Raw: res.key.FieldValues[0],
for i, indexedField := range f.indexedFields {
property := &encProperty{
Desc: indexedField,
Raw: res.key.FieldValues[i],
}

f.doc.properties[indexedField] = property
}

if f.indexDesc.Unique {
f.doc.id = res.value
} else {
f.doc.id = res.key.FieldValues[1]
f.doc.id = res.key.FieldValues[len(res.key.FieldValues)-1]
}
f.doc.properties[f.indexedField] = property

f.execInfo.FieldsFetched++

if f.docFetcher != nil && len(f.docFields) > 0 {
Expand Down
Loading
Loading