Skip to content

Commit

Permalink
enhance: Allow explicit dynamic JSON column as insert data (#807)
Browse files Browse the repository at this point in the history
Related to #806

Previously, all columns that did not appear in the schema definition were
treated as dynamic columns. This PR allows specifying one JSON column to be
the dynamic column data.

---------

Signed-off-by: Congqi Xia <congqi.xia@zilliz.com>
  • Loading branch information
congqixia authored Aug 19, 2024
1 parent 4eff482 commit 542b5a0
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 15 deletions.
27 changes: 27 additions & 0 deletions client/insert.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,8 @@ func (c *GrpcClient) processInsertColumns(colSchema *entity.Schema, columns ...e
}
mNameColumn := make(map[string]entity.Column)
var dynamicColumns []entity.Column
hasExplictDynamicColumn := false
var expDynColumn entity.Column
for _, column := range columns {
_, dup := mNameColumn[column.Name()]
if dup {
Expand All @@ -100,6 +102,25 @@ func (c *GrpcClient) processInsertColumns(colSchema *entity.Schema, columns ...e
return nil, 0, errors.New("column size not match")
}
}

// check if explicitly passing dynamic json column
if column.Type() == entity.FieldTypeJSON {
jsonColumn, ok := column.(*entity.ColumnJSONBytes)
if ok && jsonColumn.IsDynamic() {
// schema not match
if !isDynamic {
return nil, 0, fmt.Errorf("collection %s is not dynamic but insert data contains explicit dynamic json field %s", colSchema.CollectionName, column.Name())
}
// multiple dynamic column
if hasExplictDynamicColumn {
return nil, 0, fmt.Errorf("multiple explicit dynamic json column found")
}
hasExplictDynamicColumn = true
expDynColumn = column
continue
}
}

field, has := mNameField[column.Name()]
if !has {
if !isDynamic {
Expand Down Expand Up @@ -142,13 +163,19 @@ func (c *GrpcClient) processInsertColumns(colSchema *entity.Schema, columns ...e
fieldsData = append(fieldsData, fixedColumn.FieldData())
}
if len(dynamicColumns) > 0 {
if hasExplictDynamicColumn {
return nil, 0, fmt.Errorf("dynamic fields & explicit dynamic column cannot be inserted at same time")
}
// use empty column name here
col, err := c.mergeDynamicColumns("", rowSize, dynamicColumns)
if err != nil {
return nil, 0, err
}
fieldsData = append(fieldsData, col)
}
if hasExplictDynamicColumn {
fieldsData = append(fieldsData, expDynColumn.FieldData())
}

return fieldsData, rowSize, nil
}
Expand Down
4 changes: 4 additions & 0 deletions entity/columns_json.go
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,10 @@ func (c *ColumnJSONBytes) WithIsDynamic(isDynamic bool) *ColumnJSONBytes {
return c
}

// IsDynamic returns the value of the column's isDynamic flag.
// NOTE(review): the flag is presumably set through WithIsDynamic — confirm
// against the full definition, which is only partially visible here.
func (c *ColumnJSONBytes) IsDynamic() bool {
return c.isDynamic
}

// NewColumnJSONBytes composes a Column with json bytes.
func NewColumnJSONBytes(name string, values [][]byte) *ColumnJSONBytes {
return &ColumnJSONBytes{
Expand Down
9 changes: 0 additions & 9 deletions test/testcases/insert_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -621,15 +621,6 @@ func TestInsertSparseInvalidVector(t *testing.T) {
data1 := append(data, entity.NewColumnSparseVectors(common.DefaultSparseVecFieldName, []entity.SparseEmbedding{sparseVec}))
_, err = mc.Insert(ctx, collName, "", data1...)
common.CheckErr(t, err, false, "invalid index in sparse float vector: must be less than 2^32-1")

// invalid sparse vector: empty position and values
positions = []uint32{}
values = []float32{}
sparseVec, err = entity.NewSliceSparseEmbedding(positions, values)
common.CheckErr(t, err, true)
data2 := append(data, entity.NewColumnSparseVectors(common.DefaultSparseVecFieldName, []entity.SparseEmbedding{sparseVec}))
_, err = mc.Insert(ctx, collName, "", data2...)
common.CheckErr(t, err, false, "empty sparse float vector row")
}

func TestInsertSparseVectorSamePosition(t *testing.T) {
Expand Down
6 changes: 0 additions & 6 deletions test/testcases/search_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1827,12 +1827,6 @@ func TestSearchInvalidSparseVector(t *testing.T) {
entity.IP, common.DefaultTopK, sp)
common.CheckErr(t, errSearch, false, "nq (number of search vector per search request) should be in range [1, 16384]")

vector1, err := entity.NewSliceSparseEmbedding([]uint32{}, []float32{})
common.CheckErr(t, err, true)
_, errSearch = mc.Search(ctx, collName, []string{}, "", []string{"*"}, []entity.Vector{vector1}, common.DefaultSparseVecFieldName,
entity.IP, common.DefaultTopK, sp)
common.CheckErr(t, errSearch, false, "Sparse row data should not be empty")

positions := make([]uint32, 100)
values := make([]float32, 100)
for i := 0; i < 100; i++ {
Expand Down

0 comments on commit 542b5a0

Please sign in to comment.