Skip to content

Commit

Permalink
Merge remote branch '7.6-couchbase' into master (#1997)
Browse files Browse the repository at this point in the history
Co-authored-by: Rahul Rampure <rahul.rampure@couchbase.com>
Co-authored-by: Aditi Ahuja <aditi.ahuja@couchbase.com>
Co-authored-by: Likith B <likith.b@couchbase.com>
Co-authored-by: Mohd Shaad Khan <65341373+moshaad7@users.noreply.github.com>
Co-authored-by: Thejas-bhat <thejas.orkombu@couchbase.com>
  • Loading branch information
6 people authored Mar 18, 2024
1 parent 5f1f45a commit d1a10ee
Show file tree
Hide file tree
Showing 61 changed files with 4,487 additions and 564 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ A modern text indexing library in go
* Conjunction, Disjunction, Boolean (must/should/must_not)
* Term Range, Numeric Range, Date Range
* [Geo Spatial](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/) for human entry
* Simple [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
* [Vector Search](https://github.com/blevesearch/bleve/blob/master/vectors.md)
* [tf-idf](https://en.wikipedia.org/wiki/Tf-idf) Scoring
* Query time boosting
* Search result match highlighting with document fragments
Expand Down
37 changes: 22 additions & 15 deletions document/field_vector.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ func init() {
const DefaultVectorIndexingOptions = index.IndexField

type VectorField struct {
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
options index.FieldIndexingOptions
value []float32
numPlainTextBytes uint64
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
options index.FieldIndexingOptions
value []float32
numPlainTextBytes uint64
vectorIndexOptimizedFor string // Optimization applied to this index.
}

func (n *VectorField) Size() int {
Expand Down Expand Up @@ -95,25 +96,27 @@ func (n *VectorField) GoString() string {
// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorField(name string, arrayPositions []uint64,
vector []float32, dims int, similarity string) *VectorField {
vector []float32, dims int, similarity, vectorIndexOptimizedFor string) *VectorField {
return NewVectorFieldWithIndexingOptions(name, arrayPositions,
vector, dims, similarity, DefaultVectorIndexingOptions)
vector, dims, similarity, vectorIndexOptimizedFor,
DefaultVectorIndexingOptions)
}

// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
vector []float32, dims int, similarity string,
vector []float32, dims int, similarity, vectorIndexOptimizedFor string,
options index.FieldIndexingOptions) *VectorField {
options = options | DefaultVectorIndexingOptions

return &VectorField{
name: name,
dims: dims,
similarity: similarity,
options: options,
value: vector,
numPlainTextBytes: numBytesFloat32s(vector),
name: name,
dims: dims,
similarity: similarity,
options: options,
value: vector,
numPlainTextBytes: numBytesFloat32s(vector),
vectorIndexOptimizedFor: vectorIndexOptimizedFor,
}
}

Expand All @@ -136,3 +139,7 @@ func (n *VectorField) Dims() int {
// Similarity returns the name of the similarity metric that this vector
// field uses for scoring.
func (n *VectorField) Similarity() string {
return n.similarity
}

// IndexOptimizedFor returns the optimization setting recorded for this
// vector field's index, exactly as it was supplied at construction time.
func (n *VectorField) IndexOptimizedFor() string {
return n.vectorIndexOptimizedFor
}
22 changes: 12 additions & 10 deletions error.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const (
ErrorUnknownIndexType
ErrorEmptyID
ErrorIndexReadInconsistency
ErrorTwoPhaseSearchInconsistency
)

// Error represents a more strongly typed bleve error for detecting
Expand All @@ -37,14 +38,15 @@ func (e Error) Error() string {
}

var errorMessages = map[Error]string{
ErrorIndexPathExists: "cannot create new index, path already exists",
ErrorIndexPathDoesNotExist: "cannot open index, path does not exist",
ErrorIndexMetaMissing: "cannot open index, metadata missing",
ErrorIndexMetaCorrupt: "cannot open index, metadata corrupt",
ErrorIndexClosed: "index is closed",
ErrorAliasMulti: "cannot perform single index operation on multiple index alias",
ErrorAliasEmpty: "cannot perform operation on empty alias",
ErrorUnknownIndexType: "unknown index type",
ErrorEmptyID: "document ID cannot be empty",
ErrorIndexReadInconsistency: "index read inconsistency detected",
ErrorIndexPathExists: "cannot create new index, path already exists",
ErrorIndexPathDoesNotExist: "cannot open index, path does not exist",
ErrorIndexMetaMissing: "cannot open index, metadata missing",
ErrorIndexMetaCorrupt: "cannot open index, metadata corrupt",
ErrorIndexClosed: "index is closed",
ErrorAliasMulti: "cannot perform single index operation on multiple index alias",
ErrorAliasEmpty: "cannot perform operation on empty alias",
ErrorUnknownIndexType: "unknown index type",
ErrorEmptyID: "document ID cannot be empty",
ErrorIndexReadInconsistency: "index read inconsistency detected",
ErrorTwoPhaseSearchInconsistency: "2-phase search failed, likely due to an overlapping topology change",
}
12 changes: 7 additions & 5 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,17 +1,17 @@
module github.com/blevesearch/bleve/v2

go 1.19
go 1.20

require (
github.com/RoaringBitmap/roaring v1.2.3
github.com/bits-and-blooms/bitset v1.2.0
github.com/blevesearch/bleve_index_api v1.0.6
github.com/blevesearch/geo v0.1.18
github.com/blevesearch/bleve_index_api v1.1.6
github.com/blevesearch/geo v0.1.20
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
github.com/blevesearch/gtreap v0.1.1
github.com/blevesearch/scorch_segment_api/v2 v2.1.6
github.com/blevesearch/scorch_segment_api/v2 v2.2.9
github.com/blevesearch/segment v0.9.1
github.com/blevesearch/snowball v0.6.1
github.com/blevesearch/snowballstem v0.9.0
Expand All @@ -23,6 +23,7 @@ require (
github.com/blevesearch/zapx/v13 v13.3.10
github.com/blevesearch/zapx/v14 v14.3.10
github.com/blevesearch/zapx/v15 v15.3.13
github.com/blevesearch/zapx/v16 v16.0.12
github.com/couchbase/moss v0.2.0
github.com/golang/protobuf v1.3.2
github.com/spf13/cobra v1.7.0
Expand All @@ -31,6 +32,7 @@ require (
)

require (
github.com/blevesearch/go-faiss v1.0.13 // indirect
github.com/blevesearch/mmap-go v1.0.4 // indirect
github.com/couchbase/ghistogram v0.1.0 // indirect
github.com/golang/geo v0.0.0-20210211234256-740aa86cb551 // indirect
Expand All @@ -39,5 +41,5 @@ require (
github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
golang.org/x/sys v0.5.0 // indirect
golang.org/x/sys v0.13.0 // indirect
)
20 changes: 12 additions & 8 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,12 @@ github.com/RoaringBitmap/roaring v1.2.3 h1:yqreLINqIrX22ErkKI0vY47/ivtJr6n+kMhVO
github.com/RoaringBitmap/roaring v1.2.3/go.mod h1:plvDsJQpxOC5bw8LRteu/MLWHsHez/3y6cubLI4/1yE=
github.com/bits-and-blooms/bitset v1.2.0 h1:Kn4yilvwNtMACtf1eYDlG8H77R07mZSPbMjLyS07ChA=
github.com/bits-and-blooms/bitset v1.2.0/go.mod h1:gIdJ4wp64HaoK2YrL1Q5/N7Y16edYb8uY+O0FJTyyDA=
github.com/blevesearch/bleve_index_api v1.0.6 h1:gyUUxdsrvmW3jVhhYdCVL6h9dCjNT/geNU7PxGn37p8=
github.com/blevesearch/bleve_index_api v1.0.6/go.mod h1:YXMDwaXFFXwncRS8UobWs7nvo0DmusriM1nztTlj1ms=
github.com/blevesearch/geo v0.1.18 h1:Np8jycHTZ5scFe7VEPLrDoHnnb9C4j636ue/CGrhtDw=
github.com/blevesearch/geo v0.1.18/go.mod h1:uRMGWG0HJYfWfFJpK3zTdnnr1K+ksZTuWKhXeSokfnM=
github.com/blevesearch/bleve_index_api v1.1.6 h1:orkqDFCBuNU2oHW9hN2YEJmet+TE9orml3FCGbl1cKk=
github.com/blevesearch/bleve_index_api v1.1.6/go.mod h1:PbcwjIcRmjhGbkS/lJCpfgVSMROV6TRubGGAODaK1W8=
github.com/blevesearch/geo v0.1.20 h1:paaSpu2Ewh/tn5DKn/FB5SzvH0EWupxHEIwbCk/QPqM=
github.com/blevesearch/geo v0.1.20/go.mod h1:DVG2QjwHNMFmjo+ZgzrIq2sfCh6rIHzy9d9d0B59I6w=
github.com/blevesearch/go-faiss v1.0.13 h1:zfFs7ZYD0NqXVSY37j0JZjZT1BhE9AE4peJfcx/NB4A=
github.com/blevesearch/go-faiss v1.0.13/go.mod h1:jrxHrbl42X/RnDPI+wBoZU8joxxuRwedrxqswQ3xfU8=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475 h1:kDy+zgJFJJoJYBvdfBSiZYBbdsUL0XcjHYWezpQBGPA=
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475/go.mod h1:9eJDeqxJ3E7WnLebQUlPD7ZjSce7AnDb9vjGmMCbD0A=
github.com/blevesearch/go-porterstemmer v1.0.3 h1:GtmsqID0aZdCSNiY8SkuPJ12pD4jI+DdXTAn4YRcHCo=
Expand All @@ -17,8 +19,8 @@ github.com/blevesearch/gtreap v0.1.1/go.mod h1:QaQyDRAT51sotthUWAH4Sj08awFSSWzgY
github.com/blevesearch/mmap-go v1.0.2/go.mod h1:ol2qBqYaOUsGdm7aRMRrYGgPvnwLe6Y+7LMvAB5IbSA=
github.com/blevesearch/mmap-go v1.0.4 h1:OVhDhT5B/M1HNPpYPBKIEJaD0F3Si+CrEKULGCDPWmc=
github.com/blevesearch/mmap-go v1.0.4/go.mod h1:EWmEAOmdAS9z/pi/+Toxu99DnsbhG1TIxUoRmJw/pSs=
github.com/blevesearch/scorch_segment_api/v2 v2.1.6 h1:CdekX/Ob6YCYmeHzD72cKpwzBjvkOGegHOqhAkXp6yA=
github.com/blevesearch/scorch_segment_api/v2 v2.1.6/go.mod h1:nQQYlp51XvoSVxcciBjtvuHPIVjlWrN1hX4qwK2cqdc=
github.com/blevesearch/scorch_segment_api/v2 v2.2.9 h1:3nBaSBRFokjE4FtPW3eUDgcAu3KphBg1GP07zy/6Uyk=
github.com/blevesearch/scorch_segment_api/v2 v2.2.9/go.mod h1:ckbeb7knyOOvAdZinn/ASbB7EA3HoagnJkmEV3J7+sg=
github.com/blevesearch/segment v0.9.1 h1:+dThDy+Lvgj5JMxhmOVlgFfkUtZV2kw49xax4+jTfSU=
github.com/blevesearch/segment v0.9.1/go.mod h1:zN21iLm7+GnBHWTao9I+Au/7MBiL8pPFtJBJTsk6kQw=
github.com/blevesearch/snowball v0.6.1 h1:cDYjn/NCH+wwt2UdehaLpr2e4BwLIjN4V/TdLsL+B5A=
Expand All @@ -41,6 +43,8 @@ github.com/blevesearch/zapx/v14 v14.3.10 h1:SG6xlsL+W6YjhX5N3aEiL/2tcWh3DO75Bnz7
github.com/blevesearch/zapx/v14 v14.3.10/go.mod h1:qqyuR0u230jN1yMmE4FIAuCxmahRQEOehF78m6oTgns=
github.com/blevesearch/zapx/v15 v15.3.13 h1:6EkfaZiPlAxqXz0neniq35my6S48QI94W/wyhnpDHHQ=
github.com/blevesearch/zapx/v15 v15.3.13/go.mod h1:Turk/TNRKj9es7ZpKK95PS7f6D44Y7fAFy8F4LXQtGg=
github.com/blevesearch/zapx/v16 v16.0.12 h1:Uccxvjmn+hQ6ywQP+wIiTpdq9LnAviGoryJOmGwAo/I=
github.com/blevesearch/zapx/v16 v16.0.12/go.mod h1:MYnOshRfSm4C4drxx1LGRI+MVFByykJ2anDY1fxdk9Q=
github.com/couchbase/ghistogram v0.1.0 h1:b95QcQTCzjTUocDXp/uMgSNQi8oj1tGwnJ4bODWZnps=
github.com/couchbase/ghistogram v0.1.0/go.mod h1:s1Jhy76zqfEecpNWJfWUiKZookAFaiGOEoyzgHt9i7k=
github.com/couchbase/moss v0.2.0 h1:VCYrMzFwEryyhRSeI+/b3tRBSeTpi/8gn5Kf6dxqn+o=
Expand Down Expand Up @@ -89,8 +93,8 @@ golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJ
golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181221143128-b4a75ba826a6/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0 h1:MUK/U/4lj1t1oPg0HfuXDN/Z1wv31ZJ/YcPiGccS4DU=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.13.0 h1:Af8nKPmuFypiUBjVoU9V20FiaFXOcuZI21p0ycVYYGE=
golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.8.0 h1:57P1ETyNKtuIjB4SRd15iJxuhj8Gc416Y78H3qgMh68=
Expand Down
23 changes: 21 additions & 2 deletions index/scorch/introducer.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ type segmentIntroduction struct {
obsoletes map[uint64]*roaring.Bitmap
ids []string
internal map[string][]byte
stats *fieldStats

applied chan error
persisted chan error
Expand Down Expand Up @@ -146,15 +147,21 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
newss := &SegmentSnapshot{
id: root.segment[i].id,
segment: root.segment[i].segment,
stats: root.segment[i].stats,
cachedDocs: root.segment[i].cachedDocs,
cachedMeta: root.segment[i].cachedMeta,
creator: root.segment[i].creator,
}

// apply new obsoletions
if root.segment[i].deleted == nil {
newss.deleted = delta
} else {
newss.deleted = roaring.Or(root.segment[i].deleted, delta)
if delta.IsEmpty() {
newss.deleted = root.segment[i].deleted
} else {
newss.deleted = roaring.Or(root.segment[i].deleted, delta)
}
}
if newss.deleted.IsEmpty() {
newss.deleted = nil
Expand Down Expand Up @@ -188,7 +195,9 @@ func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
newSegmentSnapshot := &SegmentSnapshot{
id: next.id,
segment: next.data, // take ownership of next.data's ref-count
stats: next.stats,
cachedDocs: &cachedDocs{cache: nil},
cachedMeta: &cachedMeta{meta: nil},
creator: "introduceSegment",
}
newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
Expand Down Expand Up @@ -275,7 +284,9 @@ func (s *Scorch) introducePersist(persist *persistIntroduction) {
id: segmentSnapshot.id,
segment: replacement,
deleted: segmentSnapshot.deleted,
stats: segmentSnapshot.stats,
cachedDocs: segmentSnapshot.cachedDocs,
cachedMeta: segmentSnapshot.cachedMeta,
creator: "introducePersist",
mmaped: 1,
}
Expand Down Expand Up @@ -374,7 +385,9 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
id: root.segment[i].id,
segment: root.segment[i].segment,
deleted: root.segment[i].deleted,
stats: root.segment[i].stats,
cachedDocs: root.segment[i].cachedDocs,
cachedMeta: root.segment[i].cachedMeta,
creator: root.segment[i].creator,
})
root.segment[i].segment.AddRef()
Expand All @@ -394,7 +407,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}
}
}

// before the newMerge introduction, need to clean the newly
// merged segment wrt the current root segments, hence
// applying the obsolete segment contents to newly merged segment
Expand All @@ -415,12 +427,19 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
if nextMerge.new != nil &&
nextMerge.new.Count() > newSegmentDeleted.GetCardinality() {

stats := newFieldStats()
if fsr, ok := nextMerge.new.(segment.FieldStatsReporter); ok {
fsr.UpdateFieldStats(stats)
}

// put new segment at end
newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
id: nextMerge.id,
segment: nextMerge.new, // take ownership for nextMerge.new's ref-count
deleted: newSegmentDeleted,
stats: stats,
cachedDocs: &cachedDocs{cache: nil},
cachedMeta: &cachedMeta{meta: nil},
creator: "introduceMerge",
mmaped: nextMerge.mmaped,
})
Expand Down
8 changes: 4 additions & 4 deletions index/scorch/merge.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,

atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))

oldMap := make(map[uint64]*SegmentSnapshot)
oldMap := make(map[uint64]*SegmentSnapshot, len(task.Segments))
newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
Expand Down Expand Up @@ -357,7 +357,7 @@ func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
totalBytesRead := seg.BytesRead() + prevBytesReadTotal
seg.ResetBytesRead(totalBytesRead)

oldNewDocNums = make(map[uint64][]uint64)
oldNewDocNums = make(map[uint64][]uint64, len(newDocNums))
for i, segNewDocNums := range newDocNums {
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
}
Expand Down Expand Up @@ -485,8 +485,8 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,

sm := &segmentMerge{
id: newSegmentID,
old: make(map[uint64]*SegmentSnapshot),
oldNewDocNums: make(map[uint64][]uint64),
old: make(map[uint64]*SegmentSnapshot, len(sbsIndexes)),
oldNewDocNums: make(map[uint64][]uint64, len(sbsIndexes)),
new: seg,
notifyCh: make(chan *mergeTaskIntroStatus),
}
Expand Down
3 changes: 2 additions & 1 deletion index/scorch/optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,11 @@ package scorch

import (
"fmt"
"sync/atomic"

"github.com/RoaringBitmap/roaring"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
"sync/atomic"
)

var OptimizeConjunction = true
Expand Down
Loading

0 comments on commit d1a10ee

Please sign in to comment.