From 22777af66f7b8865e673dda210dd65a1db7f7d8b Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 14:08:20 +0800 Subject: [PATCH 01/43] fix: mark stats --- modules/indexer/stats/indexer.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/indexer/stats/indexer.go b/modules/indexer/stats/indexer.go index 1c01e25e29e2c..d8f355e3f4d20 100644 --- a/modules/indexer/stats/indexer.go +++ b/modules/indexer/stats/indexer.go @@ -11,6 +11,7 @@ import ( ) // Indexer defines an interface to index repository stats +// TODO: this indexer is quite different from the others, maybe it should be moved out for module/indexer type Indexer interface { Index(id int64) error Close() From 534945bff51304633e87aeccae420f18e266006c Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 14:49:22 +0800 Subject: [PATCH 02/43] feat: bleve issue basic --- modules/indexer/code/bleve.go | 1 + modules/indexer/internal/bleve/indexer.go | 84 +++++++++++++++++++++++ modules/indexer/internal/indexer.go | 15 ++++ modules/indexer/issues/bleve.go | 38 ++++------ modules/indexer/issues/indexer.go | 5 +- 5 files changed, 115 insertions(+), 28 deletions(-) create mode 100644 modules/indexer/internal/bleve/indexer.go create mode 100644 modules/indexer/internal/indexer.go diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 5936613e3ad8a..150a310458a60 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -62,6 +62,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { // openBleveIndexer open the index at the specified path, checking for metadata // updates and bleve version updates. 
If index needs to be created (or // re-created), returns (nil, nil) +// Deprecated: func openBleveIndexer(path string, latestVersion int) (bleve.Index, error) { _, err := os.Stat(path) if err != nil && os.IsNotExist(err) { diff --git a/modules/indexer/internal/bleve/indexer.go b/modules/indexer/internal/bleve/indexer.go new file mode 100644 index 0000000000000..6d443e8e06247 --- /dev/null +++ b/modules/indexer/internal/bleve/indexer.go @@ -0,0 +1,84 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package bleve + +import ( + "os" + + "code.gitea.io/gitea/modules/indexer/internal" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/index/upsidedown" + "github.com/ethantkoenig/rupture" +) + +var _ internal.Indexer = &Indexer{} + +// Indexer represents a bleve indexer implementation +type Indexer struct { + IndexDir string + Indexer bleve.Index + Version int +} + +// Init initializes the indexer +func (i *Indexer) Init() (bool, error) { + var err error + i.Indexer, err = openIndexer(i.IndexDir, i.Version) + if err != nil { + return false, err + } + if i.Indexer != nil { + return true, nil + } + return false, nil +} + +// Ping checks if the indexer is available +func (i *Indexer) Ping() bool { + return i.Indexer != nil +} + +func (i *Indexer) Close() { + if indexer := i.Indexer; indexer != nil { + if err := indexer.Close(); err != nil { + log.Error("Failed to close bleve indexer in %q: %v", i.IndexDir, err) + } + } +} + +// openIndexer open the index at the specified path, checking for metadata +// updates and bleve version updates. 
If index needs to be created (or +// re-created), returns (nil, nil) +func openIndexer(path string, latestVersion int) (bleve.Index, error) { + _, err := os.Stat(path) + if err != nil && os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + + metadata, err := rupture.ReadIndexMetadata(path) + if err != nil { + return nil, err + } + if metadata.Version < latestVersion { + // the indexer is using a previous version, so we should delete it and + // re-populate + return nil, util.RemoveAll(path) + } + + index, err := bleve.Open(path) + if err != nil && err == upsidedown.IncompatibleVersion { + // the indexer was built with a previous version of bleve, so we should + // delete it and re-populate + return nil, util.RemoveAll(path) + } else if err != nil { + return nil, err + } + + return index, nil +} diff --git a/modules/indexer/internal/indexer.go b/modules/indexer/internal/indexer.go new file mode 100644 index 0000000000000..ceee31f37d499 --- /dev/null +++ b/modules/indexer/internal/indexer.go @@ -0,0 +1,15 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package internal + +// Indexer defines an basic indexer interface +type Indexer interface { + // Init initializes the indexer + // returns true if the index was opened (with data populated), false if it was created (without any data) + Init() (bool, error) + // Ping checks if the indexer is available + Ping() bool + // Close closes the indexer + Close() +} diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go index 60d9ef76174f6..fa09efae00bcc 100644 --- a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve.go @@ -10,7 +10,7 @@ import ( "strconv" gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve" - "code.gitea.io/gitea/modules/log" + in_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" @@ -75,6 +75,7 @@ const maxBatchSize = 16 // openIndexer open the index at the specified path, checking for metadata // updates and bleve version updates. 
If index needs to be created (or // re-created), returns (nil, nil) +// Deprecated: func openIndexer(path string, latestVersion int) (bleve.Index, error) { _, err := os.Stat(path) if err != nil && os.IsNotExist(err) { @@ -161,49 +162,36 @@ var _ Indexer = &BleveIndexer{} // BleveIndexer implements Indexer interface type BleveIndexer struct { - indexDir string - indexer bleve.Index + in_bleve.Indexer } // NewBleveIndexer creates a new bleve local indexer func NewBleveIndexer(indexDir string) *BleveIndexer { return &BleveIndexer{ - indexDir: indexDir, + Indexer: in_bleve.Indexer{ + IndexDir: indexDir, + Version: issueIndexerLatestVersion, + }, } } // Init will initialize the indexer func (b *BleveIndexer) Init() (bool, error) { - var err error - b.indexer, err = openIndexer(b.indexDir, issueIndexerLatestVersion) + opened, err := b.Indexer.Init() if err != nil { return false, err } - if b.indexer != nil { + if opened { return true, nil } - b.indexer, err = createIssueIndexer(b.indexDir, issueIndexerLatestVersion) + b.Indexer.Indexer, err = createIssueIndexer(b.IndexDir, issueIndexerLatestVersion) return false, err } -// Ping does nothing -func (b *BleveIndexer) Ping() bool { - return true -} - -// Close will close the bleve indexer -func (b *BleveIndexer) Close() { - if b.indexer != nil { - if err := b.indexer.Close(); err != nil { - log.Error("Error whilst closing indexer: %v", err) - } - } -} - // Index will save the index data func (b *BleveIndexer) Index(issues []*IndexerData) error { - batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) + batch := gitea_bleve.NewFlushingBatch(b.Indexer.Indexer, maxBatchSize) for _, issue := range issues { if err := batch.Index(indexerID(issue.ID), struct { RepoID int64 @@ -224,7 +212,7 @@ func (b *BleveIndexer) Index(issues []*IndexerData) error { // Delete deletes indexes by ids func (b *BleveIndexer) Delete(ids ...int64) error { - batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) + batch := 
gitea_bleve.NewFlushingBatch(b.Indexer.Indexer, maxBatchSize) for _, id := range ids { if err := batch.Delete(indexerID(id)); err != nil { return err @@ -255,7 +243,7 @@ func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false) search.SortBy([]string{"-_score"}) - result, err := b.indexer.SearchInContext(ctx, search) + result, err := b.Indexer.Indexer.SearchInContext(ctx, search) if err != nil { return nil, err } diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index f36ea10935412..fba91c9e02c72 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -15,6 +15,7 @@ import ( issues_model "code.gitea.io/gitea/models/issues" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/queue" @@ -47,12 +48,10 @@ type SearchResult struct { // Indexer defines an interface to indexer issues contents type Indexer interface { - Init() (bool, error) - Ping() bool + internal.Indexer Index(issue []*IndexerData) error Delete(ids ...int64) error Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) - Close() } type indexerHolder struct { From 15d6247e8ff66e20707ae9ec974df9822a28a5e5 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 15:55:58 +0800 Subject: [PATCH 03/43] feat: elasticsearch issue --- modules/indexer/internal/bleve/indexer.go | 40 +----- modules/indexer/internal/bleve/util.go | 47 +++++++ .../indexer/internal/elasticsearch/indexer.go | 59 +++++++++ .../indexer/internal/elasticsearch/util.go | 34 +++++ modules/indexer/internal/indexer.go | 2 +- modules/indexer/issues/elastic_search.go | 122 ++++++------------ 6 files changed, 180 insertions(+), 124 deletions(-) create mode 
100644 modules/indexer/internal/bleve/util.go create mode 100644 modules/indexer/internal/elasticsearch/indexer.go create mode 100644 modules/indexer/internal/elasticsearch/util.go diff --git a/modules/indexer/internal/bleve/indexer.go b/modules/indexer/internal/bleve/indexer.go index 6d443e8e06247..cb02d992a6426 100644 --- a/modules/indexer/internal/bleve/indexer.go +++ b/modules/indexer/internal/bleve/indexer.go @@ -4,20 +4,15 @@ package bleve import ( - "os" - "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" - "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" - "github.com/blevesearch/bleve/v2/index/upsidedown" - "github.com/ethantkoenig/rupture" ) var _ internal.Indexer = &Indexer{} -// Indexer represents a bleve indexer implementation +// Indexer represents a basic bleve indexer implementation type Indexer struct { IndexDir string Indexer bleve.Index @@ -49,36 +44,3 @@ func (i *Indexer) Close() { } } } - -// openIndexer open the index at the specified path, checking for metadata -// updates and bleve version updates. 
If index needs to be created (or -// re-created), returns (nil, nil) -func openIndexer(path string, latestVersion int) (bleve.Index, error) { - _, err := os.Stat(path) - if err != nil && os.IsNotExist(err) { - return nil, nil - } else if err != nil { - return nil, err - } - - metadata, err := rupture.ReadIndexMetadata(path) - if err != nil { - return nil, err - } - if metadata.Version < latestVersion { - // the indexer is using a previous version, so we should delete it and - // re-populate - return nil, util.RemoveAll(path) - } - - index, err := bleve.Open(path) - if err != nil && err == upsidedown.IncompatibleVersion { - // the indexer was built with a previous version of bleve, so we should - // delete it and re-populate - return nil, util.RemoveAll(path) - } else if err != nil { - return nil, err - } - - return index, nil -} diff --git a/modules/indexer/internal/bleve/util.go b/modules/indexer/internal/bleve/util.go new file mode 100644 index 0000000000000..94dbbce4bcd8d --- /dev/null +++ b/modules/indexer/internal/bleve/util.go @@ -0,0 +1,47 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package bleve + +import ( + "os" + + "code.gitea.io/gitea/modules/util" + + "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/index/upsidedown" + "github.com/ethantkoenig/rupture" +) + +// openIndexer open the index at the specified path, checking for metadata +// updates and bleve version updates. 
If index needs to be created (or +// re-created), returns (nil, nil) +func openIndexer(path string, latestVersion int) (bleve.Index, error) { + _, err := os.Stat(path) + if err != nil && os.IsNotExist(err) { + return nil, nil + } else if err != nil { + return nil, err + } + + metadata, err := rupture.ReadIndexMetadata(path) + if err != nil { + return nil, err + } + if metadata.Version < latestVersion { + // the indexer is using a previous version, so we should delete it and + // re-populate + return nil, util.RemoveAll(path) + } + + index, err := bleve.Open(path) + if err != nil && err == upsidedown.IncompatibleVersion { + // the indexer was built with a previous version of bleve, so we should + // delete it and re-populate + return nil, util.RemoveAll(path) + } else if err != nil { + return nil, err + } + + return index, nil +} diff --git a/modules/indexer/internal/elasticsearch/indexer.go b/modules/indexer/internal/elasticsearch/indexer.go new file mode 100644 index 0000000000000..d80322e292a61 --- /dev/null +++ b/modules/indexer/internal/elasticsearch/indexer.go @@ -0,0 +1,59 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package elasticsearch + +import ( + "sync" + + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/internal" + + "github.com/olivere/elastic/v7" +) + +var _ internal.Indexer = &Indexer{} + +// Indexer represents a basic elasticsearch indexer implementation +type Indexer struct { + Client *elastic.Client + IndexerName string + available bool + StopTimer chan struct{} + lock sync.RWMutex +} + +func NewIndexer(client *elastic.Client, indexerName string) *Indexer { + return &Indexer{ + Client: client, + IndexerName: indexerName, + available: true, + StopTimer: make(chan struct{}), + } +} + +// Init initializes the indexer +func (i *Indexer) Init() (bool, error) { + ctx := graceful.GetManager().HammerContext() + exists, err := i.Client.IndexExists(i.IndexerName).Do(ctx) + if err != nil { + return false, i.CheckError(err) + } + return exists, nil +} + +// Ping checks if the indexer is available +func (i *Indexer) Ping() bool { + i.lock.RLock() + defer i.lock.RUnlock() + return i.available +} + +// Close closes the indexer +func (i *Indexer) Close() { + select { + case <-i.StopTimer: + default: + close(i.StopTimer) + } +} diff --git a/modules/indexer/internal/elasticsearch/util.go b/modules/indexer/internal/elasticsearch/util.go new file mode 100644 index 0000000000000..40ac7d4f32d6d --- /dev/null +++ b/modules/indexer/internal/elasticsearch/util.go @@ -0,0 +1,34 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package elasticsearch + +import ( + "errors" + "net" + + "github.com/olivere/elastic/v7" +) + +// CheckError checks if the error is a connection error and sets the availability +func (i *Indexer) CheckError(err error) error { + var opErr *net.OpError + if !(elastic.IsConnErr(err) || (errors.As(err, &opErr) && (opErr.Op == "dial" || opErr.Op == "read"))) { + return err + } + + i.SetAvailability(false) + + return err +} + +func (i *Indexer) SetAvailability(available bool) { + i.lock.Lock() + defer i.lock.Unlock() + + if i.available == available { + return + } + + i.available = available +} diff --git a/modules/indexer/internal/indexer.go b/modules/indexer/internal/indexer.go index ceee31f37d499..c71fc86ce8cb2 100644 --- a/modules/indexer/internal/indexer.go +++ b/modules/indexer/internal/indexer.go @@ -6,7 +6,7 @@ package internal // Indexer defines an basic indexer interface type Indexer interface { // Init initializes the indexer - // returns true if the index was opened (with data populated), false if it was created (without any data) + // returns true if the index was opened/existed (with data populated), false if it was created/not-existed (with no data) Init() (bool, error) // Ping checks if the indexer is available Ping() bool diff --git a/modules/indexer/issues/elastic_search.go b/modules/indexer/issues/elastic_search.go index ec62f857adace..2606e1e119c41 100644 --- a/modules/indexer/issues/elastic_search.go +++ b/modules/indexer/issues/elastic_search.go @@ -7,12 +7,11 @@ import ( "context" "errors" "fmt" - "net" "strconv" - "sync" "time" "code.gitea.io/gitea/modules/graceful" + in_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/log" "github.com/olivere/elastic/v7" @@ -22,11 +21,7 @@ var _ Indexer = &ElasticSearchIndexer{} // ElasticSearchIndexer implements Indexer interface type ElasticSearchIndexer struct { - client *elastic.Client - indexerName string - available 
bool - stopTimer chan struct{} - lock sync.RWMutex + *in_elasticsearch.Indexer } // NewElasticSearchIndexer creates a new elasticsearch indexer @@ -49,10 +44,7 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, er } indexer := &ElasticSearchIndexer{ - client: client, - indexerName: indexerName, - available: true, - stopTimer: make(chan struct{}), + Indexer: in_elasticsearch.NewIndexer(client, indexerName), } ticker := time.NewTicker(10 * time.Second) @@ -61,7 +53,7 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, er select { case <-ticker.C: indexer.checkAvailability() - case <-indexer.stopTimer: + case <-indexer.StopTimer: ticker.Stop() return } @@ -102,33 +94,26 @@ const ( // Init will initialize the indexer func (b *ElasticSearchIndexer) Init() (bool, error) { - ctx := graceful.GetManager().HammerContext() - exists, err := b.client.IndexExists(b.indexerName).Do(ctx) + opened, err := b.Indexer.Init() if err != nil { - return false, b.checkError(err) + return false, err + } + if opened { + return true, nil } - if !exists { - mapping := defaultMapping - - createIndex, err := b.client.CreateIndex(b.indexerName).BodyString(mapping).Do(ctx) - if err != nil { - return false, b.checkError(err) - } - if !createIndex.Acknowledged { - return false, errors.New("init failed") - } + mapping := defaultMapping - return false, nil + ctx := graceful.GetManager().HammerContext() + createIndex, err := b.Client.CreateIndex(b.IndexerName).BodyString(mapping).Do(ctx) + if err != nil { + return false, b.CheckError(err) + } + if !createIndex.Acknowledged { + return false, errors.New("init failed") } - return true, nil -} -// Ping checks if elastic is available -func (b *ElasticSearchIndexer) Ping() bool { - b.lock.RLock() - defer b.lock.RUnlock() - return b.available + return false, nil } // Index will save the index data @@ -137,8 +122,8 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { return nil } else 
if len(issues) == 1 { issue := issues[0] - _, err := b.client.Index(). - Index(b.indexerName). + _, err := b.Client.Index(). + Index(b.IndexerName). Id(fmt.Sprintf("%d", issue.ID)). BodyJson(map[string]interface{}{ "id": issue.ID, @@ -148,14 +133,14 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { "comments": issue.Comments, }). Do(graceful.GetManager().HammerContext()) - return b.checkError(err) + return b.CheckError(err) } reqs := make([]elastic.BulkableRequest, 0) for _, issue := range issues { reqs = append(reqs, elastic.NewBulkIndexRequest(). - Index(b.indexerName). + Index(b.IndexerName). Id(fmt.Sprintf("%d", issue.ID)). Doc(map[string]interface{}{ "id": issue.ID, @@ -167,11 +152,11 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { ) } - _, err := b.client.Bulk(). - Index(b.indexerName). + _, err := b.Client.Bulk(). + Index(b.IndexerName). Add(reqs...). Do(graceful.GetManager().HammerContext()) - return b.checkError(err) + return b.CheckError(err) } // Delete deletes indexes by ids @@ -179,27 +164,27 @@ func (b *ElasticSearchIndexer) Delete(ids ...int64) error { if len(ids) == 0 { return nil } else if len(ids) == 1 { - _, err := b.client.Delete(). - Index(b.indexerName). + _, err := b.Client.Delete(). + Index(b.IndexerName). Id(fmt.Sprintf("%d", ids[0])). Do(graceful.GetManager().HammerContext()) - return b.checkError(err) + return b.CheckError(err) } reqs := make([]elastic.BulkableRequest, 0) for _, id := range ids { reqs = append(reqs, elastic.NewBulkDeleteRequest(). - Index(b.indexerName). + Index(b.IndexerName). Id(fmt.Sprintf("%d", id)), ) } - _, err := b.client.Bulk(). - Index(b.indexerName). + _, err := b.Client.Bulk(). + Index(b.IndexerName). Add(reqs...). Do(graceful.GetManager().HammerContext()) - return b.checkError(err) + return b.CheckError(err) } // Search searches for issues by given conditions. 
@@ -216,14 +201,14 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, keyword string, repoI repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...) query = query.Must(repoQuery) } - searchResult, err := b.client.Search(). - Index(b.indexerName). + searchResult, err := b.Client.Search(). + Index(b.IndexerName). Query(query). Sort("_score", false). From(start).Size(limit). Do(ctx) if err != nil { - return nil, b.checkError(err) + return nil, b.CheckError(err) } hits := make([]Match, 0, limit) @@ -240,48 +225,17 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, keyword string, repoI }, nil } -// Close implements indexer -func (b *ElasticSearchIndexer) Close() { - select { - case <-b.stopTimer: - default: - close(b.stopTimer) - } -} - -func (b *ElasticSearchIndexer) checkError(err error) error { - var opErr *net.OpError - if !(elastic.IsConnErr(err) || (errors.As(err, &opErr) && (opErr.Op == "dial" || opErr.Op == "read"))) { - return err - } - - b.setAvailability(false) - - return err -} - func (b *ElasticSearchIndexer) checkAvailability() { if b.Ping() { return } // Request cluster state to check if elastic is available again - _, err := b.client.ClusterState().Do(graceful.GetManager().ShutdownContext()) + _, err := b.Client.ClusterState().Do(graceful.GetManager().ShutdownContext()) if err != nil { - b.setAvailability(false) - return - } - - b.setAvailability(true) -} - -func (b *ElasticSearchIndexer) setAvailability(available bool) { - b.lock.Lock() - defer b.lock.Unlock() - - if b.available == available { + b.SetAvailability(false) return } - b.available = available + b.SetAvailability(true) } From afde928f46cf4ed216c9d80b1de9dce077b8f492 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 16:12:39 +0800 Subject: [PATCH 04/43] fix: better composite --- .../indexer/internal/elasticsearch/indexer.go | 25 +++++-- .../indexer/internal/elasticsearch/util.go | 20 ++++- modules/indexer/issues/bleve.go | 59 ++++----------- 
modules/indexer/issues/elastic_search.go | 75 +++++++------------ 4 files changed, 76 insertions(+), 103 deletions(-) diff --git a/modules/indexer/internal/elasticsearch/indexer.go b/modules/indexer/internal/elasticsearch/indexer.go index d80322e292a61..0105ca6ef756d 100644 --- a/modules/indexer/internal/elasticsearch/indexer.go +++ b/modules/indexer/internal/elasticsearch/indexer.go @@ -5,6 +5,7 @@ package elasticsearch import ( "sync" + "time" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/internal" @@ -19,17 +20,31 @@ type Indexer struct { Client *elastic.Client IndexerName string available bool - StopTimer chan struct{} + stopTimer chan struct{} lock sync.RWMutex } func NewIndexer(client *elastic.Client, indexerName string) *Indexer { - return &Indexer{ + indexer := &Indexer{ Client: client, IndexerName: indexerName, available: true, - StopTimer: make(chan struct{}), + stopTimer: make(chan struct{}), } + + ticker := time.NewTicker(10 * time.Second) + go func() { + for { + select { + case <-ticker.C: + indexer.checkAvailability() + case <-indexer.stopTimer: + ticker.Stop() + return + } + } + }() + return indexer } // Init initializes the indexer @@ -52,8 +67,8 @@ func (i *Indexer) Ping() bool { // Close closes the indexer func (i *Indexer) Close() { select { - case <-i.StopTimer: + case <-i.stopTimer: default: - close(i.StopTimer) + close(i.stopTimer) } } diff --git a/modules/indexer/internal/elasticsearch/util.go b/modules/indexer/internal/elasticsearch/util.go index 40ac7d4f32d6d..f46dc29d793ca 100644 --- a/modules/indexer/internal/elasticsearch/util.go +++ b/modules/indexer/internal/elasticsearch/util.go @@ -4,6 +4,7 @@ package elasticsearch import ( + "code.gitea.io/gitea/modules/graceful" "errors" "net" @@ -17,12 +18,12 @@ func (i *Indexer) CheckError(err error) error { return err } - i.SetAvailability(false) + i.setAvailability(false) return err } -func (i *Indexer) SetAvailability(available bool) { +func (i *Indexer) 
setAvailability(available bool) { i.lock.Lock() defer i.lock.Unlock() @@ -32,3 +33,18 @@ func (i *Indexer) SetAvailability(available bool) { i.available = available } + +func (i *Indexer) checkAvailability() { + if i.Ping() { + return + } + + // Request cluster state to check if elastic is available again + _, err := i.Client.ClusterState().Do(graceful.GetManager().ShutdownContext()) + if err != nil { + i.setAvailability(false) + return + } + + i.setAvailability(true) +} diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go index fa09efae00bcc..7be4d6f7a8811 100644 --- a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve.go @@ -6,12 +6,11 @@ package issues import ( "context" "fmt" - "os" "strconv" gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve" + "code.gitea.io/gitea/modules/indexer/internal" in_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" - "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -19,7 +18,6 @@ import ( "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" - "github.com/blevesearch/bleve/v2/index/upsidedown" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search/query" "github.com/ethantkoenig/rupture" @@ -72,40 +70,6 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { const maxBatchSize = 16 -// openIndexer open the index at the specified path, checking for metadata -// updates and bleve version updates. 
If index needs to be created (or -// re-created), returns (nil, nil) -// Deprecated: -func openIndexer(path string, latestVersion int) (bleve.Index, error) { - _, err := os.Stat(path) - if err != nil && os.IsNotExist(err) { - return nil, nil - } else if err != nil { - return nil, err - } - - metadata, err := rupture.ReadIndexMetadata(path) - if err != nil { - return nil, err - } - if metadata.Version < latestVersion { - // the indexer is using a previous version, so we should delete it and - // re-populate - return nil, util.RemoveAll(path) - } - - index, err := bleve.Open(path) - if err != nil && err == upsidedown.IncompatibleVersion { - // the indexer was built with a previous version of bleve, so we should - // delete it and re-populate - return nil, util.RemoveAll(path) - } else if err != nil { - return nil, err - } - - return index, nil -} - // BleveIndexerData an update to the issue indexer type BleveIndexerData IndexerData @@ -162,16 +126,19 @@ var _ Indexer = &BleveIndexer{} // BleveIndexer implements Indexer interface type BleveIndexer struct { - in_bleve.Indexer + in *in_bleve.Indexer + internal.Indexer // do not composite in_bleve.Indexer directly to avoid exposing too much } // NewBleveIndexer creates a new bleve local indexer func NewBleveIndexer(indexDir string) *BleveIndexer { + in := &in_bleve.Indexer{ + IndexDir: indexDir, + Version: issueIndexerLatestVersion, + } return &BleveIndexer{ - Indexer: in_bleve.Indexer{ - IndexDir: indexDir, - Version: issueIndexerLatestVersion, - }, + Indexer: in, + in: in, } } @@ -185,13 +152,13 @@ func (b *BleveIndexer) Init() (bool, error) { return true, nil } - b.Indexer.Indexer, err = createIssueIndexer(b.IndexDir, issueIndexerLatestVersion) + b.in.Indexer, err = createIssueIndexer(b.in.IndexDir, issueIndexerLatestVersion) return false, err } // Index will save the index data func (b *BleveIndexer) Index(issues []*IndexerData) error { - batch := gitea_bleve.NewFlushingBatch(b.Indexer.Indexer, maxBatchSize) + batch 
:= gitea_bleve.NewFlushingBatch(b.in.Indexer, maxBatchSize) for _, issue := range issues { if err := batch.Index(indexerID(issue.ID), struct { RepoID int64 @@ -212,7 +179,7 @@ func (b *BleveIndexer) Index(issues []*IndexerData) error { // Delete deletes indexes by ids func (b *BleveIndexer) Delete(ids ...int64) error { - batch := gitea_bleve.NewFlushingBatch(b.Indexer.Indexer, maxBatchSize) + batch := gitea_bleve.NewFlushingBatch(b.in.Indexer, maxBatchSize) for _, id := range ids { if err := batch.Delete(indexerID(id)); err != nil { return err @@ -243,7 +210,7 @@ func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false) search.SortBy([]string{"-_score"}) - result, err := b.Indexer.Indexer.SearchInContext(ctx, search) + result, err := b.in.Indexer.SearchInContext(ctx, search) if err != nil { return nil, err } diff --git a/modules/indexer/issues/elastic_search.go b/modules/indexer/issues/elastic_search.go index 2606e1e119c41..4276031ad449c 100644 --- a/modules/indexer/issues/elastic_search.go +++ b/modules/indexer/issues/elastic_search.go @@ -11,6 +11,7 @@ import ( "time" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/internal" in_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/log" @@ -21,7 +22,8 @@ var _ Indexer = &ElasticSearchIndexer{} // ElasticSearchIndexer implements Indexer interface type ElasticSearchIndexer struct { - *in_elasticsearch.Indexer + in *in_elasticsearch.Indexer + internal.Indexer // do not composite in_elasticsearch.Indexer directly to avoid exposing too much } // NewElasticSearchIndexer creates a new elasticsearch indexer @@ -43,23 +45,11 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, er return nil, err } + in := in_elasticsearch.NewIndexer(client, indexerName) indexer := &ElasticSearchIndexer{ - Indexer: 
in_elasticsearch.NewIndexer(client, indexerName), + in: in, + Indexer: in, } - - ticker := time.NewTicker(10 * time.Second) - go func() { - for { - select { - case <-ticker.C: - indexer.checkAvailability() - case <-indexer.StopTimer: - ticker.Stop() - return - } - } - }() - return indexer, nil } @@ -105,9 +95,9 @@ func (b *ElasticSearchIndexer) Init() (bool, error) { mapping := defaultMapping ctx := graceful.GetManager().HammerContext() - createIndex, err := b.Client.CreateIndex(b.IndexerName).BodyString(mapping).Do(ctx) + createIndex, err := b.in.Client.CreateIndex(b.in.IndexerName).BodyString(mapping).Do(ctx) if err != nil { - return false, b.CheckError(err) + return false, b.in.CheckError(err) } if !createIndex.Acknowledged { return false, errors.New("init failed") @@ -122,8 +112,8 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { return nil } else if len(issues) == 1 { issue := issues[0] - _, err := b.Client.Index(). - Index(b.IndexerName). + _, err := b.in.Client.Index(). + Index(b.in.IndexerName). Id(fmt.Sprintf("%d", issue.ID)). BodyJson(map[string]interface{}{ "id": issue.ID, @@ -133,14 +123,14 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { "comments": issue.Comments, }). Do(graceful.GetManager().HammerContext()) - return b.CheckError(err) + return b.in.CheckError(err) } reqs := make([]elastic.BulkableRequest, 0) for _, issue := range issues { reqs = append(reqs, elastic.NewBulkIndexRequest(). - Index(b.IndexerName). + Index(b.in.IndexerName). Id(fmt.Sprintf("%d", issue.ID)). Doc(map[string]interface{}{ "id": issue.ID, @@ -152,11 +142,11 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { ) } - _, err := b.Client.Bulk(). - Index(b.IndexerName). + _, err := b.in.Client.Bulk(). + Index(b.in.IndexerName). Add(reqs...). 
Do(graceful.GetManager().HammerContext()) - return b.CheckError(err) + return b.in.CheckError(err) } // Delete deletes indexes by ids @@ -164,27 +154,27 @@ func (b *ElasticSearchIndexer) Delete(ids ...int64) error { if len(ids) == 0 { return nil } else if len(ids) == 1 { - _, err := b.Client.Delete(). - Index(b.IndexerName). + _, err := b.in.Client.Delete(). + Index(b.in.IndexerName). Id(fmt.Sprintf("%d", ids[0])). Do(graceful.GetManager().HammerContext()) - return b.CheckError(err) + return b.in.CheckError(err) } reqs := make([]elastic.BulkableRequest, 0) for _, id := range ids { reqs = append(reqs, elastic.NewBulkDeleteRequest(). - Index(b.IndexerName). + Index(b.in.IndexerName). Id(fmt.Sprintf("%d", id)), ) } - _, err := b.Client.Bulk(). - Index(b.IndexerName). + _, err := b.in.Client.Bulk(). + Index(b.in.IndexerName). Add(reqs...). Do(graceful.GetManager().HammerContext()) - return b.CheckError(err) + return b.in.CheckError(err) } // Search searches for issues by given conditions. @@ -201,14 +191,14 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, keyword string, repoI repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...) query = query.Must(repoQuery) } - searchResult, err := b.Client.Search(). - Index(b.IndexerName). + searchResult, err := b.in.Client.Search(). + Index(b.in.IndexerName). Query(query). Sort("_score", false). From(start).Size(limit). 
Do(ctx) if err != nil { - return nil, b.CheckError(err) + return nil, b.in.CheckError(err) } hits := make([]Match, 0, limit) @@ -224,18 +214,3 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, keyword string, repoI Hits: hits, }, nil } - -func (b *ElasticSearchIndexer) checkAvailability() { - if b.Ping() { - return - } - - // Request cluster state to check if elastic is available again - _, err := b.Client.ClusterState().Do(graceful.GetManager().ShutdownContext()) - if err != nil { - b.SetAvailability(false) - return - } - - b.SetAvailability(true) -} From 94a57365ca5a0e740a36a02b07ac9446cd59eaca Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 16:33:46 +0800 Subject: [PATCH 05/43] feat: db issue --- modules/indexer/internal/db/indexer.go | 31 ++++++++++++++++++++++++++ modules/indexer/issues/db.go | 12 +++++++++- modules/indexer/issues/indexer.go | 2 +- 3 files changed, 43 insertions(+), 2 deletions(-) create mode 100644 modules/indexer/internal/db/indexer.go diff --git a/modules/indexer/internal/db/indexer.go b/modules/indexer/internal/db/indexer.go new file mode 100644 index 0000000000000..c03c2a40e5bb4 --- /dev/null +++ b/modules/indexer/internal/db/indexer.go @@ -0,0 +1,31 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package db + +import ( + "code.gitea.io/gitea/modules/indexer/internal" +) + +var _ internal.Indexer = &Indexer{} + +// Indexer represents a basic db indexer implementation +type Indexer struct{} + +// Init initializes the indexer +func (i *Indexer) Init() (bool, error) { + // nothing to do + return false, nil +} + +// Ping checks if the indexer is available +func (i *Indexer) Ping() bool { + // No need to ping database to check if it is available. + // If the database goes down, Gitea will go down, so nobody will care if the indexer is available. 
+ return true +} + +// Close closes the indexer +func (i *Indexer) Close() { + // nothing to do +} diff --git a/modules/indexer/issues/db.go b/modules/indexer/issues/db.go index 04c101c356900..6a23cfbd48f68 100644 --- a/modules/indexer/issues/db.go +++ b/modules/indexer/issues/db.go @@ -8,10 +8,20 @@ import ( "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" + "code.gitea.io/gitea/modules/indexer/internal" + in_db "code.gitea.io/gitea/modules/indexer/internal/db" ) // DBIndexer implements Indexer interface to use database's like search -type DBIndexer struct{} +type DBIndexer struct { + internal.Indexer +} + +func NewDBIndexer() *DBIndexer { + return &DBIndexer{ + Indexer: &in_db.Indexer{}, + } +} // Init dummy function func (i *DBIndexer) Init() (bool, error) { diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index fba91c9e02c72..051c7eec556e7 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -194,7 +194,7 @@ func InitIssueIndexer(syncReindex bool) { populate = !exist holder.set(issueIndexer) case "db": - issueIndexer := &DBIndexer{} + issueIndexer := NewDBIndexer() holder.set(issueIndexer) case "meilisearch": issueIndexer, err := NewMeilisearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) From 242095f2dba409993c9e31dae53627447223a161 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 16:35:32 +0800 Subject: [PATCH 06/43] fix: inner --- modules/indexer/issues/bleve.go | 18 ++++----- modules/indexer/issues/db.go | 4 +- modules/indexer/issues/elastic_search.go | 48 ++++++++++++------------ 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go index 7be4d6f7a8811..0b9eda213f7cb 100644 --- a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve.go @@ -10,7 +10,7 @@ import ( gitea_bleve 
"code.gitea.io/gitea/modules/indexer/bleve" "code.gitea.io/gitea/modules/indexer/internal" - in_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" + inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -126,19 +126,19 @@ var _ Indexer = &BleveIndexer{} // BleveIndexer implements Indexer interface type BleveIndexer struct { - in *in_bleve.Indexer - internal.Indexer // do not composite in_bleve.Indexer directly to avoid exposing too much + inner *inner_bleve.Indexer + internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } // NewBleveIndexer creates a new bleve local indexer func NewBleveIndexer(indexDir string) *BleveIndexer { - in := &in_bleve.Indexer{ + in := &inner_bleve.Indexer{ IndexDir: indexDir, Version: issueIndexerLatestVersion, } return &BleveIndexer{ Indexer: in, - in: in, + inner: in, } } @@ -152,13 +152,13 @@ func (b *BleveIndexer) Init() (bool, error) { return true, nil } - b.in.Indexer, err = createIssueIndexer(b.in.IndexDir, issueIndexerLatestVersion) + b.inner.Indexer, err = createIssueIndexer(b.inner.IndexDir, issueIndexerLatestVersion) return false, err } // Index will save the index data func (b *BleveIndexer) Index(issues []*IndexerData) error { - batch := gitea_bleve.NewFlushingBatch(b.in.Indexer, maxBatchSize) + batch := gitea_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, issue := range issues { if err := batch.Index(indexerID(issue.ID), struct { RepoID int64 @@ -179,7 +179,7 @@ func (b *BleveIndexer) Index(issues []*IndexerData) error { // Delete deletes indexes by ids func (b *BleveIndexer) Delete(ids ...int64) error { - batch := gitea_bleve.NewFlushingBatch(b.in.Indexer, maxBatchSize) + batch := gitea_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, id := range ids { if err := batch.Delete(indexerID(id)); err != nil { return err @@ -210,7 +210,7 @@ func (b 
*BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false) search.SortBy([]string{"-_score"}) - result, err := b.in.Indexer.SearchInContext(ctx, search) + result, err := b.inner.Indexer.SearchInContext(ctx, search) if err != nil { return nil, err } diff --git a/modules/indexer/issues/db.go b/modules/indexer/issues/db.go index 6a23cfbd48f68..b5a0aba3b1888 100644 --- a/modules/indexer/issues/db.go +++ b/modules/indexer/issues/db.go @@ -9,7 +9,7 @@ import ( "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/modules/indexer/internal" - in_db "code.gitea.io/gitea/modules/indexer/internal/db" + inner_db "code.gitea.io/gitea/modules/indexer/internal/db" ) // DBIndexer implements Indexer interface to use database's like search @@ -19,7 +19,7 @@ type DBIndexer struct { func NewDBIndexer() *DBIndexer { return &DBIndexer{ - Indexer: &in_db.Indexer{}, + Indexer: &inner_db.Indexer{}, } } diff --git a/modules/indexer/issues/elastic_search.go b/modules/indexer/issues/elastic_search.go index 4276031ad449c..7f37eb8bc4fb1 100644 --- a/modules/indexer/issues/elastic_search.go +++ b/modules/indexer/issues/elastic_search.go @@ -12,7 +12,7 @@ import ( "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/internal" - in_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" + inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/log" "github.com/olivere/elastic/v7" @@ -22,8 +22,8 @@ var _ Indexer = &ElasticSearchIndexer{} // ElasticSearchIndexer implements Indexer interface type ElasticSearchIndexer struct { - in *in_elasticsearch.Indexer - internal.Indexer // do not composite in_elasticsearch.Indexer directly to avoid exposing too much + inner *inner_elasticsearch.Indexer + internal.Indexer // do not composite inner_elasticsearch.Indexer directly to 
avoid exposing too much } // NewElasticSearchIndexer creates a new elasticsearch indexer @@ -45,9 +45,9 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, er return nil, err } - in := in_elasticsearch.NewIndexer(client, indexerName) + in := inner_elasticsearch.NewIndexer(client, indexerName) indexer := &ElasticSearchIndexer{ - in: in, + inner: in, Indexer: in, } return indexer, nil @@ -95,9 +95,9 @@ func (b *ElasticSearchIndexer) Init() (bool, error) { mapping := defaultMapping ctx := graceful.GetManager().HammerContext() - createIndex, err := b.in.Client.CreateIndex(b.in.IndexerName).BodyString(mapping).Do(ctx) + createIndex, err := b.inner.Client.CreateIndex(b.inner.IndexerName).BodyString(mapping).Do(ctx) if err != nil { - return false, b.in.CheckError(err) + return false, b.inner.CheckError(err) } if !createIndex.Acknowledged { return false, errors.New("init failed") @@ -112,8 +112,8 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { return nil } else if len(issues) == 1 { issue := issues[0] - _, err := b.in.Client.Index(). - Index(b.in.IndexerName). + _, err := b.inner.Client.Index(). + Index(b.inner.IndexerName). Id(fmt.Sprintf("%d", issue.ID)). BodyJson(map[string]interface{}{ "id": issue.ID, @@ -123,14 +123,14 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { "comments": issue.Comments, }). Do(graceful.GetManager().HammerContext()) - return b.in.CheckError(err) + return b.inner.CheckError(err) } reqs := make([]elastic.BulkableRequest, 0) for _, issue := range issues { reqs = append(reqs, elastic.NewBulkIndexRequest(). - Index(b.in.IndexerName). + Index(b.inner.IndexerName). Id(fmt.Sprintf("%d", issue.ID)). Doc(map[string]interface{}{ "id": issue.ID, @@ -142,11 +142,11 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { ) } - _, err := b.in.Client.Bulk(). - Index(b.in.IndexerName). + _, err := b.inner.Client.Bulk(). + Index(b.inner.IndexerName). Add(reqs...). 
Do(graceful.GetManager().HammerContext()) - return b.in.CheckError(err) + return b.inner.CheckError(err) } // Delete deletes indexes by ids @@ -154,27 +154,27 @@ func (b *ElasticSearchIndexer) Delete(ids ...int64) error { if len(ids) == 0 { return nil } else if len(ids) == 1 { - _, err := b.in.Client.Delete(). - Index(b.in.IndexerName). + _, err := b.inner.Client.Delete(). + Index(b.inner.IndexerName). Id(fmt.Sprintf("%d", ids[0])). Do(graceful.GetManager().HammerContext()) - return b.in.CheckError(err) + return b.inner.CheckError(err) } reqs := make([]elastic.BulkableRequest, 0) for _, id := range ids { reqs = append(reqs, elastic.NewBulkDeleteRequest(). - Index(b.in.IndexerName). + Index(b.inner.IndexerName). Id(fmt.Sprintf("%d", id)), ) } - _, err := b.in.Client.Bulk(). - Index(b.in.IndexerName). + _, err := b.inner.Client.Bulk(). + Index(b.inner.IndexerName). Add(reqs...). Do(graceful.GetManager().HammerContext()) - return b.in.CheckError(err) + return b.inner.CheckError(err) } // Search searches for issues by given conditions. @@ -191,14 +191,14 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, keyword string, repoI repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...) query = query.Must(repoQuery) } - searchResult, err := b.in.Client.Search(). - Index(b.in.IndexerName). + searchResult, err := b.inner.Client.Search(). + Index(b.inner.IndexerName). Query(query). Sort("_score", false). From(start).Size(limit). 
Do(ctx) if err != nil { - return nil, b.in.CheckError(err) + return nil, b.inner.CheckError(err) } hits := make([]Match, 0, limit) From e3190e6545ab179b7c2df072259fc50b9c9c0aee Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 16:38:22 +0800 Subject: [PATCH 07/43] fix: move gitea_bleve --- modules/indexer/code/bleve.go | 10 +++++----- modules/indexer/{ => internal}/bleve/batch.go | 0 modules/indexer/issues/bleve.go | 5 ++--- 3 files changed, 7 insertions(+), 8 deletions(-) rename modules/indexer/{ => internal}/bleve/batch.go (100%) diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 150a310458a60..5fad374a5b300 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -17,7 +17,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve" + inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" @@ -183,7 +183,7 @@ func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { } func (b *BleveIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, - update fileUpdate, repo *repo_model.Repository, batch *gitea_bleve.FlushingBatch, + update fileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { @@ -238,7 +238,7 @@ func (b *BleveIndexer) addUpdate(ctx context.Context, batchWriter git.WriteClose }) } -func (b *BleveIndexer) addDelete(filename string, repo *repo_model.Repository, batch *gitea_bleve.FlushingBatch) error { +func (b *BleveIndexer) addDelete(filename string, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch) error { id := 
filenameIndexerID(repo.ID, filename) return batch.Delete(id) } @@ -281,7 +281,7 @@ func (b *BleveIndexer) Ping() bool { // Index indexes the data func (b *BleveIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { - batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) + batch := inner_bleve.NewFlushingBatch(b.indexer, maxBatchSize) if len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! @@ -316,7 +316,7 @@ func (b *BleveIndexer) Delete(repoID int64) error { if err != nil { return err } - batch := gitea_bleve.NewFlushingBatch(b.indexer, maxBatchSize) + batch := inner_bleve.NewFlushingBatch(b.indexer, maxBatchSize) for _, hit := range result.Hits { if err = batch.Delete(hit.ID); err != nil { return err diff --git a/modules/indexer/bleve/batch.go b/modules/indexer/internal/bleve/batch.go similarity index 100% rename from modules/indexer/bleve/batch.go rename to modules/indexer/internal/bleve/batch.go diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go index 0b9eda213f7cb..7599a6eb8eb62 100644 --- a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve.go @@ -8,7 +8,6 @@ import ( "fmt" "strconv" - gitea_bleve "code.gitea.io/gitea/modules/indexer/bleve" "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" @@ -158,7 +157,7 @@ func (b *BleveIndexer) Init() (bool, error) { // Index will save the index data func (b *BleveIndexer) Index(issues []*IndexerData) error { - batch := gitea_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) + batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, issue := range issues { if err := batch.Index(indexerID(issue.ID), struct { RepoID int64 @@ -179,7 +178,7 @@ func (b *BleveIndexer) Index(issues []*IndexerData) error { // Delete deletes 
indexes by ids func (b *BleveIndexer) Delete(ids ...int64) error { - batch := gitea_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) + batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, id := range ids { if err := batch.Delete(indexerID(id)); err != nil { return err From 3dbc2e49119b820ca4569d700411f937d10309f7 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 17:44:14 +0800 Subject: [PATCH 08/43] feat: code indexer --- modules/indexer/code/bleve.go | 99 +++-------- modules/indexer/code/bleve_test.go | 3 +- modules/indexer/code/elastic_search.go | 186 ++++++-------------- modules/indexer/code/elastic_search_test.go | 11 +- modules/indexer/code/indexer.go | 21 ++- modules/indexer/code/wrapped.go | 9 + modules/indexer/issues/elastic_search.go | 2 + 7 files changed, 116 insertions(+), 215 deletions(-) diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index 5fad374a5b300..ef8110a23e0ac 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -8,7 +8,6 @@ import ( "context" "fmt" "io" - "os" "strconv" "strings" "time" @@ -17,12 +16,12 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/typesniffer" - "code.gitea.io/gitea/modules/util" "github.com/blevesearch/bleve/v2" analyzer_custom "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -31,7 +30,6 @@ import ( "github.com/blevesearch/bleve/v2/analysis/token/lowercase" "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm" "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" - "github.com/blevesearch/bleve/v2/index/upsidedown" "github.com/blevesearch/bleve/v2/mapping" 
"github.com/blevesearch/bleve/v2/search/query" "github.com/ethantkoenig/rupture" @@ -59,39 +57,6 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { }) } -// openBleveIndexer open the index at the specified path, checking for metadata -// updates and bleve version updates. If index needs to be created (or -// re-created), returns (nil, nil) -// Deprecated: -func openBleveIndexer(path string, latestVersion int) (bleve.Index, error) { - _, err := os.Stat(path) - if err != nil && os.IsNotExist(err) { - return nil, nil - } else if err != nil { - return nil, err - } - - metadata, err := rupture.ReadIndexMetadata(path) - if err != nil { - return nil, err - } - if metadata.Version < latestVersion { - // the indexer is using a previous version, so we should delete it and - // re-populate - return nil, util.RemoveAll(path) - } - - index, err := bleve.Open(path) - if err != nil && err == upsidedown.IncompatibleVersion { - // the indexer was built with a previous version of bleve, so we should - // delete it and re-populate - return nil, util.RemoveAll(path) - } else if err != nil { - return nil, err - } - return index, nil -} - // RepoIndexerData data stored in the repo indexer type RepoIndexerData struct { RepoID int64 @@ -165,21 +130,20 @@ var _ Indexer = &BleveIndexer{} // BleveIndexer represents a bleve indexer implementation type BleveIndexer struct { - indexDir string - indexer bleve.Index + inner *inner_bleve.Indexer + internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } // NewBleveIndexer creates a new bleve local indexer -func NewBleveIndexer(indexDir string) (*BleveIndexer, bool, error) { - indexer := &BleveIndexer{ - indexDir: indexDir, +func NewBleveIndexer(indexDir string) *BleveIndexer { + in := &inner_bleve.Indexer{ + IndexDir: indexDir, + Version: repoIndexerLatestVersion, } - created, err := indexer.init() - if err != nil { - indexer.Close() - return nil, false, err + return &BleveIndexer{ + 
Indexer: in, + inner: in, } - return indexer, created, err } func (b *BleveIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, @@ -243,45 +207,26 @@ func (b *BleveIndexer) addDelete(filename string, repo *repo_model.Repository, b return batch.Delete(id) } -// init init the indexer -func (b *BleveIndexer) init() (bool, error) { - var err error - b.indexer, err = openBleveIndexer(b.indexDir, repoIndexerLatestVersion) +// Init initializes the indexer +func (b *BleveIndexer) Init() (bool, error) { + opened, err := b.Indexer.Init() if err != nil { return false, err } - if b.indexer != nil { - return false, nil + if opened { + return true, nil } - b.indexer, err = createBleveIndexer(b.indexDir, repoIndexerLatestVersion) + b.inner.Indexer, err = createBleveIndexer(b.inner.IndexDir, repoIndexerLatestVersion) if err != nil { return false, err } - - return true, nil -} - -// Close close the indexer -func (b *BleveIndexer) Close() { - log.Debug("Closing repo indexer") - if b.indexer != nil { - err := b.indexer.Close() - if err != nil { - log.Error("Error whilst closing the repository indexer: %v", err) - } - } - log.Info("PID: %d Repository Indexer closed", os.Getpid()) -} - -// Ping does nothing -func (b *BleveIndexer) Ping() bool { - return true + return false, nil } // Index indexes the data func (b *BleveIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { - batch := inner_bleve.NewFlushingBatch(b.indexer, maxBatchSize) + batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! 
@@ -312,11 +257,11 @@ func (b *BleveIndexer) Index(ctx context.Context, repo *repo_model.Repository, s func (b *BleveIndexer) Delete(repoID int64) error { query := numericEqualityQuery(repoID, "RepoID") searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false) - result, err := b.indexer.Search(searchRequest) + result, err := b.inner.Indexer.Search(searchRequest) if err != nil { return err } - batch := inner_bleve.NewFlushingBatch(b.indexer, maxBatchSize) + batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, hit := range result.Hits { if err = batch.Delete(hit.ID); err != nil { return err @@ -380,7 +325,7 @@ func (b *BleveIndexer) Search(ctx context.Context, repoIDs []int64, language, ke searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10)) } - result, err := b.indexer.SearchInContext(ctx, searchRequest) + result, err := b.inner.Indexer.SearchInContext(ctx, searchRequest) if err != nil { return 0, nil, nil, err } @@ -427,7 +372,7 @@ func (b *BleveIndexer) Search(ctx context.Context, repoIDs []int64, language, ke facetRequest.IncludeLocations = true facetRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10)) - if result, err = b.indexer.Search(facetRequest); err != nil { + if result, err = b.inner.Indexer.Search(facetRequest); err != nil { return 0, nil, nil, err } diff --git a/modules/indexer/code/bleve_test.go b/modules/indexer/code/bleve_test.go index 00bcd5c90c67d..8476da71f5b82 100644 --- a/modules/indexer/code/bleve_test.go +++ b/modules/indexer/code/bleve_test.go @@ -16,7 +16,8 @@ func TestBleveIndexAndSearch(t *testing.T) { dir := t.TempDir() - idx, _, err := NewBleveIndexer(dir) + idx := NewBleveIndexer(dir) + _, err := idx.Init() if err != nil { assert.Fail(t, "Unable to create bleve indexer Error: %v", err) if idx != nil { diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elastic_search.go index 0e56a865880e7..61b7fb03467d5 100644 --- 
a/modules/indexer/code/elastic_search.go +++ b/modules/indexer/code/elastic_search.go @@ -6,13 +6,10 @@ package code import ( "bufio" "context" - "errors" "fmt" "io" - "net" "strconv" "strings" - "sync" "time" repo_model "code.gitea.io/gitea/models/repo" @@ -20,6 +17,8 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/internal" + inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -42,15 +41,12 @@ var _ Indexer = &ElasticSearchIndexer{} // ElasticSearchIndexer implements Indexer interface type ElasticSearchIndexer struct { - client *elastic.Client - indexerAliasName string - available bool - stopTimer chan struct{} - lock sync.RWMutex + inner *inner_elasticsearch.Indexer + internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } // NewElasticSearchIndexer creates a new elasticsearch indexer -func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, bool, error) { +func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, error) { opts := []elastic.ClientOptionFunc{ elastic.SetURL(url), elastic.SetSniff(false), @@ -66,35 +62,15 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, bo client, err := elastic.NewClient(opts...) 
if err != nil { - return nil, false, err + return nil, err } + in := inner_elasticsearch.NewIndexer(client, indexerName) indexer := &ElasticSearchIndexer{ - client: client, - indexerAliasName: indexerName, - available: true, - stopTimer: make(chan struct{}), - } - - ticker := time.NewTicker(10 * time.Second) - go func() { - for { - select { - case <-ticker.C: - indexer.checkAvailability() - case <-indexer.stopTimer: - ticker.Stop() - return - } - } - }() - - exists, err := indexer.init() - if err != nil { - indexer.Close() - return nil, false, err + inner: in, + Indexer: in, } - return indexer, !exists, err + return indexer, nil } const ( @@ -128,68 +104,64 @@ const ( ) func (b *ElasticSearchIndexer) realIndexerName() string { - return fmt.Sprintf("%s.v%d", b.indexerAliasName, esRepoIndexerLatestVersion) + return fmt.Sprintf("%s.v%d", b.inner.IndexerName, esRepoIndexerLatestVersion) } // Init will initialize the indexer -func (b *ElasticSearchIndexer) init() (bool, error) { - ctx := graceful.GetManager().HammerContext() - exists, err := b.client.IndexExists(b.realIndexerName()).Do(ctx) +func (b *ElasticSearchIndexer) Init() (bool, error) { + opened, err := b.Indexer.Init() if err != nil { - return false, b.checkError(err) + return false, err + } + if opened { + return true, nil } - if !exists { - mapping := defaultMapping - createIndex, err := b.client.CreateIndex(b.realIndexerName()).BodyString(mapping).Do(ctx) - if err != nil { - return false, b.checkError(err) - } - if !createIndex.Acknowledged { - return false, fmt.Errorf("create index %s with %s failed", b.realIndexerName(), mapping) - } + ctx := graceful.GetManager().HammerContext() + mapping := defaultMapping + + createIndex, err := b.inner.Client.CreateIndex(b.realIndexerName()).BodyString(mapping).Do(ctx) + if err != nil { + return false, b.inner.CheckError(err) + } + if !createIndex.Acknowledged { + return false, fmt.Errorf("create index %s with %s failed", b.realIndexerName(), mapping) } // check version 
- r, err := b.client.Aliases().Do(ctx) + // FIXME: return value should be fixed + r, err := b.inner.Client.Aliases().Do(ctx) if err != nil { - return false, b.checkError(err) + return false, b.inner.CheckError(err) } - realIndexerNames := r.IndicesByAlias(b.indexerAliasName) + realIndexerNames := r.IndicesByAlias(b.inner.IndexerName) if len(realIndexerNames) < 1 { - res, err := b.client.Alias(). - Add(b.realIndexerName(), b.indexerAliasName). + res, err := b.inner.Client.Alias(). + Add(b.realIndexerName(), b.inner.IndexerName). Do(ctx) if err != nil { - return false, b.checkError(err) + return false, b.inner.CheckError(err) } if !res.Acknowledged { - return false, fmt.Errorf("create alias %s to index %s failed", b.indexerAliasName, b.realIndexerName()) + return false, fmt.Errorf("create alias %s to index %s failed", b.inner.IndexerName, b.realIndexerName()) } } else if len(realIndexerNames) >= 1 && realIndexerNames[0] < b.realIndexerName() { log.Warn("Found older gitea indexer named %s, but we will create a new one %s and keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", realIndexerNames[0], b.realIndexerName()) - res, err := b.client.Alias(). - Remove(realIndexerNames[0], b.indexerAliasName). - Add(b.realIndexerName(), b.indexerAliasName). + res, err := b.inner.Client.Alias(). + Remove(realIndexerNames[0], b.inner.IndexerName). + Add(b.realIndexerName(), b.inner.IndexerName). 
Do(ctx) if err != nil { - return false, b.checkError(err) + return false, b.inner.CheckError(err) } if !res.Acknowledged { - return false, fmt.Errorf("change alias %s to index %s failed", b.indexerAliasName, b.realIndexerName()) + return false, fmt.Errorf("change alias %s to index %s failed", b.inner.IndexerName, b.realIndexerName()) } } - return exists, nil -} - -// Ping checks if elastic is available -func (b *ElasticSearchIndexer) Ping() bool { - b.lock.RLock() - defer b.lock.RUnlock() - return b.available + return true, nil } func (b *ElasticSearchIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update fileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { @@ -239,7 +211,7 @@ func (b *ElasticSearchIndexer) addUpdate(ctx context.Context, batchWriter git.Wr return []elastic.BulkableRequest{ elastic.NewBulkIndexRequest(). - Index(b.indexerAliasName). + Index(b.inner.IndexerName). Id(id). Doc(map[string]interface{}{ "repo_id": repo.ID, @@ -254,7 +226,7 @@ func (b *ElasticSearchIndexer) addUpdate(ctx context.Context, batchWriter git.Wr func (b *ElasticSearchIndexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { id := filenameIndexerID(repo.ID, filename) return elastic.NewBulkDeleteRequest(). - Index(b.indexerAliasName). + Index(b.inner.IndexerName). Id(id) } @@ -288,21 +260,21 @@ func (b *ElasticSearchIndexer) Index(ctx context.Context, repo *repo_model.Repos } if len(reqs) > 0 { - _, err := b.client.Bulk(). - Index(b.indexerAliasName). + _, err := b.inner.Client.Bulk(). + Index(b.inner.IndexerName). Add(reqs...). Do(ctx) - return b.checkError(err) + return b.inner.CheckError(err) } return nil } // Delete deletes indexes by ids func (b *ElasticSearchIndexer) Delete(repoID int64) error { - _, err := b.client.DeleteByQuery(b.indexerAliasName). + _, err := b.inner.Client.DeleteByQuery(b.inner.IndexerName). 
Query(elastic.NewTermsQuery("repo_id", repoID)). Do(graceful.GetManager().HammerContext()) - return b.checkError(err) + return b.inner.CheckError(err) } // indexPos find words positions for start and the following end on content. It will @@ -412,8 +384,8 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, repoIDs []int64, lang } if len(language) == 0 { - searchResult, err := b.client.Search(). - Index(b.indexerAliasName). + searchResult, err := b.inner.Client.Search(). + Index(b.inner.IndexerName). Aggregation("language", aggregation). Query(query). Highlight( @@ -426,26 +398,26 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, repoIDs []int64, lang From(start).Size(pageSize). Do(ctx) if err != nil { - return 0, nil, nil, b.checkError(err) + return 0, nil, nil, b.inner.CheckError(err) } return convertResult(searchResult, kw, pageSize) } langQuery := elastic.NewMatchQuery("language", language) - countResult, err := b.client.Search(). - Index(b.indexerAliasName). + countResult, err := b.inner.Client.Search(). + Index(b.inner.IndexerName). Aggregation("language", aggregation). Query(query). Size(0). // We only needs stats information Do(ctx) if err != nil { - return 0, nil, nil, b.checkError(err) + return 0, nil, nil, b.inner.CheckError(err) } query = query.Must(langQuery) - searchResult, err := b.client.Search(). - Index(b.indexerAliasName). + searchResult, err := b.inner.Client.Search(). + Index(b.inner.IndexerName). Query(query). Highlight( elastic.NewHighlight(). @@ -457,56 +429,10 @@ func (b *ElasticSearchIndexer) Search(ctx context.Context, repoIDs []int64, lang From(start).Size(pageSize). 
Do(ctx) if err != nil { - return 0, nil, nil, b.checkError(err) + return 0, nil, nil, b.inner.CheckError(err) } total, hits, _, err := convertResult(searchResult, kw, pageSize) return total, hits, extractAggs(countResult), err } - -// Close implements indexer -func (b *ElasticSearchIndexer) Close() { - select { - case <-b.stopTimer: - default: - close(b.stopTimer) - } -} - -func (b *ElasticSearchIndexer) checkError(err error) error { - var opErr *net.OpError - if !(elastic.IsConnErr(err) || (errors.As(err, &opErr) && (opErr.Op == "dial" || opErr.Op == "read"))) { - return err - } - - b.setAvailability(false) - - return err -} - -func (b *ElasticSearchIndexer) checkAvailability() { - if b.Ping() { - return - } - - // Request cluster state to check if elastic is available again - _, err := b.client.ClusterState().Do(graceful.GetManager().ShutdownContext()) - if err != nil { - b.setAvailability(false) - return - } - - b.setAvailability(true) -} - -func (b *ElasticSearchIndexer) setAvailability(available bool) { - b.lock.Lock() - defer b.lock.Unlock() - - if b.available == available { - return - } - - b.available = available -} diff --git a/modules/indexer/code/elastic_search_test.go b/modules/indexer/code/elastic_search_test.go index e7506eefa6807..46c1ededc66da 100644 --- a/modules/indexer/code/elastic_search_test.go +++ b/modules/indexer/code/elastic_search_test.go @@ -21,7 +21,7 @@ func TestESIndexAndSearch(t *testing.T) { return } - indexer, _, err := NewElasticSearchIndexer(u, "gitea_codes") + indexer, err := NewElasticSearchIndexer(u, "gitea_codes") if err != nil { assert.Fail(t, "Unable to create ES indexer Error: %v", err) if indexer != nil { @@ -29,6 +29,15 @@ func TestESIndexAndSearch(t *testing.T) { } return } + + if _, err := indexer.Init(); err != nil { + assert.Fail(t, "Unable to init ES indexer Error: %v", err) + if indexer != nil { + indexer.Close() + } + return + } + defer indexer.Close() testIndexer("elastic_search", t, indexer) diff --git 
a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index f38fd6000c705..ba171ad7c8a68 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -14,6 +14,7 @@ import ( "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/queue" @@ -44,11 +45,10 @@ type SearchResultLanguages struct { // Indexer defines an interface to index and search code contents type Indexer interface { - Ping() bool + internal.Indexer Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error Delete(repoID int64) error Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) - Close() } func filenameIndexerID(repoID int64, filename string) string { @@ -214,7 +214,7 @@ func Init() { start := time.Now() var ( rIndexer Indexer - populate bool + existed bool err error ) switch setting.Indexer.RepoType { @@ -228,7 +228,8 @@ func Init() { } }() - rIndexer, populate, err = NewBleveIndexer(setting.Indexer.RepoPath) + rIndexer = NewBleveIndexer(setting.Indexer.RepoPath) + existed, err = rIndexer.Init() if err != nil { cancel() indexer.Close() @@ -245,13 +246,21 @@ func Init() { } }() - rIndexer, populate, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) + rIndexer, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) + if err != nil { + cancel() + indexer.Close() + close(waitChannel) + log.Fatal("PID: %d Unable to create the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) + } + existed, err = rIndexer.Init() if err != nil { cancel() indexer.Close() close(waitChannel) 
log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } + default: log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType) } @@ -261,7 +270,7 @@ func Init() { // Start processing the queue go graceful.GetManager().RunWithCancel(indexerQueue) - if populate { + if !existed { // populate the index because it's created for the first time go graceful.GetManager().RunWithShutdownContext(populateRepoIndexer) } select { diff --git a/modules/indexer/code/wrapped.go b/modules/indexer/code/wrapped.go index 7eed3e8557598..d2935a5f46d6a 100644 --- a/modules/indexer/code/wrapped.go +++ b/modules/indexer/code/wrapped.go @@ -56,6 +56,15 @@ func (w *wrappedIndexer) get() (Indexer, error) { return w.internal, nil } +func (w *wrappedIndexer) Init() (bool, error) { + indexer, err := w.get() + if err != nil { + log.Warn("Failed to get indexer: %v", err) + return false, err + } + return indexer.Init() +} + // Ping checks if elastic is available func (w *wrappedIndexer) Ping() bool { indexer, err := w.get() diff --git a/modules/indexer/issues/elastic_search.go b/modules/indexer/issues/elastic_search.go index 7f37eb8bc4fb1..dfbad0e16015c 100644 --- a/modules/indexer/issues/elastic_search.go +++ b/modules/indexer/issues/elastic_search.go @@ -103,6 +103,8 @@ func (b *ElasticSearchIndexer) Init() (bool, error) { return false, errors.New("init failed") } + // FIXME: index version? 
+ return false, nil } From 3a2873af72324f21c9a0178eaed8370163c36882 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 18:24:44 +0800 Subject: [PATCH 09/43] feat: new IndexerHolder --- modules/indexer/internal/holder.go | 34 +++++++++++++++++ modules/indexer/issues/indexer.go | 61 ++++++------------------------ 2 files changed, 46 insertions(+), 49 deletions(-) create mode 100644 modules/indexer/internal/holder.go diff --git a/modules/indexer/internal/holder.go b/modules/indexer/internal/holder.go new file mode 100644 index 0000000000000..a36737617cfe4 --- /dev/null +++ b/modules/indexer/internal/holder.go @@ -0,0 +1,34 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import "sync" + +type IndexerHolder[T Indexer] struct { + indexer T + mutex sync.RWMutex + cond *sync.Cond +} + +func NewIndexerHolder[T Indexer](_ T) *IndexerHolder[T] { + h := &IndexerHolder[T]{} + h.cond = sync.NewCond(h.mutex.RLocker()) + return h +} + +func (h *IndexerHolder[T]) Set(indexer T) { + h.mutex.Lock() + defer h.mutex.Unlock() + h.indexer = indexer + h.cond.Broadcast() +} + +func (h *IndexerHolder[T]) Get() T { + h.mutex.RLock() + defer h.mutex.RUnlock() + if h.indexer == nil { + h.cond.Wait() + } + return h.indexer +} diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index 051c7eec556e7..ba94f46fd3188 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -8,7 +8,6 @@ import ( "fmt" "os" "runtime/pprof" - "sync" "time" "code.gitea.io/gitea/models/db" @@ -54,46 +53,10 @@ type Indexer interface { Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) } -type indexerHolder struct { - indexer Indexer - mutex sync.RWMutex - cond *sync.Cond - cancelled bool -} - -func newIndexerHolder() *indexerHolder { - h := &indexerHolder{} - h.cond = sync.NewCond(h.mutex.RLocker()) - return h -} - -func (h 
*indexerHolder) cancel() { - h.mutex.Lock() - defer h.mutex.Unlock() - h.cancelled = true - h.cond.Broadcast() -} - -func (h *indexerHolder) set(indexer Indexer) { - h.mutex.Lock() - defer h.mutex.Unlock() - h.indexer = indexer - h.cond.Broadcast() -} - -func (h *indexerHolder) get() Indexer { - h.mutex.RLock() - defer h.mutex.RUnlock() - if h.indexer == nil && !h.cancelled { - h.cond.Wait() - } - return h.indexer -} - var ( // issueIndexerQueue queue of issue ids to be updated issueIndexerQueue *queue.WorkerPoolQueue[*IndexerData] - holder = newIndexerHolder() + holder = internal.NewIndexerHolder(Indexer(nil)) ) // InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until @@ -107,7 +70,7 @@ func InitIssueIndexer(syncReindex bool) { switch setting.Indexer.IssueType { case "bleve", "elasticsearch", "meilisearch": handler := func(items ...*IndexerData) (unhandled []*IndexerData) { - indexer := holder.get() + indexer := holder.Get() if indexer == nil { log.Warn("Issue indexer handler: indexer is not ready, retry later.") return items @@ -161,21 +124,21 @@ func InitIssueIndexer(syncReindex bool) { log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2)) log.Error("The indexer files are likely corrupted and may need to be deleted") log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath) - holder.cancel() + holder.Set(nil) log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err) } }() issueIndexer := NewBleveIndexer(setting.Indexer.IssuePath) exist, err := issueIndexer.Init() if err != nil { - holder.cancel() + holder.Set(nil) log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err) } populate = !exist - holder.set(issueIndexer) + holder.Set(issueIndexer) graceful.GetManager().RunAtTerminate(func() { log.Debug("Closing issue 
indexer") - issueIndexer := holder.get() + issueIndexer := holder.Get() if issueIndexer != nil { issueIndexer.Close() } @@ -192,10 +155,10 @@ func InitIssueIndexer(syncReindex bool) { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } populate = !exist - holder.set(issueIndexer) + holder.Set(issueIndexer) case "db": issueIndexer := NewDBIndexer() - holder.set(issueIndexer) + holder.Set(issueIndexer) case "meilisearch": issueIndexer, err := NewMeilisearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) if err != nil { @@ -206,9 +169,9 @@ func InitIssueIndexer(syncReindex bool) { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } populate = !exist - holder.set(issueIndexer) + holder.Set(issueIndexer) default: - holder.cancel() + holder.Set(nil) log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType) } @@ -357,7 +320,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { // WARNNING: You have to ensure user have permission to visit repoIDs' issues func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) ([]int64, error) { var issueIDs []int64 - indexer := holder.get() + indexer := holder.Get() if indexer == nil { log.Error("SearchIssuesByKeyword(): unable to get indexer!") @@ -375,7 +338,7 @@ func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) // IsAvailable checks if issue indexer is available func IsAvailable() bool { - indexer := holder.get() + indexer := holder.Get() if indexer == nil { log.Error("IsAvailable(): unable to get indexer!") return false From 5748343dc26d779e62a76d4909c8a7d213c298ba Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 18:28:30 +0800 Subject: [PATCH 10/43] fix: remove generic --- modules/indexer/internal/holder.go | 12 ++++++------ modules/indexer/issues/indexer.go | 
6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/modules/indexer/internal/holder.go b/modules/indexer/internal/holder.go index a36737617cfe4..e95c517016b47 100644 --- a/modules/indexer/internal/holder.go +++ b/modules/indexer/internal/holder.go @@ -5,26 +5,26 @@ package internal import "sync" -type IndexerHolder[T Indexer] struct { - indexer T +type IndexerHolder struct { + indexer Indexer mutex sync.RWMutex cond *sync.Cond } -func NewIndexerHolder[T Indexer](_ T) *IndexerHolder[T] { - h := &IndexerHolder[T]{} +func NewIndexerHolder() *IndexerHolder { + h := &IndexerHolder{} h.cond = sync.NewCond(h.mutex.RLocker()) return h } -func (h *IndexerHolder[T]) Set(indexer T) { +func (h *IndexerHolder) Set(indexer Indexer) { h.mutex.Lock() defer h.mutex.Unlock() h.indexer = indexer h.cond.Broadcast() } -func (h *IndexerHolder[T]) Get() T { +func (h *IndexerHolder) Get() Indexer { h.mutex.RLock() defer h.mutex.RUnlock() if h.indexer == nil { diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index ba94f46fd3188..2db044532d046 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -56,7 +56,7 @@ type Indexer interface { var ( // issueIndexerQueue queue of issue ids to be updated issueIndexerQueue *queue.WorkerPoolQueue[*IndexerData] - holder = internal.NewIndexerHolder(Indexer(nil)) + holder = internal.NewIndexerHolder() ) // InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until @@ -70,7 +70,7 @@ func InitIssueIndexer(syncReindex bool) { switch setting.Indexer.IssueType { case "bleve", "elasticsearch", "meilisearch": handler := func(items ...*IndexerData) (unhandled []*IndexerData) { - indexer := holder.Get() + indexer := holder.Get().(Indexer) if indexer == nil { log.Warn("Issue indexer handler: indexer is not ready, retry later.") return items @@ -320,7 +320,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { // WARNNING: You 
have to ensure user have permission to visit repoIDs' issues func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) ([]int64, error) { var issueIDs []int64 - indexer := holder.Get() + indexer := holder.Get().(Indexer) if indexer == nil { log.Error("SearchIssuesByKeyword(): unable to get indexer!") From e609a3640017069caa2ecd54f3ac2427239e096d Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 18:38:07 +0800 Subject: [PATCH 11/43] feat: use holder in code indexer --- modules/indexer/code/git.go | 2 +- modules/indexer/code/indexer.go | 35 +++++----- modules/indexer/code/search.go | 2 +- modules/indexer/code/wrapped.go | 113 -------------------------------- 4 files changed, 21 insertions(+), 131 deletions(-) delete mode 100644 modules/indexer/code/wrapped.go diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index bbcc6ba487190..0ba4b9f1e19a4 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -109,7 +109,7 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio // previous commit sha may have been removed by a force push, so // try rebuilding from scratch log.Warn("git diff: %v", runErr) - if err := indexer.Delete(repo.ID); err != nil { + if err := holder.Get().(Indexer).Delete(repo.ID); err != nil { return nil, err } return genesisChanges(ctx, repo, revision) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index ba171ad7c8a68..b6588600988ea 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -81,7 +81,10 @@ type IndexerData struct { RepoID int64 } -var indexerQueue *queue.WorkerPoolQueue[*IndexerData] +var ( + indexerQueue *queue.WorkerPoolQueue[*IndexerData] + holder = internal.NewIndexerHolder() +) func index(ctx context.Context, indexer Indexer, repoID int64) error { repo, err := repo_model.GetRepositoryByID(ctx, repoID) @@ -139,7 +142,7 @@ func index(ctx context.Context, indexer Indexer, 
repoID int64) error { // Init initialize the repo indexer func Init() { if !setting.Indexer.RepoIndexerEnabled { - indexer.Close() + holder.Get().Close() return } @@ -153,7 +156,7 @@ func Init() { } cancel() log.Debug("Closing repository indexer") - indexer.Close() + holder.Get().Close() log.Info("PID: %d Repository Indexer closed", os.Getpid()) finished() }) @@ -164,8 +167,8 @@ func Init() { switch setting.Indexer.RepoType { case "bleve", "elasticsearch": handler := func(items ...*IndexerData) (unhandled []*IndexerData) { - idx, err := indexer.get() - if idx == nil || err != nil { + indexer := holder.Get().(Indexer) + if indexer == nil { log.Warn("Codes indexer handler: indexer is not ready, retry later.") return items } @@ -188,7 +191,7 @@ func Init() { code.gitea.io/gitea/modules/indexer/code.index(indexer.go:105) */ if err := index(ctx, indexer, indexerData.RepoID); err != nil { - if !idx.Ping() { + if !indexer.Ping() { log.Error("Code indexer handler: indexer is unavailable.") unhandled = append(unhandled, indexerData) continue @@ -232,7 +235,7 @@ func Init() { existed, err = rIndexer.Init() if err != nil { cancel() - indexer.Close() + holder.Get().Close() close(waitChannel) log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err) } @@ -249,14 +252,14 @@ func Init() { rIndexer, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) if err != nil { cancel() - indexer.Close() + holder.Get().Close() close(waitChannel) log.Fatal("PID: %d Unable to create the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } existed, err = rIndexer.Init() if err != nil { cancel() - indexer.Close() + holder.Get().Close() close(waitChannel) log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } @@ -265,7 +268,7 @@ func 
Init() { log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType) } - indexer.set(rIndexer) + holder.Set(rIndexer) // Start processing the queue go graceful.GetManager().RunWithCancel(indexerQueue) @@ -292,18 +295,18 @@ func Init() { case <-graceful.GetManager().IsShutdown(): log.Warn("Shutdown before Repository Indexer completed initialization") cancel() - indexer.Close() + holder.Get().Close() case duration, ok := <-waitChannel: if !ok { log.Warn("Repository Indexer Initialization failed") cancel() - indexer.Close() + holder.Get().Close() return } log.Info("Repository Indexer Initialization took %v", duration) case <-time.After(timeout): cancel() - indexer.Close() + holder.Get().Close() log.Fatal("Repository Indexer Initialization Timed-Out after: %v", timeout) } }() @@ -320,9 +323,9 @@ func UpdateRepoIndexer(repo *repo_model.Repository) { // IsAvailable checks if issue indexer is available func IsAvailable() bool { - idx, err := indexer.get() - if err != nil { - log.Error("IsAvailable(): unable to get indexer: %v", err) + idx := holder.Get().(Indexer) + if idx == nil { + log.Error("IsAvailable(): unable to get indexer") return false } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 1de9ffc224b19..e69ab3115a6f6 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -114,7 +114,7 @@ func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword strin return 0, nil, nil, nil } - total, results, resultLanguages, err := indexer.Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) + total, results, resultLanguages, err := holder.Get().(Indexer).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) if err != nil { return 0, nil, nil, err } diff --git a/modules/indexer/code/wrapped.go b/modules/indexer/code/wrapped.go deleted file mode 100644 index d2935a5f46d6a..0000000000000 --- a/modules/indexer/code/wrapped.go +++ /dev/null @@ -1,113 +0,0 @@ 
-// Copyright 2019 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package code - -import ( - "context" - "fmt" - "sync" - - repo_model "code.gitea.io/gitea/models/repo" - "code.gitea.io/gitea/modules/log" -) - -var indexer = newWrappedIndexer() - -// ErrWrappedIndexerClosed is the error returned if the indexer was closed before it was ready -var ErrWrappedIndexerClosed = fmt.Errorf("Indexer closed before ready") - -type wrappedIndexer struct { - internal Indexer - lock sync.RWMutex - cond *sync.Cond - closed bool -} - -func newWrappedIndexer() *wrappedIndexer { - w := &wrappedIndexer{} - w.cond = sync.NewCond(w.lock.RLocker()) - return w -} - -func (w *wrappedIndexer) set(indexer Indexer) { - w.lock.Lock() - defer w.lock.Unlock() - if w.closed { - // Too late! - indexer.Close() - } - w.internal = indexer - w.cond.Broadcast() -} - -func (w *wrappedIndexer) get() (Indexer, error) { - w.lock.RLock() - defer w.lock.RUnlock() - if w.internal == nil { - if w.closed { - return nil, ErrWrappedIndexerClosed - } - w.cond.Wait() - if w.closed { - return nil, ErrWrappedIndexerClosed - } - } - return w.internal, nil -} - -func (w *wrappedIndexer) Init() (bool, error) { - indexer, err := w.get() - if err != nil { - log.Warn("Failed to get indexer: %v", err) - return false, err - } - return indexer.Init() -} - -// Ping checks if elastic is available -func (w *wrappedIndexer) Ping() bool { - indexer, err := w.get() - if err != nil { - log.Warn("Failed to get indexer: %v", err) - return false - } - return indexer.Ping() -} - -func (w *wrappedIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { - indexer, err := w.get() - if err != nil { - return err - } - return indexer.Index(ctx, repo, sha, changes) -} - -func (w *wrappedIndexer) Delete(repoID int64) error { - indexer, err := w.get() - if err != nil { - return err - } - return indexer.Delete(repoID) -} - -func (w *wrappedIndexer) Search(ctx 
context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { - indexer, err := w.get() - if err != nil { - return 0, nil, nil, err - } - return indexer.Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) -} - -func (w *wrappedIndexer) Close() { - w.lock.Lock() - defer w.lock.Unlock() - if w.closed { - return - } - w.closed = true - w.cond.Broadcast() - if w.internal != nil { - w.internal.Close() - } -} From c43676e4aadf272dd51e1bbfafabcd707a747df1 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 18:41:08 +0800 Subject: [PATCH 12/43] fix: be safe with nil indexer --- modules/indexer/internal/bleve/indexer.go | 11 +++++++++++ modules/indexer/internal/elasticsearch/indexer.go | 10 ++++++++++ 2 files changed, 21 insertions(+) diff --git a/modules/indexer/internal/bleve/indexer.go b/modules/indexer/internal/bleve/indexer.go index cb02d992a6426..9d065348d383e 100644 --- a/modules/indexer/internal/bleve/indexer.go +++ b/modules/indexer/internal/bleve/indexer.go @@ -4,6 +4,8 @@ package bleve import ( + "fmt" + "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" @@ -21,6 +23,9 @@ type Indexer struct { // Init initializes the indexer func (i *Indexer) Init() (bool, error) { + if i == nil { + return false, fmt.Errorf("cannot init nil indexer") + } var err error i.Indexer, err = openIndexer(i.IndexDir, i.Version) if err != nil { @@ -34,10 +39,16 @@ func (i *Indexer) Init() (bool, error) { // Ping checks if the indexer is available func (i *Indexer) Ping() bool { + if i == nil { + return false + } return i.Indexer != nil } func (i *Indexer) Close() { + if i == nil { + return + } if indexer := i.Indexer; indexer != nil { if err := indexer.Close(); err != nil { log.Error("Failed to close bleve indexer in %q: %v", i.IndexDir, err) diff --git a/modules/indexer/internal/elasticsearch/indexer.go 
b/modules/indexer/internal/elasticsearch/indexer.go index 0105ca6ef756d..6c45798ef4312 100644 --- a/modules/indexer/internal/elasticsearch/indexer.go +++ b/modules/indexer/internal/elasticsearch/indexer.go @@ -4,6 +4,7 @@ package elasticsearch import ( + "fmt" "sync" "time" @@ -49,6 +50,9 @@ func NewIndexer(client *elastic.Client, indexerName string) *Indexer { // Init initializes the indexer func (i *Indexer) Init() (bool, error) { + if i == nil { + return false, fmt.Errorf("cannot init nil indexer") + } ctx := graceful.GetManager().HammerContext() exists, err := i.Client.IndexExists(i.IndexerName).Do(ctx) if err != nil { @@ -59,6 +63,9 @@ func (i *Indexer) Init() (bool, error) { // Ping checks if the indexer is available func (i *Indexer) Ping() bool { + if i == nil { + return false + } i.lock.RLock() defer i.lock.RUnlock() return i.available @@ -66,6 +73,9 @@ func (i *Indexer) Ping() bool { // Close closes the indexer func (i *Indexer) Close() { + if i == nil { + return + } select { case <-i.stopTimer: default: From 72b1a39e6e7e60426e7fef6be6f6562de65b16a4 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 8 Jun 2023 19:05:36 +0800 Subject: [PATCH 13/43] fix: Get never return nil --- modules/indexer/internal/holder.go | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/modules/indexer/internal/holder.go b/modules/indexer/internal/holder.go index e95c517016b47..3ff529eaa24b4 100644 --- a/modules/indexer/internal/holder.go +++ b/modules/indexer/internal/holder.go @@ -3,7 +3,9 @@ package internal -import "sync" +import ( + "sync" +) type IndexerHolder struct { indexer Indexer @@ -24,10 +26,12 @@ func (h *IndexerHolder) Set(indexer Indexer) { h.cond.Broadcast() } +// Get returns the indexer, blocking until it is set +// It never returns nil func (h *IndexerHolder) Get() Indexer { h.mutex.RLock() defer h.mutex.RUnlock() - if h.indexer == nil { + for h.indexer == nil { // make sure it never return nil even called Set(nil) h.cond.Wait() } return 
h.indexer From 34d136dd2b8370ea978923ce6351c955cffc619a Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 15:51:41 +0800 Subject: [PATCH 14/43] chore: meilisearch --- modules/indexer/internal/meilisearch/indexer.go | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 modules/indexer/internal/meilisearch/indexer.go diff --git a/modules/indexer/internal/meilisearch/indexer.go b/modules/indexer/internal/meilisearch/indexer.go new file mode 100644 index 0000000000000..ccc0dfe6c8caf --- /dev/null +++ b/modules/indexer/internal/meilisearch/indexer.go @@ -0,0 +1,4 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package meilisearch From 94a6620f5df6c0915871cc92157defb4f70fb43d Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 15:54:10 +0800 Subject: [PATCH 15/43] fix: spell of Elasticsearch --- .../{elastic_search.go => elasticsearch.go} | 26 +++++++++---------- ...c_search_test.go => elasticsearch_test.go} | 2 +- modules/indexer/code/indexer.go | 2 +- .../{elastic_search.go => elasticsearch.go} | 20 +++++++------- modules/indexer/issues/indexer.go | 2 +- 5 files changed, 26 insertions(+), 26 deletions(-) rename modules/indexer/code/{elastic_search.go => elasticsearch.go} (94%) rename modules/indexer/code/{elastic_search_test.go => elasticsearch_test.go} (93%) rename modules/indexer/issues/{elastic_search.go => elasticsearch.go} (90%) diff --git a/modules/indexer/code/elastic_search.go b/modules/indexer/code/elasticsearch.go similarity index 94% rename from modules/indexer/code/elastic_search.go rename to modules/indexer/code/elasticsearch.go index 61b7fb03467d5..b3154a10c790b 100644 --- a/modules/indexer/code/elastic_search.go +++ b/modules/indexer/code/elasticsearch.go @@ -37,16 +37,16 @@ const ( esMultiMatchTypePhrasePrefix = "phrase_prefix" ) -var _ Indexer = &ElasticSearchIndexer{} +var _ Indexer = &ElasticsearchIndexer{} -// ElasticSearchIndexer implements Indexer interface -type 
ElasticSearchIndexer struct { +// ElasticsearchIndexer implements Indexer interface +type ElasticsearchIndexer struct { inner *inner_elasticsearch.Indexer internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } -// NewElasticSearchIndexer creates a new elasticsearch indexer -func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, error) { +// NewElasticsearchIndexer creates a new elasticsearch indexer +func NewElasticsearchIndexer(url, indexerName string) (*ElasticsearchIndexer, error) { opts := []elastic.ClientOptionFunc{ elastic.SetURL(url), elastic.SetSniff(false), @@ -66,7 +66,7 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, er } in := inner_elasticsearch.NewIndexer(client, indexerName) - indexer := &ElasticSearchIndexer{ + indexer := &ElasticsearchIndexer{ inner: in, Indexer: in, } @@ -103,12 +103,12 @@ const ( }` ) -func (b *ElasticSearchIndexer) realIndexerName() string { +func (b *ElasticsearchIndexer) realIndexerName() string { return fmt.Sprintf("%s.v%d", b.inner.IndexerName, esRepoIndexerLatestVersion) } // Init will initialize the indexer -func (b *ElasticSearchIndexer) Init() (bool, error) { +func (b *ElasticsearchIndexer) Init() (bool, error) { opened, err := b.Indexer.Init() if err != nil { return false, err @@ -164,7 +164,7 @@ func (b *ElasticSearchIndexer) Init() (bool, error) { return true, nil } -func (b *ElasticSearchIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update fileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *ElasticsearchIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update fileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { 
return nil, nil @@ -223,7 +223,7 @@ func (b *ElasticSearchIndexer) addUpdate(ctx context.Context, batchWriter git.Wr }, nil } -func (b *ElasticSearchIndexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { +func (b *ElasticsearchIndexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { id := filenameIndexerID(repo.ID, filename) return elastic.NewBulkDeleteRequest(). Index(b.inner.IndexerName). @@ -231,7 +231,7 @@ func (b *ElasticSearchIndexer) addDelete(filename string, repo *repo_model.Repos } // Index will save the index data -func (b *ElasticSearchIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { +func (b *ElasticsearchIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! @@ -270,7 +270,7 @@ func (b *ElasticSearchIndexer) Index(ctx context.Context, repo *repo_model.Repos } // Delete deletes indexes by ids -func (b *ElasticSearchIndexer) Delete(repoID int64) error { +func (b *ElasticsearchIndexer) Delete(repoID int64) error { _, err := b.inner.Client.DeleteByQuery(b.inner.IndexerName). Query(elastic.NewTermsQuery("repo_id", repoID)). Do(graceful.GetManager().HammerContext()) @@ -355,7 +355,7 @@ func extractAggs(searchResult *elastic.SearchResult) []*SearchResultLanguages { } // Search searches for codes and language stats by given conditions. 
-func (b *ElasticSearchIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { +func (b *ElasticsearchIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { searchType := esMultiMatchTypeBestFields if isMatch { searchType = esMultiMatchTypePhrasePrefix diff --git a/modules/indexer/code/elastic_search_test.go b/modules/indexer/code/elasticsearch_test.go similarity index 93% rename from modules/indexer/code/elastic_search_test.go rename to modules/indexer/code/elasticsearch_test.go index 46c1ededc66da..9d1812538a139 100644 --- a/modules/indexer/code/elastic_search_test.go +++ b/modules/indexer/code/elasticsearch_test.go @@ -21,7 +21,7 @@ func TestESIndexAndSearch(t *testing.T) { return } - indexer, err := NewElasticSearchIndexer(u, "gitea_codes") + indexer, err := NewElasticsearchIndexer(u, "gitea_codes") if err != nil { assert.Fail(t, "Unable to create ES indexer Error: %v", err) if indexer != nil { diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index b6588600988ea..57e06d6de4065 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -249,7 +249,7 @@ func Init() { } }() - rIndexer, err = NewElasticSearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) + rIndexer, err = NewElasticsearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) if err != nil { cancel() holder.Get().Close() diff --git a/modules/indexer/issues/elastic_search.go b/modules/indexer/issues/elasticsearch.go similarity index 90% rename from modules/indexer/issues/elastic_search.go rename to modules/indexer/issues/elasticsearch.go index dfbad0e16015c..b0fe44ea91534 100644 --- a/modules/indexer/issues/elastic_search.go +++ b/modules/indexer/issues/elasticsearch.go @@ 
-18,16 +18,16 @@ import ( "github.com/olivere/elastic/v7" ) -var _ Indexer = &ElasticSearchIndexer{} +var _ Indexer = &ElasticsearchIndexer{} -// ElasticSearchIndexer implements Indexer interface -type ElasticSearchIndexer struct { +// ElasticsearchIndexer implements Indexer interface +type ElasticsearchIndexer struct { inner *inner_elasticsearch.Indexer internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } -// NewElasticSearchIndexer creates a new elasticsearch indexer -func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, error) { +// NewElasticsearchIndexer creates a new elasticsearch indexer +func NewElasticsearchIndexer(url, indexerName string) (*ElasticsearchIndexer, error) { opts := []elastic.ClientOptionFunc{ elastic.SetURL(url), elastic.SetSniff(false), @@ -46,7 +46,7 @@ func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, er } in := inner_elasticsearch.NewIndexer(client, indexerName) - indexer := &ElasticSearchIndexer{ + indexer := &ElasticsearchIndexer{ inner: in, Indexer: in, } @@ -83,7 +83,7 @@ const ( ) // Init will initialize the indexer -func (b *ElasticSearchIndexer) Init() (bool, error) { +func (b *ElasticsearchIndexer) Init() (bool, error) { opened, err := b.Indexer.Init() if err != nil { return false, err @@ -109,7 +109,7 @@ func (b *ElasticSearchIndexer) Init() (bool, error) { } // Index will save the index data -func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { +func (b *ElasticsearchIndexer) Index(issues []*IndexerData) error { if len(issues) == 0 { return nil } else if len(issues) == 1 { @@ -152,7 +152,7 @@ func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error { } // Delete deletes indexes by ids -func (b *ElasticSearchIndexer) Delete(ids ...int64) error { +func (b *ElasticsearchIndexer) Delete(ids ...int64) error { if len(ids) == 0 { return nil } else if len(ids) == 1 { @@ -181,7 +181,7 @@ func (b 
*ElasticSearchIndexer) Delete(ids ...int64) error { // Search searches for issues by given conditions. // Returns the matching issue IDs -func (b *ElasticSearchIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) { +func (b *ElasticsearchIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) { kwQuery := elastic.NewMultiMatchQuery(keyword, "title", "content", "comments") query := elastic.NewBoolQuery() query = query.Must(kwQuery) diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index 2db044532d046..a4d389441f56a 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -146,7 +146,7 @@ func InitIssueIndexer(syncReindex bool) { }) log.Debug("Created Bleve Indexer") case "elasticsearch": - issueIndexer, err := NewElasticSearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) + issueIndexer, err := NewElasticsearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) if err != nil { log.Fatal("Unable to initialize Elastic Search Issue Indexer at connection: %s Error: %v", setting.Indexer.IssueConnStr, err) } From 7e033b98230370a99993a2eee316864a6f8a3416 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 15:57:01 +0800 Subject: [PATCH 16/43] chore: remove useless code for DB --- modules/indexer/issues/db.go | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/modules/indexer/issues/db.go b/modules/indexer/issues/db.go index b5a0aba3b1888..9800e132ba02d 100644 --- a/modules/indexer/issues/db.go +++ b/modules/indexer/issues/db.go @@ -6,12 +6,13 @@ package issues import ( "context" - "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/modules/indexer/internal" inner_db "code.gitea.io/gitea/modules/indexer/internal/db" ) +var _ Indexer = &DBIndexer{} + // 
DBIndexer implements Indexer interface to use database's like search type DBIndexer struct { internal.Indexer @@ -23,16 +24,6 @@ func NewDBIndexer() *DBIndexer { } } -// Init dummy function -func (i *DBIndexer) Init() (bool, error) { - return false, nil -} - -// Ping checks if database is available -func (i *DBIndexer) Ping() bool { - return db.GetEngine(db.DefaultContext).Ping() != nil -} - // Index dummy function func (i *DBIndexer) Index(issue []*IndexerData) error { return nil @@ -43,11 +34,7 @@ func (i *DBIndexer) Delete(ids ...int64) error { return nil } -// Close dummy function -func (i *DBIndexer) Close() { -} - -// Search dummy function +// Search searches for issues func (i *DBIndexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) { total, ids, err := issues_model.SearchIssueIDsByKeyword(ctx, kw, repoIDs, limit, start) if err != nil { From f7aff401726a254b4982c2b2ffea03d81630bd69 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 16:17:00 +0800 Subject: [PATCH 17/43] feat: bleve mapping --- modules/indexer/code/bleve.go | 43 ++++------------------- modules/indexer/internal/bleve/indexer.go | 41 ++++++++++++++++++--- modules/indexer/issues/bleve.go | 40 ++++----------------- 3 files changed, 48 insertions(+), 76 deletions(-) diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve.go index ef8110a23e0ac..02e06a1ef40d7 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve.go @@ -32,7 +32,6 @@ import ( "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search/query" - "github.com/ethantkoenig/rupture" "github.com/go-enry/go-enry/v2" ) @@ -77,8 +76,8 @@ const ( repoIndexerLatestVersion = 6 ) -// createBleveIndexer create a bleve repo indexer if one does not already exist -func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) { +// 
generateBleveIndexMapping generates a bleve index mapping for the repo indexer +func generateBleveIndexMapping() (mapping.IndexMapping, error) { docMapping := bleve.NewDocumentMapping() numericFieldMapping := bleve.NewNumericFieldMapping() numericFieldMapping.IncludeInAll = false @@ -113,17 +112,7 @@ func createBleveIndexer(path string, latestVersion int) (bleve.Index, error) { mapping.AddDocumentMapping(repoIndexerDocType, docMapping) mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) - indexer, err := bleve.New(path, mapping) - if err != nil { - return nil, err - } - - if err = rupture.WriteIndexMetadata(path, &rupture.IndexMetadata{ - Version: latestVersion, - }); err != nil { - return nil, err - } - return indexer, nil + return mapping, nil } var _ Indexer = &BleveIndexer{} @@ -136,13 +125,10 @@ type BleveIndexer struct { // NewBleveIndexer creates a new bleve local indexer func NewBleveIndexer(indexDir string) *BleveIndexer { - in := &inner_bleve.Indexer{ - IndexDir: indexDir, - Version: repoIndexerLatestVersion, - } + inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping) return &BleveIndexer{ - Indexer: in, - inner: in, + Indexer: inner, + inner: inner, } } @@ -207,23 +193,6 @@ func (b *BleveIndexer) addDelete(filename string, repo *repo_model.Repository, b return batch.Delete(id) } -// Init initializes the indexer -func (b *BleveIndexer) Init() (bool, error) { - opened, err := b.Indexer.Init() - if err != nil { - return false, err - } - if opened { - return true, nil - } - - b.inner.Indexer, err = createBleveIndexer(b.inner.IndexDir, repoIndexerLatestVersion) - if err != nil { - return false, err - } - return false, nil -} - // Index indexes the data func (b *BleveIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) diff --git a/modules/indexer/internal/bleve/indexer.go 
b/modules/indexer/internal/bleve/indexer.go index 9d065348d383e..44f8c48804d50 100644 --- a/modules/indexer/internal/bleve/indexer.go +++ b/modules/indexer/internal/bleve/indexer.go @@ -10,15 +10,29 @@ import ( "code.gitea.io/gitea/modules/log" "github.com/blevesearch/bleve/v2" + "github.com/blevesearch/bleve/v2/mapping" + "github.com/ethantkoenig/rupture" ) var _ internal.Indexer = &Indexer{} // Indexer represents a basic bleve indexer implementation type Indexer struct { - IndexDir string - Indexer bleve.Index - Version int + Indexer bleve.Index + + indexDir string + version int + mappingGetter MappingGetter +} + +type MappingGetter func() (mapping.IndexMapping, error) + +func NewIndexer(indexDir string, version int, mappingGetter func() (mapping.IndexMapping, error)) *Indexer { + return &Indexer{ + indexDir: indexDir, + version: version, + mappingGetter: mappingGetter, + } } // Init initializes the indexer @@ -27,13 +41,30 @@ func (i *Indexer) Init() (bool, error) { return false, fmt.Errorf("cannot init nil indexer") } var err error - i.Indexer, err = openIndexer(i.IndexDir, i.Version) + i.Indexer, err = openIndexer(i.indexDir, i.version) if err != nil { return false, err } if i.Indexer != nil { return true, nil } + + indexMapping, err := i.mappingGetter() + if err != nil { + return false, err + } + + i.Indexer, err = bleve.New(i.indexDir, indexMapping) + if err != nil { + return false, err + } + + if err = rupture.WriteIndexMetadata(i.indexDir, &rupture.IndexMetadata{ + Version: i.version, + }); err != nil { + return false, err + } + return false, nil } @@ -51,7 +82,7 @@ func (i *Indexer) Close() { } if indexer := i.Indexer; indexer != nil { if err := indexer.Close(); err != nil { - log.Error("Failed to close bleve indexer in %q: %v", i.IndexDir, err) + log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, err) } } } diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go index 7599a6eb8eb62..729bbcf8784e3 100644 --- 
a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve.go @@ -19,7 +19,6 @@ import ( "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode" "github.com/blevesearch/bleve/v2/mapping" "github.com/blevesearch/bleve/v2/search/query" - "github.com/ethantkoenig/rupture" ) const ( @@ -77,8 +76,8 @@ func (i *BleveIndexerData) Type() string { return issueIndexerDocType } -// createIssueIndexer create an issue indexer if one does not already exist -func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) { +// generateIssueIndexMapping generates the bleve index mapping for issues +func generateIssueIndexMapping() (mapping.IndexMapping, error) { mapping := bleve.NewIndexMapping() docMapping := bleve.NewDocumentMapping() @@ -108,17 +107,7 @@ func createIssueIndexer(path string, latestVersion int) (bleve.Index, error) { mapping.AddDocumentMapping(issueIndexerDocType, docMapping) mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping()) - index, err := bleve.New(path, mapping) - if err != nil { - return nil, err - } - - if err = rupture.WriteIndexMetadata(path, &rupture.IndexMetadata{ - Version: latestVersion, - }); err != nil { - return nil, err - } - return index, nil + return mapping, nil } var _ Indexer = &BleveIndexer{} @@ -131,30 +120,13 @@ type BleveIndexer struct { // NewBleveIndexer creates a new bleve local indexer func NewBleveIndexer(indexDir string) *BleveIndexer { - in := &inner_bleve.Indexer{ - IndexDir: indexDir, - Version: issueIndexerLatestVersion, - } + inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping) return &BleveIndexer{ - Indexer: in, - inner: in, + Indexer: inner, + inner: inner, } } -// Init will initialize the indexer -func (b *BleveIndexer) Init() (bool, error) { - opened, err := b.Indexer.Init() - if err != nil { - return false, err - } - if opened { - return true, nil - } - - b.inner.Indexer, err = createIssueIndexer(b.inner.IndexDir, 
issueIndexerLatestVersion) - return false, err -} - // Index will save the index data func (b *BleveIndexer) Index(issues []*IndexerData) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) From b6f20d3e79734068527dd144bce7ecbc08d4c6c6 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 16:55:41 +0800 Subject: [PATCH 18/43] fix: elasticsearch with version --- modules/indexer/code/elasticsearch.go | 100 ++---------------- modules/indexer/code/elasticsearch_test.go | 10 +- modules/indexer/code/indexer.go | 2 +- .../indexer/internal/elasticsearch/indexer.go | 60 ++++++----- .../indexer/internal/elasticsearch/util.go | 94 +++++++++++++++- modules/indexer/issues/elasticsearch.go | 66 ++---------- modules/indexer/issues/indexer.go | 5 +- 7 files changed, 149 insertions(+), 188 deletions(-) diff --git a/modules/indexer/code/elasticsearch.go b/modules/indexer/code/elasticsearch.go index b3154a10c790b..0c9fc32bc8698 100644 --- a/modules/indexer/code/elasticsearch.go +++ b/modules/indexer/code/elasticsearch.go @@ -10,7 +10,6 @@ import ( "io" "strconv" "strings" - "time" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/analyze" @@ -46,31 +45,13 @@ type ElasticsearchIndexer struct { } // NewElasticsearchIndexer creates a new elasticsearch indexer -func NewElasticsearchIndexer(url, indexerName string) (*ElasticsearchIndexer, error) { - opts := []elastic.ClientOptionFunc{ - elastic.SetURL(url), - elastic.SetSniff(false), - elastic.SetHealthcheckInterval(10 * time.Second), - elastic.SetGzip(false), - } - - logger := log.GetLogger(log.DEFAULT) - - opts = append(opts, elastic.SetTraceLog(&log.PrintfLogger{Logf: logger.Trace})) - opts = append(opts, elastic.SetInfoLog(&log.PrintfLogger{Logf: logger.Info})) - opts = append(opts, elastic.SetErrorLog(&log.PrintfLogger{Logf: logger.Error})) - - client, err := elastic.NewClient(opts...) 
- if err != nil { - return nil, err - } - - in := inner_elasticsearch.NewIndexer(client, indexerName) +func NewElasticsearchIndexer(url, indexerName string) *ElasticsearchIndexer { + in := inner_elasticsearch.NewIndexer(url, indexerName, repoIndexerLatestVersion, defaultMapping) indexer := &ElasticsearchIndexer{ inner: in, Indexer: in, } - return indexer, nil + return indexer } const ( @@ -103,67 +84,6 @@ const ( }` ) -func (b *ElasticsearchIndexer) realIndexerName() string { - return fmt.Sprintf("%s.v%d", b.inner.IndexerName, esRepoIndexerLatestVersion) -} - -// Init will initialize the indexer -func (b *ElasticsearchIndexer) Init() (bool, error) { - opened, err := b.Indexer.Init() - if err != nil { - return false, err - } - if opened { - return true, nil - } - - ctx := graceful.GetManager().HammerContext() - mapping := defaultMapping - - createIndex, err := b.inner.Client.CreateIndex(b.realIndexerName()).BodyString(mapping).Do(ctx) - if err != nil { - return false, b.inner.CheckError(err) - } - if !createIndex.Acknowledged { - return false, fmt.Errorf("create index %s with %s failed", b.realIndexerName(), mapping) - } - - // check version - // FIXME: return value should be fixed - r, err := b.inner.Client.Aliases().Do(ctx) - if err != nil { - return false, b.inner.CheckError(err) - } - - realIndexerNames := r.IndicesByAlias(b.inner.IndexerName) - if len(realIndexerNames) < 1 { - res, err := b.inner.Client.Alias(). - Add(b.realIndexerName(), b.inner.IndexerName). - Do(ctx) - if err != nil { - return false, b.inner.CheckError(err) - } - if !res.Acknowledged { - return false, fmt.Errorf("create alias %s to index %s failed", b.inner.IndexerName, b.realIndexerName()) - } - } else if len(realIndexerNames) >= 1 && realIndexerNames[0] < b.realIndexerName() { - log.Warn("Found older gitea indexer named %s, but we will create a new one %s and keep the old NOT DELETED. 
You can delete the old version after the upgrade succeed.", - realIndexerNames[0], b.realIndexerName()) - res, err := b.inner.Client.Alias(). - Remove(realIndexerNames[0], b.inner.IndexerName). - Add(b.realIndexerName(), b.inner.IndexerName). - Do(ctx) - if err != nil { - return false, b.inner.CheckError(err) - } - if !res.Acknowledged { - return false, fmt.Errorf("change alias %s to index %s failed", b.inner.IndexerName, b.realIndexerName()) - } - } - - return true, nil -} - func (b *ElasticsearchIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update fileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { @@ -211,7 +131,7 @@ func (b *ElasticsearchIndexer) addUpdate(ctx context.Context, batchWriter git.Wr return []elastic.BulkableRequest{ elastic.NewBulkIndexRequest(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Id(id). Doc(map[string]interface{}{ "repo_id": repo.ID, @@ -226,7 +146,7 @@ func (b *ElasticsearchIndexer) addUpdate(ctx context.Context, batchWriter git.Wr func (b *ElasticsearchIndexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { id := filenameIndexerID(repo.ID, filename) return elastic.NewBulkDeleteRequest(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Id(id) } @@ -261,7 +181,7 @@ func (b *ElasticsearchIndexer) Index(ctx context.Context, repo *repo_model.Repos if len(reqs) > 0 { _, err := b.inner.Client.Bulk(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Add(reqs...). Do(ctx) return b.inner.CheckError(err) @@ -271,7 +191,7 @@ func (b *ElasticsearchIndexer) Index(ctx context.Context, repo *repo_model.Repos // Delete deletes indexes by ids func (b *ElasticsearchIndexer) Delete(repoID int64) error { - _, err := b.inner.Client.DeleteByQuery(b.inner.IndexerName). 
+ _, err := b.inner.Client.DeleteByQuery(b.inner.IndexName()). Query(elastic.NewTermsQuery("repo_id", repoID)). Do(graceful.GetManager().HammerContext()) return b.inner.CheckError(err) @@ -385,7 +305,7 @@ func (b *ElasticsearchIndexer) Search(ctx context.Context, repoIDs []int64, lang if len(language) == 0 { searchResult, err := b.inner.Client.Search(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Aggregation("language", aggregation). Query(query). Highlight( @@ -406,7 +326,7 @@ func (b *ElasticsearchIndexer) Search(ctx context.Context, repoIDs []int64, lang langQuery := elastic.NewMatchQuery("language", language) countResult, err := b.inner.Client.Search(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Aggregation("language", aggregation). Query(query). Size(0). // We only needs stats information @@ -417,7 +337,7 @@ func (b *ElasticsearchIndexer) Search(ctx context.Context, repoIDs []int64, lang query = query.Must(langQuery) searchResult, err := b.inner.Client.Search(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Query(query). Highlight( elastic.NewHighlight(). 
diff --git a/modules/indexer/code/elasticsearch_test.go b/modules/indexer/code/elasticsearch_test.go index 9d1812538a139..467f8582a5bf7 100644 --- a/modules/indexer/code/elasticsearch_test.go +++ b/modules/indexer/code/elasticsearch_test.go @@ -21,15 +21,7 @@ func TestESIndexAndSearch(t *testing.T) { return } - indexer, err := NewElasticsearchIndexer(u, "gitea_codes") - if err != nil { - assert.Fail(t, "Unable to create ES indexer Error: %v", err) - if indexer != nil { - indexer.Close() - } - return - } - + indexer := NewElasticsearchIndexer(u, "gitea_codes") if _, err := indexer.Init(); err != nil { assert.Fail(t, "Unable to init ES indexer Error: %v", err) if indexer != nil { diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 57e06d6de4065..9f6199b75716a 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -249,7 +249,7 @@ func Init() { } }() - rIndexer, err = NewElasticsearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) + rIndexer = NewElasticsearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) if err != nil { cancel() holder.Get().Close() diff --git a/modules/indexer/internal/elasticsearch/indexer.go b/modules/indexer/internal/elasticsearch/indexer.go index 6c45798ef4312..7398632cbcbb9 100644 --- a/modules/indexer/internal/elasticsearch/indexer.go +++ b/modules/indexer/internal/elasticsearch/indexer.go @@ -6,7 +6,6 @@ package elasticsearch import ( "fmt" "sync" - "time" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/internal" @@ -18,34 +17,27 @@ var _ internal.Indexer = &Indexer{} // Indexer represents a basic elasticsearch indexer implementation type Indexer struct { - Client *elastic.Client - IndexerName string - available bool - stopTimer chan struct{} - lock sync.RWMutex + Client *elastic.Client + + url string + indexAliasName string + version int + mapping string + + available bool + stopTimer chan struct{} + lock 
sync.RWMutex } -func NewIndexer(client *elastic.Client, indexerName string) *Indexer { - indexer := &Indexer{ - Client: client, - IndexerName: indexerName, - available: true, - stopTimer: make(chan struct{}), +func NewIndexer(url, indexName string, version int, mapping string) *Indexer { + return &Indexer{ + url: url, + indexAliasName: indexName, + version: version, + mapping: mapping, + available: false, + stopTimer: make(chan struct{}), } - - ticker := time.NewTicker(10 * time.Second) - go func() { - for { - select { - case <-ticker.C: - indexer.checkAvailability() - case <-indexer.stopTimer: - ticker.Stop() - return - } - } - }() - return indexer } // Init initializes the indexer @@ -53,11 +45,25 @@ func (i *Indexer) Init() (bool, error) { if i == nil { return false, fmt.Errorf("cannot init nil indexer") } + + if err := i.initClient(); err != nil { + return false, err + } + ctx := graceful.GetManager().HammerContext() - exists, err := i.Client.IndexExists(i.IndexerName).Do(ctx) + + exists, err := i.Client.IndexExists(i.IndexName()).Do(ctx) if err != nil { return false, i.CheckError(err) } + if exists { + return true, nil + } + + if err := i.createIndex(ctx); err != nil { + return false, i.CheckError(err) + } + return exists, nil } diff --git a/modules/indexer/internal/elasticsearch/util.go b/modules/indexer/internal/elasticsearch/util.go index f46dc29d793ca..18d8c72265557 100644 --- a/modules/indexer/internal/elasticsearch/util.go +++ b/modules/indexer/internal/elasticsearch/util.go @@ -4,9 +4,14 @@ package elasticsearch import ( - "code.gitea.io/gitea/modules/graceful" + "context" "errors" + "fmt" "net" + "time" + + "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/log" "github.com/olivere/elastic/v7" ) @@ -23,6 +28,11 @@ func (i *Indexer) CheckError(err error) error { return err } +// IndexName returns the full index name with version +func (i *Indexer) IndexName() string { + return fmt.Sprintf("%s.v%d", i.indexAliasName, i.version) +} + 
func (i *Indexer) setAvailability(available bool) { i.lock.Lock() defer i.lock.Unlock() @@ -48,3 +58,85 @@ func (i *Indexer) checkAvailability() { i.setAvailability(true) } + +func (i *Indexer) createIndex(ctx context.Context) error { + createIndex, err := i.Client.CreateIndex(i.IndexName()).BodyString(i.mapping).Do(ctx) + if err != nil { + return err + } + if !createIndex.Acknowledged { + return fmt.Errorf("create index %s with %s failed", i.IndexName(), i.mapping) + } + + // check version + r, err := i.Client.Aliases().Do(ctx) + if err != nil { + return err + } + + realIndexerNames := r.IndicesByAlias(i.indexAliasName) + if len(realIndexerNames) < 1 { + res, err := i.Client.Alias(). + Add(i.IndexName(), i.indexAliasName). + Do(ctx) + if err != nil { + return err + } + if !res.Acknowledged { + return fmt.Errorf("create alias %s to index %s failed", i.indexAliasName, i.IndexName()) + } + } else if len(realIndexerNames) >= 1 && realIndexerNames[0] < i.IndexName() { + log.Warn("Found older gitea indexer named %s, but we will create a new one %s and keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", + realIndexerNames[0], i.IndexName()) + res, err := i.Client.Alias(). + Remove(realIndexerNames[0], i.indexAliasName). + Add(i.IndexName(), i.indexAliasName). 
+ Do(ctx) + if err != nil { + return err + } + if !res.Acknowledged { + return fmt.Errorf("change alias %s to index %s failed", i.indexAliasName, i.IndexName()) + } + } + + return nil +} + +func (i *Indexer) initClient() error { + opts := []elastic.ClientOptionFunc{ + elastic.SetURL(i.url), + elastic.SetSniff(false), + elastic.SetHealthcheckInterval(10 * time.Second), + elastic.SetGzip(false), + } + + logger := log.GetLogger(log.DEFAULT) + + opts = append(opts, elastic.SetTraceLog(&log.PrintfLogger{Logf: logger.Trace})) + opts = append(opts, elastic.SetInfoLog(&log.PrintfLogger{Logf: logger.Info})) + opts = append(opts, elastic.SetErrorLog(&log.PrintfLogger{Logf: logger.Error})) + + client, err := elastic.NewClient(opts...) + if err != nil { + return err + } + + i.Client = client + + i.available = true + ticker := time.NewTicker(10 * time.Second) + go func() { + for { + select { + case <-ticker.C: + i.checkAvailability() + case <-i.stopTimer: + ticker.Stop() + return + } + } + }() + + return nil +} diff --git a/modules/indexer/issues/elasticsearch.go b/modules/indexer/issues/elasticsearch.go index b0fe44ea91534..bd294d4c764dc 100644 --- a/modules/indexer/issues/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch.go @@ -5,15 +5,12 @@ package issues import ( "context" - "errors" "fmt" "strconv" - "time" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" - "code.gitea.io/gitea/modules/log" "github.com/olivere/elastic/v7" ) @@ -27,30 +24,13 @@ type ElasticsearchIndexer struct { } // NewElasticsearchIndexer creates a new elasticsearch indexer -func NewElasticsearchIndexer(url, indexerName string) (*ElasticsearchIndexer, error) { - opts := []elastic.ClientOptionFunc{ - elastic.SetURL(url), - elastic.SetSniff(false), - elastic.SetHealthcheckInterval(10 * time.Second), - elastic.SetGzip(false), - } - - logger := log.GetLogger(log.DEFAULT) - opts = 
append(opts, elastic.SetTraceLog(&log.PrintfLogger{Logf: logger.Trace})) - opts = append(opts, elastic.SetInfoLog(&log.PrintfLogger{Logf: logger.Info})) - opts = append(opts, elastic.SetErrorLog(&log.PrintfLogger{Logf: logger.Error})) - - client, err := elastic.NewClient(opts...) - if err != nil { - return nil, err - } - - in := inner_elasticsearch.NewIndexer(client, indexerName) +func NewElasticsearchIndexer(url, indexerName string) *ElasticsearchIndexer { + in := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) indexer := &ElasticsearchIndexer{ inner: in, Indexer: in, } - return indexer, nil + return indexer } const ( @@ -82,32 +62,6 @@ const ( }` ) -// Init will initialize the indexer -func (b *ElasticsearchIndexer) Init() (bool, error) { - opened, err := b.Indexer.Init() - if err != nil { - return false, err - } - if opened { - return true, nil - } - - mapping := defaultMapping - - ctx := graceful.GetManager().HammerContext() - createIndex, err := b.inner.Client.CreateIndex(b.inner.IndexerName).BodyString(mapping).Do(ctx) - if err != nil { - return false, b.inner.CheckError(err) - } - if !createIndex.Acknowledged { - return false, errors.New("init failed") - } - - // FIXME: index version? - - return false, nil -} - // Index will save the index data func (b *ElasticsearchIndexer) Index(issues []*IndexerData) error { if len(issues) == 0 { @@ -115,7 +69,7 @@ func (b *ElasticsearchIndexer) Index(issues []*IndexerData) error { } else if len(issues) == 1 { issue := issues[0] _, err := b.inner.Client.Index(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Id(fmt.Sprintf("%d", issue.ID)). BodyJson(map[string]interface{}{ "id": issue.ID, @@ -132,7 +86,7 @@ func (b *ElasticsearchIndexer) Index(issues []*IndexerData) error { for _, issue := range issues { reqs = append(reqs, elastic.NewBulkIndexRequest(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Id(fmt.Sprintf("%d", issue.ID)). 
Doc(map[string]interface{}{ "id": issue.ID, @@ -145,7 +99,7 @@ func (b *ElasticsearchIndexer) Index(issues []*IndexerData) error { } _, err := b.inner.Client.Bulk(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Add(reqs...). Do(graceful.GetManager().HammerContext()) return b.inner.CheckError(err) @@ -157,7 +111,7 @@ func (b *ElasticsearchIndexer) Delete(ids ...int64) error { return nil } else if len(ids) == 1 { _, err := b.inner.Client.Delete(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Id(fmt.Sprintf("%d", ids[0])). Do(graceful.GetManager().HammerContext()) return b.inner.CheckError(err) @@ -167,13 +121,13 @@ func (b *ElasticsearchIndexer) Delete(ids ...int64) error { for _, id := range ids { reqs = append(reqs, elastic.NewBulkDeleteRequest(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Id(fmt.Sprintf("%d", id)), ) } _, err := b.inner.Client.Bulk(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Add(reqs...). Do(graceful.GetManager().HammerContext()) return b.inner.CheckError(err) @@ -194,7 +148,7 @@ func (b *ElasticsearchIndexer) Search(ctx context.Context, keyword string, repoI query = query.Must(repoQuery) } searchResult, err := b.inner.Client.Search(). - Index(b.inner.IndexerName). + Index(b.inner.IndexName()). Query(query). Sort("_score", false). From(start).Size(limit). 
diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index a4d389441f56a..19a73e10ec5ae 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -146,10 +146,7 @@ func InitIssueIndexer(syncReindex bool) { }) log.Debug("Created Bleve Indexer") case "elasticsearch": - issueIndexer, err := NewElasticsearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) - if err != nil { - log.Fatal("Unable to initialize Elastic Search Issue Indexer at connection: %s Error: %v", setting.Indexer.IssueConnStr, err) - } + issueIndexer := NewElasticsearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) exist, err := issueIndexer.Init() if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) From a1ce99bdaabc5ee24dd33cfd82661f73540ca6de Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 17:10:57 +0800 Subject: [PATCH 19/43] feat: base32 --- modules/indexer/code/indexer.go | 9 ++------- modules/indexer/internal/base32.go | 21 +++++++++++++++++++++ modules/indexer/issues/bleve.go | 22 +++------------------- 3 files changed, 26 insertions(+), 26 deletions(-) create mode 100644 modules/indexer/internal/base32.go diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 9f6199b75716a..74d4ded0cb573 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -7,7 +7,6 @@ import ( "context" "os" "runtime/pprof" - "strconv" "strings" "time" @@ -52,11 +51,7 @@ type Indexer interface { } func filenameIndexerID(repoID int64, filename string) string { - return indexerID(repoID) + "_" + filename -} - -func indexerID(id int64) string { - return strconv.FormatInt(id, 36) + return internal.Base36(repoID) + "_" + filename } func parseIndexerID(indexerID string) (int64, string) { @@ -64,7 +59,7 @@ func parseIndexerID(indexerID string) (int64, string) { if index == -1 { 
log.Error("Unexpected ID in repo indexer: %s", indexerID) } - repoID, _ := strconv.ParseInt(indexerID[:index], 36, 64) + repoID, _ := internal.ParseBase36(indexerID[:index]) return repoID, indexerID[index+1:] } diff --git a/modules/indexer/internal/base32.go b/modules/indexer/internal/base32.go new file mode 100644 index 0000000000000..aca756c638a8a --- /dev/null +++ b/modules/indexer/internal/base32.go @@ -0,0 +1,21 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "fmt" + "strconv" +) + +func Base36(i int64) string { + return strconv.FormatInt(i, 36) +} + +func ParseBase36(s string) (int64, error) { + i, err := strconv.ParseInt(s, 36, 64) + if err != nil { + return 0, fmt.Errorf("invalid base36 integer %q: %w", s, err) + } + return i, nil +} diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve.go index 729bbcf8784e3..3c49741e3626e 100644 --- a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve.go @@ -5,8 +5,6 @@ package issues import ( "context" - "fmt" - "strconv" "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" @@ -27,20 +25,6 @@ const ( issueIndexerLatestVersion = 2 ) -// indexerID a bleve-compatible unique identifier for an integer id -func indexerID(id int64) string { - return strconv.FormatInt(id, 36) -} - -// idOfIndexerID the integer id associated with an indexer id -func idOfIndexerID(indexerID string) (int64, error) { - id, err := strconv.ParseInt(indexerID, 36, 64) - if err != nil { - return 0, fmt.Errorf("Unexpected indexer ID %s: %w", indexerID, err) - } - return id, nil -} - // numericEqualityQuery a numeric equality query for the given value and field func numericEqualityQuery(value int64, field string) *query.NumericRangeQuery { f := float64(value) @@ -131,7 +115,7 @@ func NewBleveIndexer(indexDir string) *BleveIndexer { func (b *BleveIndexer) Index(issues 
[]*IndexerData) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, issue := range issues { - if err := batch.Index(indexerID(issue.ID), struct { + if err := batch.Index(internal.Base36(issue.ID), struct { RepoID int64 Title string Content string @@ -152,7 +136,7 @@ func (b *BleveIndexer) Index(issues []*IndexerData) error { func (b *BleveIndexer) Delete(ids ...int64) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, id := range ids { - if err := batch.Delete(indexerID(id)); err != nil { + if err := batch.Delete(internal.Base36(id)); err != nil { return err } } @@ -190,7 +174,7 @@ func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int Hits: make([]Match, 0, len(result.Hits)), } for _, hit := range result.Hits { - id, err := idOfIndexerID(hit.ID) + id, err := internal.ParseBase36(hit.ID) if err != nil { return nil, err } From d0622963bb6945854a3409e6a8ac888e8ea5c725 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 17:13:56 +0800 Subject: [PATCH 20/43] fix: esRepoIndexerLatestVersion --- modules/indexer/code/elasticsearch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/code/elasticsearch.go b/modules/indexer/code/elasticsearch.go index 0c9fc32bc8698..cd47da1d9638d 100644 --- a/modules/indexer/code/elasticsearch.go +++ b/modules/indexer/code/elasticsearch.go @@ -46,7 +46,7 @@ type ElasticsearchIndexer struct { // NewElasticsearchIndexer creates a new elasticsearch indexer func NewElasticsearchIndexer(url, indexerName string) *ElasticsearchIndexer { - in := inner_elasticsearch.NewIndexer(url, indexerName, repoIndexerLatestVersion, defaultMapping) + in := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) indexer := &ElasticsearchIndexer{ inner: in, Indexer: in, From 4f1d3b7f552bf016656679829363864b2d95caec Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 17:32:49 +0800 
Subject: [PATCH 21/43] fix: split issues --- modules/indexer/issues/base/indexer.go | 18 +++++ modules/indexer/issues/base/model.go | 27 ++++++++ modules/indexer/issues/{ => bleve}/bleve.go | 33 +++++----- .../indexer/issues/{ => bleve}/bleve_test.go | 8 ++- modules/indexer/issues/{ => db}/db.go | 25 +++---- .../{ => elasticsearch}/elasticsearch.go | 31 +++++---- modules/indexer/issues/indexer.go | 66 ++++++------------- modules/indexer/issues/indexer_test.go | 5 +- .../issues/{ => meilisearch}/meilisearch.go | 40 +++++------ 9 files changed, 142 insertions(+), 111 deletions(-) create mode 100644 modules/indexer/issues/base/indexer.go create mode 100644 modules/indexer/issues/base/model.go rename modules/indexer/issues/{ => bleve}/bleve.go (87%) rename modules/indexer/issues/{ => bleve}/bleve_test.go (91%) rename modules/indexer/issues/{ => db}/db.go (53%) rename modules/indexer/issues/{ => elasticsearch}/elasticsearch.go (84%) rename modules/indexer/issues/{ => meilisearch}/meilisearch.go (74%) diff --git a/modules/indexer/issues/base/indexer.go b/modules/indexer/issues/base/indexer.go new file mode 100644 index 0000000000000..8ee68b362e41a --- /dev/null +++ b/modules/indexer/issues/base/indexer.go @@ -0,0 +1,18 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package base + +import ( + "context" + + "code.gitea.io/gitea/modules/indexer/internal" +) + +// Indexer defines an interface to index issue contents +type Indexer interface { + internal.Indexer + Index(issue []*IndexerData) error + Delete(ids ...int64) error + Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) +} diff --git a/modules/indexer/issues/base/model.go b/modules/indexer/issues/base/model.go new file mode 100644 index 0000000000000..1a17fccc99d08 --- /dev/null +++ b/modules/indexer/issues/base/model.go @@ -0,0 +1,27 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package base + +// IndexerData data stored in the issue indexer +type IndexerData struct { + ID int64 `json:"id"` + RepoID int64 `json:"repo_id"` + Title string `json:"title"` + Content string `json:"content"` + Comments []string `json:"comments"` + IsDelete bool `json:"is_delete"` + IDs []int64 `json:"ids"` +} + +// Match represents one search result +type Match struct { + ID int64 `json:"id"` + Score float64 `json:"score"` +} + +// SearchResult represents search results +type SearchResult struct { + Total int64 + Hits []Match +} diff --git a/modules/indexer/issues/bleve.go b/modules/indexer/issues/bleve/bleve.go similarity index 87% rename from modules/indexer/issues/bleve.go rename to modules/indexer/issues/bleve/bleve.go index 3c49741e3626e..2151d0ee298af 100644 --- a/modules/indexer/issues/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -1,13 +1,14 @@ // Copyright 2018 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package issues +package bleve import ( "context" "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" + "code.gitea.io/gitea/modules/indexer/issues/base" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -52,11 +53,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { const maxBatchSize = 16 -// BleveIndexerData an update to the issue indexer -type BleveIndexerData IndexerData +// IndexerData an update to the issue indexer +type IndexerData base.IndexerData // Type returns the document type, for bleve's mapping.Classifier interface. 
-func (i *BleveIndexerData) Type() string { +func (i *IndexerData) Type() string { return issueIndexerDocType } @@ -94,25 +95,25 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) { return mapping, nil } -var _ Indexer = &BleveIndexer{} +var _ base.Indexer = &Indexer{} -// BleveIndexer implements Indexer interface -type BleveIndexer struct { +// Indexer implements Indexer interface +type Indexer struct { inner *inner_bleve.Indexer internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } -// NewBleveIndexer creates a new bleve local indexer -func NewBleveIndexer(indexDir string) *BleveIndexer { +// NewIndexer creates a new bleve local indexer +func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, issueIndexerLatestVersion, generateIssueIndexMapping) - return &BleveIndexer{ + return &Indexer{ Indexer: inner, inner: inner, } } // Index will save the index data -func (b *BleveIndexer) Index(issues []*IndexerData) error { +func (b *Indexer) Index(issues []*base.IndexerData) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, issue := range issues { if err := batch.Index(internal.Base36(issue.ID), struct { @@ -133,7 +134,7 @@ func (b *BleveIndexer) Index(issues []*IndexerData) error { } // Delete deletes indexes by ids -func (b *BleveIndexer) Delete(ids ...int64) error { +func (b *Indexer) Delete(ids ...int64) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, id := range ids { if err := batch.Delete(internal.Base36(id)); err != nil { @@ -145,7 +146,7 @@ func (b *BleveIndexer) Delete(ids ...int64) error { // Search searches for issues by given conditions. 
// Returns the matching issue IDs -func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) { +func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { var repoQueriesP []*query.NumericRangeQuery for _, repoID := range repoIDs { repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID")) @@ -170,15 +171,15 @@ func (b *BleveIndexer) Search(ctx context.Context, keyword string, repoIDs []int return nil, err } - ret := SearchResult{ - Hits: make([]Match, 0, len(result.Hits)), + ret := base.SearchResult{ + Hits: make([]base.Match, 0, len(result.Hits)), } for _, hit := range result.Hits { id, err := internal.ParseBase36(hit.ID) if err != nil { return nil, err } - ret.Hits = append(ret.Hits, Match{ + ret.Hits = append(ret.Hits, base.Match{ ID: id, }) } diff --git a/modules/indexer/issues/bleve_test.go b/modules/indexer/issues/bleve/bleve_test.go similarity index 91% rename from modules/indexer/issues/bleve_test.go rename to modules/indexer/issues/bleve/bleve_test.go index 22827158e4c07..6fc11ec44a418 100644 --- a/modules/indexer/issues/bleve_test.go +++ b/modules/indexer/issues/bleve/bleve_test.go @@ -1,18 +1,20 @@ // Copyright 2018 The Gitea Authors. All rights reserved. 
// SPDX-License-Identifier: MIT -package issues +package bleve import ( "context" "testing" + "code.gitea.io/gitea/modules/indexer/issues/base" + "github.com/stretchr/testify/assert" ) func TestBleveIndexAndSearch(t *testing.T) { dir := t.TempDir() - indexer := NewBleveIndexer(dir) + indexer := NewIndexer(dir) defer indexer.Close() if _, err := indexer.Init(); err != nil { @@ -20,7 +22,7 @@ func TestBleveIndexAndSearch(t *testing.T) { return } - err := indexer.Index([]*IndexerData{ + err := indexer.Index([]*base.IndexerData{ { ID: 1, RepoID: 2, diff --git a/modules/indexer/issues/db.go b/modules/indexer/issues/db/db.go similarity index 53% rename from modules/indexer/issues/db.go rename to modules/indexer/issues/db/db.go index 9800e132ba02d..a1addd70d2a3c 100644 --- a/modules/indexer/issues/db.go +++ b/modules/indexer/issues/db/db.go @@ -1,7 +1,7 @@ // Copyright 2019 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package issues +package db import ( "context" @@ -9,43 +9,44 @@ import ( issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/modules/indexer/internal" inner_db "code.gitea.io/gitea/modules/indexer/internal/db" + "code.gitea.io/gitea/modules/indexer/issues/base" ) -var _ Indexer = &DBIndexer{} +var _ base.Indexer = &Indexer{} -// DBIndexer implements Indexer interface to use database's like search -type DBIndexer struct { +// Indexer implements Indexer interface to use database's like search +type Indexer struct { internal.Indexer } -func NewDBIndexer() *DBIndexer { - return &DBIndexer{ +func NewIndexer() *Indexer { + return &Indexer{ Indexer: &inner_db.Indexer{}, } } // Index dummy function -func (i *DBIndexer) Index(issue []*IndexerData) error { +func (i *Indexer) Index(issue []*base.IndexerData) error { return nil } // Delete dummy function -func (i *DBIndexer) Delete(ids ...int64) error { +func (i *Indexer) Delete(ids ...int64) error { return nil } // Search searches for issues -func (i *DBIndexer) Search(ctx 
context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) { +func (i *Indexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { total, ids, err := issues_model.SearchIssueIDsByKeyword(ctx, kw, repoIDs, limit, start) if err != nil { return nil, err } - result := SearchResult{ + result := base.SearchResult{ Total: total, - Hits: make([]Match, 0, limit), + Hits: make([]base.Match, 0, limit), } for _, id := range ids { - result.Hits = append(result.Hits, Match{ + result.Hits = append(result.Hits, base.Match{ ID: id, }) } diff --git a/modules/indexer/issues/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go similarity index 84% rename from modules/indexer/issues/elasticsearch.go rename to modules/indexer/issues/elasticsearch/elasticsearch.go index bd294d4c764dc..24933fb014cfe 100644 --- a/modules/indexer/issues/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -1,7 +1,7 @@ // Copyright 2019 The Gitea Authors. All rights reserved. 
// SPDX-License-Identifier: MIT -package issues +package elasticsearch import ( "context" @@ -11,22 +11,27 @@ import ( "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" + "code.gitea.io/gitea/modules/indexer/issues/base" "github.com/olivere/elastic/v7" ) -var _ Indexer = &ElasticsearchIndexer{} +const ( + issueIndexerLatestVersion = 2 +) + +var _ base.Indexer = &Indexer{} -// ElasticsearchIndexer implements Indexer interface -type ElasticsearchIndexer struct { +// Indexer implements Indexer interface +type Indexer struct { inner *inner_elasticsearch.Indexer internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } -// NewElasticsearchIndexer creates a new elasticsearch indexer -func NewElasticsearchIndexer(url, indexerName string) *ElasticsearchIndexer { +// NewIndexer creates a new elasticsearch indexer +func NewIndexer(url, indexerName string) *Indexer { in := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) - indexer := &ElasticsearchIndexer{ + indexer := &Indexer{ inner: in, Indexer: in, } @@ -63,7 +68,7 @@ const ( ) // Index will save the index data -func (b *ElasticsearchIndexer) Index(issues []*IndexerData) error { +func (b *Indexer) Index(issues []*base.IndexerData) error { if len(issues) == 0 { return nil } else if len(issues) == 1 { @@ -106,7 +111,7 @@ func (b *ElasticsearchIndexer) Index(issues []*IndexerData) error { } // Delete deletes indexes by ids -func (b *ElasticsearchIndexer) Delete(ids ...int64) error { +func (b *Indexer) Delete(ids ...int64) error { if len(ids) == 0 { return nil } else if len(ids) == 1 { @@ -135,7 +140,7 @@ func (b *ElasticsearchIndexer) Delete(ids ...int64) error { // Search searches for issues by given conditions. 
// Returns the matching issue IDs -func (b *ElasticsearchIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) { +func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { kwQuery := elastic.NewMultiMatchQuery(keyword, "title", "content", "comments") query := elastic.NewBoolQuery() query = query.Must(kwQuery) @@ -157,15 +162,15 @@ func (b *ElasticsearchIndexer) Search(ctx context.Context, keyword string, repoI return nil, b.inner.CheckError(err) } - hits := make([]Match, 0, limit) + hits := make([]base.Match, 0, limit) for _, hit := range searchResult.Hits.Hits { id, _ := strconv.ParseInt(hit.Id, 10, 64) - hits = append(hits, Match{ + hits = append(hits, base.Match{ ID: id, }) } - return &SearchResult{ + return &base.SearchResult{ Total: searchResult.TotalHits(), Hits: hits, }, nil diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index 19a73e10ec5ae..da2632492b09c 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -10,11 +10,16 @@ import ( "runtime/pprof" "time" - "code.gitea.io/gitea/models/db" + db_model "code.gitea.io/gitea/models/db" issues_model "code.gitea.io/gitea/models/issues" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/internal" + "code.gitea.io/gitea/modules/indexer/issues/base" + "code.gitea.io/gitea/modules/indexer/issues/bleve" + "code.gitea.io/gitea/modules/indexer/issues/db" + "code.gitea.io/gitea/modules/indexer/issues/elasticsearch" + "code.gitea.io/gitea/modules/indexer/issues/meilisearch" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/queue" @@ -22,40 +27,9 @@ import ( "code.gitea.io/gitea/modules/util" ) -// IndexerData data stored in the issue indexer -type IndexerData struct { - ID int64 `json:"id"` - RepoID int64 
`json:"repo_id"` - Title string `json:"title"` - Content string `json:"content"` - Comments []string `json:"comments"` - IsDelete bool `json:"is_delete"` - IDs []int64 `json:"ids"` -} - -// Match represents on search result -type Match struct { - ID int64 `json:"id"` - Score float64 `json:"score"` -} - -// SearchResult represents search results -type SearchResult struct { - Total int64 - Hits []Match -} - -// Indexer defines an interface to indexer issues contents -type Indexer interface { - internal.Indexer - Index(issue []*IndexerData) error - Delete(ids ...int64) error - Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) -} - var ( // issueIndexerQueue queue of issue ids to be updated - issueIndexerQueue *queue.WorkerPoolQueue[*IndexerData] + issueIndexerQueue *queue.WorkerPoolQueue[*base.IndexerData] holder = internal.NewIndexerHolder() ) @@ -69,13 +43,13 @@ func InitIssueIndexer(syncReindex bool) { // Create the Queue switch setting.Indexer.IssueType { case "bleve", "elasticsearch", "meilisearch": - handler := func(items ...*IndexerData) (unhandled []*IndexerData) { - indexer := holder.Get().(Indexer) + handler := func(items ...*base.IndexerData) (unhandled []*base.IndexerData) { + indexer := holder.Get().(base.Indexer) if indexer == nil { log.Warn("Issue indexer handler: indexer is not ready, retry later.") return items } - toIndex := make([]*IndexerData, 0, len(items)) + toIndex := make([]*base.IndexerData, 0, len(items)) for _, indexerData := range items { log.Trace("IndexerData Process: %d %v %t", indexerData.ID, indexerData.IDs, indexerData.IsDelete) if indexerData.IsDelete { @@ -106,7 +80,7 @@ func InitIssueIndexer(syncReindex bool) { log.Fatal("Unable to create issue indexer queue") } default: - issueIndexerQueue = queue.CreateSimpleQueue[*IndexerData](ctx, "issue_indexer", nil) + issueIndexerQueue = queue.CreateSimpleQueue[*base.IndexerData](ctx, "issue_indexer", nil) } 
graceful.GetManager().RunAtTerminate(finished) @@ -128,7 +102,7 @@ func InitIssueIndexer(syncReindex bool) { log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err) } }() - issueIndexer := NewBleveIndexer(setting.Indexer.IssuePath) + issueIndexer := bleve.NewIndexer(setting.Indexer.IssuePath) exist, err := issueIndexer.Init() if err != nil { holder.Set(nil) @@ -146,7 +120,7 @@ func InitIssueIndexer(syncReindex bool) { }) log.Debug("Created Bleve Indexer") case "elasticsearch": - issueIndexer := NewElasticsearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) + issueIndexer := elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) exist, err := issueIndexer.Init() if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) @@ -154,10 +128,10 @@ func InitIssueIndexer(syncReindex bool) { populate = !exist holder.Set(issueIndexer) case "db": - issueIndexer := NewDBIndexer() + issueIndexer := db.NewIndexer() holder.Set(issueIndexer) case "meilisearch": - issueIndexer, err := NewMeilisearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) + issueIndexer, err := meilisearch.NewMeilisearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) if err != nil { log.Fatal("Unable to initialize Meilisearch Issue Indexer at connection: %s Error: %v", setting.Indexer.IssueConnStr, err) } @@ -225,8 +199,8 @@ func populateIssueIndexer(ctx context.Context) { default: } repos, _, err := repo_model.SearchRepositoryByName(ctx, &repo_model.SearchRepoOptions{ - ListOptions: db.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize}, - OrderBy: db.SearchOrderByID, + ListOptions: db_model.ListOptions{Page: page, PageSize: repo_model.RepositoryListDefaultPageSize}, + OrderBy: 
db_model.SearchOrderByID, Private: true, Collaborate: util.OptionalBoolFalse, }) @@ -279,7 +253,7 @@ func UpdateIssueIndexer(issue *issues_model.Issue) { comments = append(comments, comment.Content) } } - indexerData := &IndexerData{ + indexerData := &base.IndexerData{ ID: issue.ID, RepoID: issue.RepoID, Title: issue.Title, @@ -304,7 +278,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { if len(ids) == 0 { return } - indexerData := &IndexerData{ + indexerData := &base.IndexerData{ IDs: ids, IsDelete: true, } @@ -317,7 +291,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { // WARNNING: You have to ensure user have permission to visit repoIDs' issues func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) ([]int64, error) { var issueIDs []int64 - indexer := holder.Get().(Indexer) + indexer := holder.Get().(base.Indexer) if indexer == nil { log.Error("SearchIssuesByKeyword(): unable to get indexer!") diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go index a2d1794f4b662..9821e793729f2 100644 --- a/modules/indexer/issues/indexer_test.go +++ b/modules/indexer/issues/indexer_test.go @@ -11,6 +11,7 @@ import ( "time" "code.gitea.io/gitea/models/unittest" + "code.gitea.io/gitea/modules/indexer/issues/bleve" "code.gitea.io/gitea/modules/setting" _ "code.gitea.io/gitea/models" @@ -42,8 +43,8 @@ func TestBleveSearchIssues(t *testing.T) { setting.LoadQueueSettings() InitIssueIndexer(true) defer func() { - indexer := holder.get() - if bleveIndexer, ok := indexer.(*BleveIndexer); ok { + indexer := holder.Get() + if bleveIndexer, ok := indexer.(*bleve.Indexer); ok { bleveIndexer.Close() } }() diff --git a/modules/indexer/issues/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go similarity index 74% rename from modules/indexer/issues/meilisearch.go rename to modules/indexer/issues/meilisearch/meilisearch.go index 
990bc57a05f5d..1d41a975472ad 100644 --- a/modules/indexer/issues/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -1,7 +1,7 @@ // Copyright 2023 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package issues +package meilisearch import ( "context" @@ -10,13 +10,15 @@ import ( "sync" "time" + "code.gitea.io/gitea/modules/indexer/issues/base" + "github.com/meilisearch/meilisearch-go" ) -var _ Indexer = &MeilisearchIndexer{} +var _ base.Indexer = &Indexer{} -// MeilisearchIndexer implements Indexer interface -type MeilisearchIndexer struct { +// Indexer implements Indexer interface +type Indexer struct { client *meilisearch.Client indexerName string available bool @@ -24,14 +26,14 @@ type MeilisearchIndexer struct { lock sync.RWMutex } -// MeilisearchIndexer creates a new meilisearch indexer -func NewMeilisearchIndexer(url, apiKey, indexerName string) (*MeilisearchIndexer, error) { +// NewMeilisearchIndexer creates a new meilisearch indexer +func NewMeilisearchIndexer(url, apiKey, indexerName string) (*Indexer, error) { client := meilisearch.NewClient(meilisearch.ClientConfig{ Host: url, APIKey: apiKey, }) - indexer := &MeilisearchIndexer{ + indexer := &Indexer{ client: client, indexerName: indexerName, available: true, @@ -55,7 +57,7 @@ func NewMeilisearchIndexer(url, apiKey, indexerName string) (*MeilisearchIndexer } // Init will initialize the indexer -func (b *MeilisearchIndexer) Init() (bool, error) { +func (b *Indexer) Init() (bool, error) { _, err := b.client.GetIndex(b.indexerName) if err == nil { return true, nil @@ -73,14 +75,14 @@ func (b *MeilisearchIndexer) Init() (bool, error) { } // Ping checks if meilisearch is available -func (b *MeilisearchIndexer) Ping() bool { +func (b *Indexer) Ping() bool { b.lock.RLock() defer b.lock.RUnlock() return b.available } // Index will save the index data -func (b *MeilisearchIndexer) Index(issues []*IndexerData) error { +func (b *Indexer) Index(issues []*base.IndexerData) error { if 
len(issues) == 0 { return nil } @@ -95,7 +97,7 @@ func (b *MeilisearchIndexer) Index(issues []*IndexerData) error { } // Delete deletes indexes by ids -func (b *MeilisearchIndexer) Delete(ids ...int64) error { +func (b *Indexer) Delete(ids ...int64) error { if len(ids) == 0 { return nil } @@ -112,7 +114,7 @@ func (b *MeilisearchIndexer) Delete(ids ...int64) error { // Search searches for issues by given conditions. // Returns the matching issue IDs -func (b *MeilisearchIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) { +func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { repoFilters := make([]string, 0, len(repoIDs)) for _, repoID := range repoIDs { repoFilters = append(repoFilters, "repo_id = "+strconv.FormatInt(repoID, 10)) @@ -127,20 +129,20 @@ func (b *MeilisearchIndexer) Search(ctx context.Context, keyword string, repoIDs return nil, b.checkError(err) } - hits := make([]Match, 0, len(searchRes.Hits)) + hits := make([]base.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { - hits = append(hits, Match{ + hits = append(hits, base.Match{ ID: int64(hit.(map[string]interface{})["id"].(float64)), }) } - return &SearchResult{ + return &base.SearchResult{ Total: searchRes.TotalHits, Hits: hits, }, nil } // Close implements indexer -func (b *MeilisearchIndexer) Close() { +func (b *Indexer) Close() { select { case <-b.stopTimer: default: @@ -148,11 +150,11 @@ func (b *MeilisearchIndexer) Close() { } } -func (b *MeilisearchIndexer) checkError(err error) error { +func (b *Indexer) checkError(err error) error { return err } -func (b *MeilisearchIndexer) checkAvailability() { +func (b *Indexer) checkAvailability() { _, err := b.client.Health() if err != nil { b.setAvailability(false) @@ -161,7 +163,7 @@ func (b *MeilisearchIndexer) checkAvailability() { b.setAvailability(true) } -func (b *MeilisearchIndexer) 
setAvailability(available bool) { +func (b *Indexer) setAvailability(available bool) { b.lock.Lock() defer b.lock.Unlock() From a29eceecd58db8758cc74fdd1f87f10457bb25ce Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 17:56:50 +0800 Subject: [PATCH 22/43] fix: split codes --- modules/indexer/code/base/indexer.go | 19 +++++ modules/indexer/code/base/model.go | 44 +++++++++++ modules/indexer/code/base/util.go | 32 ++++++++ modules/indexer/code/{ => bleve}/bleve.go | 41 +++++----- modules/indexer/code/bleve_test.go | 31 -------- .../code/{ => elasticsearch}/elasticsearch.go | 45 +++++------ .../code/elasticsearch/elasticsearch_test.go | 16 ++++ modules/indexer/code/elasticsearch_test.go | 42 ---------- modules/indexer/code/git.go | 32 +++----- modules/indexer/code/indexer.go | 79 +++---------------- modules/indexer/code/indexer_test.go | 47 ++++++++++- modules/indexer/code/search.go | 9 ++- 12 files changed, 230 insertions(+), 207 deletions(-) create mode 100644 modules/indexer/code/base/indexer.go create mode 100644 modules/indexer/code/base/model.go create mode 100644 modules/indexer/code/base/util.go rename modules/indexer/code/{ => bleve}/bleve.go (87%) delete mode 100644 modules/indexer/code/bleve_test.go rename modules/indexer/code/{ => elasticsearch}/elasticsearch.go (85%) create mode 100644 modules/indexer/code/elasticsearch/elasticsearch_test.go delete mode 100644 modules/indexer/code/elasticsearch_test.go diff --git a/modules/indexer/code/base/indexer.go b/modules/indexer/code/base/indexer.go new file mode 100644 index 0000000000000..8c38f21f7c475 --- /dev/null +++ b/modules/indexer/code/base/indexer.go @@ -0,0 +1,19 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package base + +import ( + "context" + + repo_model "code.gitea.io/gitea/models/repo" + "code.gitea.io/gitea/modules/indexer/internal" +) + +// Indexer defines an interface to index and search code contents +type Indexer interface { + internal.Indexer + Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error + Delete(repoID int64) error + Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) +} diff --git a/modules/indexer/code/base/model.go b/modules/indexer/code/base/model.go new file mode 100644 index 0000000000000..c60a31a5005ee --- /dev/null +++ b/modules/indexer/code/base/model.go @@ -0,0 +1,44 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package base + +import "code.gitea.io/gitea/modules/timeutil" + +type FileUpdate struct { + Filename string + BlobSha string + Size int64 + Sized bool +} + +// RepoChanges changes (file additions/updates/removals) to a repo +type RepoChanges struct { + Updates []FileUpdate + RemovedFilenames []string +} + +// IndexerData represents data stored in the code indexer +type IndexerData struct { + RepoID int64 +} + +// SearchResult result of performing a search in a repo +type SearchResult struct { + RepoID int64 + StartIndex int + EndIndex int + Filename string + Content string + CommitID string + UpdatedUnix timeutil.TimeStamp + Language string + Color string +} + +// SearchResultLanguages result of top languages count in search results +type SearchResultLanguages struct { + Language string + Color string + Count int +} diff --git a/modules/indexer/code/base/util.go b/modules/indexer/code/base/util.go new file mode 100644 index 0000000000000..d8eda3196e13c --- /dev/null +++ b/modules/indexer/code/base/util.go @@ -0,0 +1,32 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package base + +import ( + "strings" + + "code.gitea.io/gitea/modules/indexer/internal" + "code.gitea.io/gitea/modules/log" +) + +func FilenameIndexerID(repoID int64, filename string) string { + return internal.Base36(repoID) + "_" + filename +} + +func ParseIndexerID(indexerID string) (int64, string) { + index := strings.IndexByte(indexerID, '_') + if index == -1 { + log.Error("Unexpected ID in repo indexer: %s", indexerID) + } + repoID, _ := internal.ParseBase36(indexerID[:index]) + return repoID, indexerID[index+1:] +} + +func FilenameOfIndexerID(indexerID string) string { + index := strings.IndexByte(indexerID, '_') + if index == -1 { + log.Error("Unexpected ID in repo indexer: %s", indexerID) + } + return indexerID[index+1:] +} diff --git a/modules/indexer/code/bleve.go b/modules/indexer/code/bleve/bleve.go similarity index 87% rename from modules/indexer/code/bleve.go rename to modules/indexer/code/bleve/bleve.go index 02e06a1ef40d7..82d2a787cfb6c 100644 --- a/modules/indexer/code/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -1,7 +1,7 @@ // Copyright 2019 The Gitea Authors. All rights reserved. 
// SPDX-License-Identifier: MIT -package code +package bleve import ( "bufio" @@ -16,6 +16,7 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer/code/base" "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/log" @@ -115,25 +116,25 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { return mapping, nil } -var _ Indexer = &BleveIndexer{} +var _ base.Indexer = &Indexer{} -// BleveIndexer represents a bleve indexer implementation -type BleveIndexer struct { +// Indexer represents a bleve indexer implementation +type Indexer struct { inner *inner_bleve.Indexer internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } -// NewBleveIndexer creates a new bleve local indexer -func NewBleveIndexer(indexDir string) *BleveIndexer { +// NewIndexer creates a new bleve local indexer +func NewIndexer(indexDir string) *Indexer { inner := inner_bleve.NewIndexer(indexDir, repoIndexerLatestVersion, generateBleveIndexMapping) - return &BleveIndexer{ + return &Indexer{ Indexer: inner, inner: inner, } } -func (b *BleveIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, - update fileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, +func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, + update base.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { @@ -178,7 +179,7 @@ func (b *BleveIndexer) addUpdate(ctx context.Context, batchWriter git.WriteClose if _, err = batchReader.Discard(1); err != nil { return err } - id := 
filenameIndexerID(repo.ID, update.Filename) + id := base.FilenameIndexerID(repo.ID, update.Filename) return batch.Index(id, &RepoIndexerData{ RepoID: repo.ID, CommitID: commitSha, @@ -188,13 +189,13 @@ func (b *BleveIndexer) addUpdate(ctx context.Context, batchWriter git.WriteClose }) } -func (b *BleveIndexer) addDelete(filename string, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch) error { - id := filenameIndexerID(repo.ID, filename) +func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch) error { + id := base.FilenameIndexerID(repo.ID, filename) return batch.Delete(id) } // Index indexes the data -func (b *BleveIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { +func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *base.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { @@ -223,7 +224,7 @@ func (b *BleveIndexer) Index(ctx context.Context, repo *repo_model.Repository, s } // Delete deletes indexes by ids -func (b *BleveIndexer) Delete(repoID int64) error { +func (b *Indexer) Delete(repoID int64) error { query := numericEqualityQuery(repoID, "RepoID") searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false) result, err := b.inner.Indexer.Search(searchRequest) @@ -241,7 +242,7 @@ func (b *BleveIndexer) Delete(repoID int64) error { // Search searches for files in the specified repo. 
// Returns the matching file-paths -func (b *BleveIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*base.SearchResult, []*base.SearchResultLanguages, error) { var ( indexerQuery query.Query keywordQuery query.Query @@ -301,7 +302,7 @@ func (b *BleveIndexer) Search(ctx context.Context, repoIDs []int64, language, ke total := int64(result.Total) - searchResults := make([]*SearchResult, len(result.Hits)) + searchResults := make([]*base.SearchResult, len(result.Hits)) for i, hit := range result.Hits { startIndex, endIndex := -1, -1 for _, locations := range hit.Locations["Content"] { @@ -320,11 +321,11 @@ func (b *BleveIndexer) Search(ctx context.Context, repoIDs []int64, language, ke if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil { updatedUnix = timeutil.TimeStamp(t.Unix()) } - searchResults[i] = &SearchResult{ + searchResults[i] = &base.SearchResult{ RepoID: int64(hit.Fields["RepoID"].(float64)), StartIndex: startIndex, EndIndex: endIndex, - Filename: filenameOfIndexerID(hit.ID), + Filename: base.FilenameOfIndexerID(hit.ID), Content: hit.Fields["Content"].(string), CommitID: hit.Fields["CommitID"].(string), UpdatedUnix: updatedUnix, @@ -333,7 +334,7 @@ func (b *BleveIndexer) Search(ctx context.Context, repoIDs []int64, language, ke } } - searchResultLanguages := make([]*SearchResultLanguages, 0, 10) + searchResultLanguages := make([]*base.SearchResultLanguages, 0, 10) if len(language) > 0 { // Use separate query to go get all language counts facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false) @@ -351,7 +352,7 @@ func (b *BleveIndexer) Search(ctx context.Context, repoIDs []int64, language, ke if len(term.Term) == 0 { continue } - searchResultLanguages = 
append(searchResultLanguages, &SearchResultLanguages{ + searchResultLanguages = append(searchResultLanguages, &base.SearchResultLanguages{ Language: term.Term, Color: enry.GetColor(term.Term), Count: term.Count, diff --git a/modules/indexer/code/bleve_test.go b/modules/indexer/code/bleve_test.go deleted file mode 100644 index 8476da71f5b82..0000000000000 --- a/modules/indexer/code/bleve_test.go +++ /dev/null @@ -1,31 +0,0 @@ -// Copyright 2019 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package code - -import ( - "testing" - - "code.gitea.io/gitea/models/unittest" - - "github.com/stretchr/testify/assert" -) - -func TestBleveIndexAndSearch(t *testing.T) { - unittest.PrepareTestEnv(t) - - dir := t.TempDir() - - idx := NewBleveIndexer(dir) - _, err := idx.Init() - if err != nil { - assert.Fail(t, "Unable to create bleve indexer Error: %v", err) - if idx != nil { - idx.Close() - } - return - } - defer idx.Close() - - testIndexer("beleve", t, idx) -} diff --git a/modules/indexer/code/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go similarity index 85% rename from modules/indexer/code/elasticsearch.go rename to modules/indexer/code/elasticsearch/elasticsearch.go index cd47da1d9638d..4a5bd64c0e645 100644 --- a/modules/indexer/code/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -1,7 +1,7 @@ // Copyright 2020 The Gitea Authors. All rights reserved. 
// SPDX-License-Identifier: MIT -package code +package elasticsearch import ( "bufio" @@ -16,6 +16,7 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/code/base" "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/json" @@ -36,18 +37,18 @@ const ( esMultiMatchTypePhrasePrefix = "phrase_prefix" ) -var _ Indexer = &ElasticsearchIndexer{} +var _ base.Indexer = &Indexer{} -// ElasticsearchIndexer implements Indexer interface -type ElasticsearchIndexer struct { +// Indexer implements Indexer interface +type Indexer struct { inner *inner_elasticsearch.Indexer internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } -// NewElasticsearchIndexer creates a new elasticsearch indexer -func NewElasticsearchIndexer(url, indexerName string) *ElasticsearchIndexer { +// NewIndexer creates a new elasticsearch indexer +func NewIndexer(url, indexerName string) *Indexer { in := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) - indexer := &ElasticsearchIndexer{ + indexer := &Indexer{ inner: in, Indexer: in, } @@ -84,7 +85,7 @@ const ( }` ) -func (b *ElasticsearchIndexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update fileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update base.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -127,7 +128,7 @@ func (b *ElasticsearchIndexer) addUpdate(ctx context.Context, batchWriter 
git.Wr if _, err = batchReader.Discard(1); err != nil { return nil, err } - id := filenameIndexerID(repo.ID, update.Filename) + id := base.FilenameIndexerID(repo.ID, update.Filename) return []elastic.BulkableRequest{ elastic.NewBulkIndexRequest(). @@ -143,15 +144,15 @@ func (b *ElasticsearchIndexer) addUpdate(ctx context.Context, batchWriter git.Wr }, nil } -func (b *ElasticsearchIndexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { - id := filenameIndexerID(repo.ID, filename) +func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { + id := base.FilenameIndexerID(repo.ID, filename) return elastic.NewBulkDeleteRequest(). Index(b.inner.IndexName()). Id(id) } // Index will save the index data -func (b *ElasticsearchIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error { +func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *base.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! @@ -190,7 +191,7 @@ func (b *ElasticsearchIndexer) Index(ctx context.Context, repo *repo_model.Repos } // Delete deletes indexes by ids -func (b *ElasticsearchIndexer) Delete(repoID int64) error { +func (b *Indexer) Delete(repoID int64) error { _, err := b.inner.Client.DeleteByQuery(b.inner.IndexName()). Query(elastic.NewTermsQuery("repo_id", repoID)). 
Do(graceful.GetManager().HammerContext()) @@ -213,8 +214,8 @@ func indexPos(content, start, end string) (int, int) { return startIdx, startIdx + len(start) + endIdx + len(end) } -func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*SearchResult, []*SearchResultLanguages, error) { - hits := make([]*SearchResult, 0, pageSize) +func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*base.SearchResult, []*base.SearchResultLanguages, error) { + hits := make([]*base.SearchResult, 0, pageSize) for _, hit := range searchResult.Hits.Hits { // FIXME: There is no way to get the position the keyword on the content currently on the same request. // So we get it from content, this may made the query slower. See @@ -233,7 +234,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) panic(fmt.Sprintf("2===%#v", hit.Highlight)) } - repoID, fileName := parseIndexerID(hit.Id) + repoID, fileName := base.ParseIndexerID(hit.Id) res := make(map[string]interface{}) if err := json.Unmarshal(hit.Source, &res); err != nil { return 0, nil, nil, err @@ -241,7 +242,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) language := res["language"].(string) - hits = append(hits, &SearchResult{ + hits = append(hits, &base.SearchResult{ RepoID: repoID, Filename: fileName, CommitID: res["commit_id"].(string), @@ -257,14 +258,14 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) return searchResult.TotalHits(), hits, extractAggs(searchResult), nil } -func extractAggs(searchResult *elastic.SearchResult) []*SearchResultLanguages { - var searchResultLanguages []*SearchResultLanguages +func extractAggs(searchResult *elastic.SearchResult) []*base.SearchResultLanguages { + var searchResultLanguages []*base.SearchResultLanguages agg, found := searchResult.Aggregations.Terms("language") if found { - searchResultLanguages = 
make([]*SearchResultLanguages, 0, 10) + searchResultLanguages = make([]*base.SearchResultLanguages, 0, 10) for _, bucket := range agg.Buckets { - searchResultLanguages = append(searchResultLanguages, &SearchResultLanguages{ + searchResultLanguages = append(searchResultLanguages, &base.SearchResultLanguages{ Language: bucket.Key.(string), Color: enry.GetColor(bucket.Key.(string)), Count: int(bucket.DocCount), @@ -275,7 +276,7 @@ func extractAggs(searchResult *elastic.SearchResult) []*SearchResultLanguages { } // Search searches for codes and language stats by given conditions. -func (b *ElasticsearchIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*base.SearchResult, []*base.SearchResultLanguages, error) { searchType := esMultiMatchTypeBestFields if isMatch { searchType = esMultiMatchTypePhrasePrefix diff --git a/modules/indexer/code/elasticsearch/elasticsearch_test.go b/modules/indexer/code/elasticsearch/elasticsearch_test.go new file mode 100644 index 0000000000000..c6ba93e76d469 --- /dev/null +++ b/modules/indexer/code/elasticsearch/elasticsearch_test.go @@ -0,0 +1,16 @@ +// Copyright 2020 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package elasticsearch + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestIndexPos(t *testing.T) { + startIdx, endIdx := indexPos("test index start and end", "start", "end") + assert.EqualValues(t, 11, startIdx) + assert.EqualValues(t, 24, endIdx) +} diff --git a/modules/indexer/code/elasticsearch_test.go b/modules/indexer/code/elasticsearch_test.go deleted file mode 100644 index 467f8582a5bf7..0000000000000 --- a/modules/indexer/code/elasticsearch_test.go +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright 2020 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package code - -import ( - "os" - "testing" - - "code.gitea.io/gitea/models/unittest" - - "github.com/stretchr/testify/assert" -) - -func TestESIndexAndSearch(t *testing.T) { - unittest.PrepareTestEnv(t) - - u := os.Getenv("TEST_INDEXER_CODE_ES_URL") - if u == "" { - t.SkipNow() - return - } - - indexer := NewElasticsearchIndexer(u, "gitea_codes") - if _, err := indexer.Init(); err != nil { - assert.Fail(t, "Unable to init ES indexer Error: %v", err) - if indexer != nil { - indexer.Close() - } - return - } - - defer indexer.Close() - - testIndexer("elastic_search", t, indexer) -} - -func TestIndexPos(t *testing.T) { - startIdx, endIdx := indexPos("test index start and end", "start", "end") - assert.EqualValues(t, 11, startIdx) - assert.EqualValues(t, 24, endIdx) -} diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 0ba4b9f1e19a4..c1f623e142822 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -10,23 +10,11 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer/code/base" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" ) -type fileUpdate struct { - Filename string - BlobSha string - Size int64 - Sized bool -} - -// repoChanges changes (file additions/updates/removals) to a repo 
-type repoChanges struct { - Updates []fileUpdate - RemovedFilenames []string -} - func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (string, error) { stdout, _, err := git.NewCommand(ctx, "show-ref", "-s").AddDynamicArguments(git.BranchPrefix + repo.DefaultBranch).RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) if err != nil { @@ -36,7 +24,7 @@ func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (stri } // getRepoChanges returns changes to repo since last indexer update -func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) { +func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*base.RepoChanges, error) { status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode) if err != nil { return nil, err @@ -67,16 +55,16 @@ func isIndexable(entry *git.TreeEntry) bool { } // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command -func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { +func parseGitLsTreeOutput(stdout []byte) ([]base.FileUpdate, error) { entries, err := git.ParseTreeEntries(stdout) if err != nil { return nil, err } idxCount := 0 - updates := make([]fileUpdate, len(entries)) + updates := make([]base.FileUpdate, len(entries)) for _, entry := range entries { if isIndexable(entry) { - updates[idxCount] = fileUpdate{ + updates[idxCount] = base.FileUpdate{ Filename: entry.Name(), BlobSha: entry.ID.String(), Size: entry.Size(), @@ -89,8 +77,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]fileUpdate, error) { } // genesisChanges get changes to add repo to the indexer for the first time -func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) { - var changes repoChanges +func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*base.RepoChanges, error) { + var changes base.RepoChanges 
stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { return nil, runErr @@ -102,20 +90,20 @@ func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision s } // nonGenesisChanges get changes since the previous indexer update -func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*repoChanges, error) { +func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*base.RepoChanges, error) { diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { // previous commit sha may have been removed by a force push, so // try rebuilding from scratch log.Warn("git diff: %v", runErr) - if err := holder.Get().(Indexer).Delete(repo.ID); err != nil { + if err := holder.Get().(base.Indexer).Delete(repo.ID); err != nil { return nil, err } return genesisChanges(ctx, repo, revision) } - var changes repoChanges + var changes base.RepoChanges var err error updatedFilenames := make([]string, 0, 10) for _, line := range strings.Split(stdout, "\n") { diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 74d4ded0cb573..df61f13b28fb7 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -7,81 +7,28 @@ import ( "context" "os" "runtime/pprof" - "strings" "time" "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" + "code.gitea.io/gitea/modules/indexer/code/base" + "code.gitea.io/gitea/modules/indexer/code/bleve" + "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" 
"code.gitea.io/gitea/modules/queue" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" ) -// SearchResult result of performing a search in a repo -type SearchResult struct { - RepoID int64 - StartIndex int - EndIndex int - Filename string - Content string - CommitID string - UpdatedUnix timeutil.TimeStamp - Language string - Color string -} - -// SearchResultLanguages result of top languages count in search results -type SearchResultLanguages struct { - Language string - Color string - Count int -} - -// Indexer defines an interface to index and search code contents -type Indexer interface { - internal.Indexer - Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error - Delete(repoID int64) error - Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) -} - -func filenameIndexerID(repoID int64, filename string) string { - return internal.Base36(repoID) + "_" + filename -} - -func parseIndexerID(indexerID string) (int64, string) { - index := strings.IndexByte(indexerID, '_') - if index == -1 { - log.Error("Unexpected ID in repo indexer: %s", indexerID) - } - repoID, _ := internal.ParseBase36(indexerID[:index]) - return repoID, indexerID[index+1:] -} - -func filenameOfIndexerID(indexerID string) string { - index := strings.IndexByte(indexerID, '_') - if index == -1 { - log.Error("Unexpected ID in repo indexer: %s", indexerID) - } - return indexerID[index+1:] -} - -// IndexerData represents data stored in the code indexer -type IndexerData struct { - RepoID int64 -} - var ( - indexerQueue *queue.WorkerPoolQueue[*IndexerData] + indexerQueue *queue.WorkerPoolQueue[*base.IndexerData] holder = internal.NewIndexerHolder() ) -func index(ctx context.Context, indexer Indexer, repoID int64) error { +func index(ctx context.Context, indexer base.Indexer, repoID int64) 
error { repo, err := repo_model.GetRepositoryByID(ctx, repoID) if repo_model.IsErrRepoNotExist(err) { return indexer.Delete(repoID) @@ -161,8 +108,8 @@ func Init() { // Create the Queue switch setting.Indexer.RepoType { case "bleve", "elasticsearch": - handler := func(items ...*IndexerData) (unhandled []*IndexerData) { - indexer := holder.Get().(Indexer) + handler := func(items ...*base.IndexerData) (unhandled []*base.IndexerData) { + indexer := holder.Get().(base.Indexer) if indexer == nil { log.Warn("Codes indexer handler: indexer is not ready, retry later.") return items @@ -211,7 +158,7 @@ func Init() { pprof.SetGoroutineLabels(ctx) start := time.Now() var ( - rIndexer Indexer + rIndexer base.Indexer existed bool err error ) @@ -226,7 +173,7 @@ func Init() { } }() - rIndexer = NewBleveIndexer(setting.Indexer.RepoPath) + rIndexer = bleve.NewIndexer(setting.Indexer.RepoPath) existed, err = rIndexer.Init() if err != nil { cancel() @@ -244,7 +191,7 @@ func Init() { } }() - rIndexer = NewElasticsearchIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) + rIndexer = elasticsearch.NewIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) if err != nil { cancel() holder.Get().Close() @@ -310,7 +257,7 @@ func Init() { // UpdateRepoIndexer update a repository's entries in the indexer func UpdateRepoIndexer(repo *repo_model.Repository) { - indexData := &IndexerData{RepoID: repo.ID} + indexData := &base.IndexerData{RepoID: repo.ID} if err := indexerQueue.Push(indexData); err != nil { log.Error("Update repo index data %v failed: %v", indexData, err) } @@ -318,7 +265,7 @@ func UpdateRepoIndexer(repo *repo_model.Repository) { // IsAvailable checks if issue indexer is available func IsAvailable() bool { - idx := holder.Get().(Indexer) + idx := holder.Get().(base.Indexer) if idx == nil { log.Error("IsAvailable(): unable to get indexer") return false @@ -375,7 +322,7 @@ func populateRepoIndexer(ctx context.Context) { return default: } - if err := 
indexerQueue.Push(&IndexerData{RepoID: id}); err != nil { + if err := indexerQueue.Push(&base.IndexerData{RepoID: id}); err != nil { log.Error("indexerQueue.Push: %v", err) return } diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 52f7e76e413f9..1db788403c694 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -5,11 +5,14 @@ package code import ( "context" + "os" "path/filepath" "testing" "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/indexer/code/base" + "code.gitea.io/gitea/modules/indexer/code/bleve" _ "code.gitea.io/gitea/models" @@ -22,7 +25,7 @@ func TestMain(m *testing.M) { }) } -func testIndexer(name string, t *testing.T, indexer Indexer) { +func testIndexer(name string, t *testing.T, indexer base.Indexer) { t.Run(name, func(t *testing.T) { var repoID int64 = 1 err := index(git.DefaultContext, indexer, repoID) @@ -84,3 +87,45 @@ func testIndexer(name string, t *testing.T, indexer Indexer) { assert.NoError(t, indexer.Delete(repoID)) }) } + +func TestBleveIndexAndSearch(t *testing.T) { + unittest.PrepareTestEnv(t) + + dir := t.TempDir() + + idx := bleve.NewIndexer(dir) + _, err := idx.Init() + if err != nil { + assert.Fail(t, "Unable to create bleve indexer Error: %v", err) + if idx != nil { + idx.Close() + } + return + } + defer idx.Close() + + testIndexer("beleve", t, idx) +} + +func TestESIndexAndSearch(t *testing.T) { + unittest.PrepareTestEnv(t) + + u := os.Getenv("TEST_INDEXER_CODE_ES_URL") + if u == "" { + t.SkipNow() + return + } + + indexer := NewElasticsearchIndexer(u, "gitea_codes") + if _, err := indexer.Init(); err != nil { + assert.Fail(t, "Unable to init ES indexer Error: %v", err) + if indexer != nil { + indexer.Close() + } + return + } + + defer indexer.Close() + + testIndexer("elastic_search", t, indexer) +} diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 
e69ab3115a6f6..cb662cdfda286 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -9,6 +9,7 @@ import ( "strings" "code.gitea.io/gitea/modules/highlight" + "code.gitea.io/gitea/modules/indexer/code/base" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" ) @@ -25,6 +26,8 @@ type Result struct { FormattedLines string } +type SearchResultLanguages = base.SearchResultLanguages + func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) { startIndex := selectionStartIndex numLinesBefore := 0 @@ -61,7 +64,7 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error { return nil } -func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, error) { +func searchResult(result *base.SearchResult, startIndex, endIndex int) (*Result, error) { startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n") var formattedLinesBuffer bytes.Buffer @@ -109,12 +112,12 @@ func searchResult(result *SearchResult, startIndex, endIndex int) (*Result, erro } // PerformSearch perform a search on a repository -func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*SearchResultLanguages, error) { +func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*base.SearchResultLanguages, error) { if len(keyword) == 0 { return 0, nil, nil, nil } - total, results, resultLanguages, err := holder.Get().(Indexer).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) + total, results, resultLanguages, err := holder.Get().(base.Indexer).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) if err != nil { return 0, nil, nil, err } From cc6d0aafb3fe38637f9ab2984f8d753213640ee5 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 18:04:28 +0800 Subject: [PATCH 23/43] chore: issues/internal --- 
modules/indexer/issues/bleve/bleve.go | 28 +++++++++---------- modules/indexer/issues/bleve/bleve_test.go | 4 +-- modules/indexer/issues/db/db.go | 18 ++++++------ .../issues/elasticsearch/elasticsearch.go | 20 ++++++------- modules/indexer/issues/indexer.go | 22 +++++++-------- .../issues/{base => internal}/indexer.go | 2 +- .../issues/{base => internal}/model.go | 2 +- .../indexer/issues/meilisearch/meilisearch.go | 14 +++++----- 8 files changed, 55 insertions(+), 55 deletions(-) rename modules/indexer/issues/{base => internal}/indexer.go (96%) rename modules/indexer/issues/{base => internal}/model.go (97%) diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index 2151d0ee298af..f3a3daf139692 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -6,9 +6,9 @@ package bleve import ( "context" - "code.gitea.io/gitea/modules/indexer/internal" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" - "code.gitea.io/gitea/modules/indexer/issues/base" + "code.gitea.io/gitea/modules/indexer/issues/internal" "github.com/blevesearch/bleve/v2" "github.com/blevesearch/bleve/v2/analysis/analyzer/custom" @@ -54,7 +54,7 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error { const maxBatchSize = 16 // IndexerData an update to the issue indexer -type IndexerData base.IndexerData +type IndexerData internal.IndexerData // Type returns the document type, for bleve's mapping.Classifier interface. 
func (i *IndexerData) Type() string { @@ -95,12 +95,12 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) { return mapping, nil } -var _ base.Indexer = &Indexer{} +var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { - inner *inner_bleve.Indexer - internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much + inner *inner_bleve.Indexer + indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } // NewIndexer creates a new bleve local indexer @@ -113,10 +113,10 @@ func NewIndexer(indexDir string) *Indexer { } // Index will save the index data -func (b *Indexer) Index(issues []*base.IndexerData) error { +func (b *Indexer) Index(issues []*internal.IndexerData) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, issue := range issues { - if err := batch.Index(internal.Base36(issue.ID), struct { + if err := batch.Index(indexer_internal.Base36(issue.ID), struct { RepoID int64 Title string Content string @@ -137,7 +137,7 @@ func (b *Indexer) Index(issues []*base.IndexerData) error { func (b *Indexer) Delete(ids ...int64) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, id := range ids { - if err := batch.Delete(internal.Base36(id)); err != nil { + if err := batch.Delete(indexer_internal.Base36(id)); err != nil { return err } } @@ -146,7 +146,7 @@ func (b *Indexer) Delete(ids ...int64) error { // Search searches for issues by given conditions. 
// Returns the matching issue IDs -func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { +func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) { var repoQueriesP []*query.NumericRangeQuery for _, repoID := range repoIDs { repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID")) @@ -171,15 +171,15 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l return nil, err } - ret := base.SearchResult{ - Hits: make([]base.Match, 0, len(result.Hits)), + ret := internal.SearchResult{ + Hits: make([]internal.Match, 0, len(result.Hits)), } for _, hit := range result.Hits { - id, err := internal.ParseBase36(hit.ID) + id, err := indexer_internal.ParseBase36(hit.ID) if err != nil { return nil, err } - ret.Hits = append(ret.Hits, base.Match{ + ret.Hits = append(ret.Hits, internal.Match{ ID: id, }) } diff --git a/modules/indexer/issues/bleve/bleve_test.go b/modules/indexer/issues/bleve/bleve_test.go index 6fc11ec44a418..8b4dc9ab095aa 100644 --- a/modules/indexer/issues/bleve/bleve_test.go +++ b/modules/indexer/issues/bleve/bleve_test.go @@ -7,7 +7,7 @@ import ( "context" "testing" - "code.gitea.io/gitea/modules/indexer/issues/base" + "code.gitea.io/gitea/modules/indexer/issues/internal" "github.com/stretchr/testify/assert" ) @@ -22,7 +22,7 @@ func TestBleveIndexAndSearch(t *testing.T) { return } - err := indexer.Index([]*base.IndexerData{ + err := indexer.Index([]*internal.IndexerData{ { ID: 1, RepoID: 2, diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go index a1addd70d2a3c..39a5d4fce7627 100644 --- a/modules/indexer/issues/db/db.go +++ b/modules/indexer/issues/db/db.go @@ -7,16 +7,16 @@ import ( "context" issues_model "code.gitea.io/gitea/models/issues" - "code.gitea.io/gitea/modules/indexer/internal" + indexer_internal 
"code.gitea.io/gitea/modules/indexer/internal" inner_db "code.gitea.io/gitea/modules/indexer/internal/db" - "code.gitea.io/gitea/modules/indexer/issues/base" + "code.gitea.io/gitea/modules/indexer/issues/internal" ) -var _ base.Indexer = &Indexer{} +var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface to use database's like search type Indexer struct { - internal.Indexer + indexer_internal.Indexer } func NewIndexer() *Indexer { @@ -26,7 +26,7 @@ func NewIndexer() *Indexer { } // Index dummy function -func (i *Indexer) Index(issue []*base.IndexerData) error { +func (i *Indexer) Index(issue []*internal.IndexerData) error { return nil } @@ -36,17 +36,17 @@ func (i *Indexer) Delete(ids ...int64) error { } // Search searches for issues -func (i *Indexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { +func (i *Indexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) { total, ids, err := issues_model.SearchIssueIDsByKeyword(ctx, kw, repoIDs, limit, start) if err != nil { return nil, err } - result := base.SearchResult{ + result := internal.SearchResult{ Total: total, - Hits: make([]base.Match, 0, limit), + Hits: make([]internal.Match, 0, limit), } for _, id := range ids { - result.Hits = append(result.Hits, base.Match{ + result.Hits = append(result.Hits, internal.Match{ ID: id, }) } diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 24933fb014cfe..fa4dfc3fd61d4 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -9,9 +9,9 @@ import ( "strconv" "code.gitea.io/gitea/modules/graceful" - "code.gitea.io/gitea/modules/indexer/internal" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" - 
"code.gitea.io/gitea/modules/indexer/issues/base" + "code.gitea.io/gitea/modules/indexer/issues/internal" "github.com/olivere/elastic/v7" ) @@ -20,12 +20,12 @@ const ( issueIndexerLatestVersion = 2 ) -var _ base.Indexer = &Indexer{} +var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { - inner *inner_elasticsearch.Indexer - internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much + inner *inner_elasticsearch.Indexer + indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } // NewIndexer creates a new elasticsearch indexer @@ -68,7 +68,7 @@ const ( ) // Index will save the index data -func (b *Indexer) Index(issues []*base.IndexerData) error { +func (b *Indexer) Index(issues []*internal.IndexerData) error { if len(issues) == 0 { return nil } else if len(issues) == 1 { @@ -140,7 +140,7 @@ func (b *Indexer) Delete(ids ...int64) error { // Search searches for issues by given conditions. 
// Returns the matching issue IDs -func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { +func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) { kwQuery := elastic.NewMultiMatchQuery(keyword, "title", "content", "comments") query := elastic.NewBoolQuery() query = query.Must(kwQuery) @@ -162,15 +162,15 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l return nil, b.inner.CheckError(err) } - hits := make([]base.Match, 0, limit) + hits := make([]internal.Match, 0, limit) for _, hit := range searchResult.Hits.Hits { id, _ := strconv.ParseInt(hit.Id, 10, 64) - hits = append(hits, base.Match{ + hits = append(hits, internal.Match{ ID: id, }) } - return &base.SearchResult{ + return &internal.SearchResult{ Total: searchResult.TotalHits(), Hits: hits, }, nil diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index da2632492b09c..e4ce550a9d7e0 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -14,11 +14,11 @@ import ( issues_model "code.gitea.io/gitea/models/issues" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" - "code.gitea.io/gitea/modules/indexer/internal" - "code.gitea.io/gitea/modules/indexer/issues/base" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/indexer/issues/bleve" "code.gitea.io/gitea/modules/indexer/issues/db" "code.gitea.io/gitea/modules/indexer/issues/elasticsearch" + "code.gitea.io/gitea/modules/indexer/issues/internal" "code.gitea.io/gitea/modules/indexer/issues/meilisearch" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" @@ -29,8 +29,8 @@ import ( var ( // issueIndexerQueue queue of issue ids to be updated - issueIndexerQueue *queue.WorkerPoolQueue[*base.IndexerData] - holder = 
internal.NewIndexerHolder() + issueIndexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] + holder = indexer_internal.NewIndexerHolder() ) // InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until @@ -43,13 +43,13 @@ func InitIssueIndexer(syncReindex bool) { // Create the Queue switch setting.Indexer.IssueType { case "bleve", "elasticsearch", "meilisearch": - handler := func(items ...*base.IndexerData) (unhandled []*base.IndexerData) { - indexer := holder.Get().(base.Indexer) + handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) { + indexer := holder.Get().(internal.Indexer) if indexer == nil { log.Warn("Issue indexer handler: indexer is not ready, retry later.") return items } - toIndex := make([]*base.IndexerData, 0, len(items)) + toIndex := make([]*internal.IndexerData, 0, len(items)) for _, indexerData := range items { log.Trace("IndexerData Process: %d %v %t", indexerData.ID, indexerData.IDs, indexerData.IsDelete) if indexerData.IsDelete { @@ -80,7 +80,7 @@ func InitIssueIndexer(syncReindex bool) { log.Fatal("Unable to create issue indexer queue") } default: - issueIndexerQueue = queue.CreateSimpleQueue[*base.IndexerData](ctx, "issue_indexer", nil) + issueIndexerQueue = queue.CreateSimpleQueue[*internal.IndexerData](ctx, "issue_indexer", nil) } graceful.GetManager().RunAtTerminate(finished) @@ -253,7 +253,7 @@ func UpdateIssueIndexer(issue *issues_model.Issue) { comments = append(comments, comment.Content) } } - indexerData := &base.IndexerData{ + indexerData := &internal.IndexerData{ ID: issue.ID, RepoID: issue.RepoID, Title: issue.Title, @@ -278,7 +278,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { if len(ids) == 0 { return } - indexerData := &base.IndexerData{ + indexerData := &internal.IndexerData{ IDs: ids, IsDelete: true, } @@ -291,7 +291,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { // WARNNING: You have to ensure 
user have permission to visit repoIDs' issues func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) ([]int64, error) { var issueIDs []int64 - indexer := holder.Get().(base.Indexer) + indexer := holder.Get().(internal.Indexer) if indexer == nil { log.Error("SearchIssuesByKeyword(): unable to get indexer!") diff --git a/modules/indexer/issues/base/indexer.go b/modules/indexer/issues/internal/indexer.go similarity index 96% rename from modules/indexer/issues/base/indexer.go rename to modules/indexer/issues/internal/indexer.go index 8ee68b362e41a..fb73de2f34def 100644 --- a/modules/indexer/issues/base/indexer.go +++ b/modules/indexer/issues/internal/indexer.go @@ -1,7 +1,7 @@ // Copyright 2023 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package base +package internal import ( "context" diff --git a/modules/indexer/issues/base/model.go b/modules/indexer/issues/internal/model.go similarity index 97% rename from modules/indexer/issues/base/model.go rename to modules/indexer/issues/internal/model.go index 1a17fccc99d08..8c206fc1cfcdc 100644 --- a/modules/indexer/issues/base/model.go +++ b/modules/indexer/issues/internal/model.go @@ -1,7 +1,7 @@ // Copyright 2023 The Gitea Authors. All rights reserved. 
// SPDX-License-Identifier: MIT -package base +package internal // IndexerData data stored in the issue indexer type IndexerData struct { diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 1d41a975472ad..09199d91b7067 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -10,12 +10,12 @@ import ( "sync" "time" - "code.gitea.io/gitea/modules/indexer/issues/base" + "code.gitea.io/gitea/modules/indexer/issues/internal" "github.com/meilisearch/meilisearch-go" ) -var _ base.Indexer = &Indexer{} +var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { @@ -82,7 +82,7 @@ func (b *Indexer) Ping() bool { } // Index will save the index data -func (b *Indexer) Index(issues []*base.IndexerData) error { +func (b *Indexer) Index(issues []*internal.IndexerData) error { if len(issues) == 0 { return nil } @@ -114,7 +114,7 @@ func (b *Indexer) Delete(ids ...int64) error { // Search searches for issues by given conditions. 
// Returns the matching issue IDs -func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*base.SearchResult, error) { +func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) { repoFilters := make([]string, 0, len(repoIDs)) for _, repoID := range repoIDs { repoFilters = append(repoFilters, "repo_id = "+strconv.FormatInt(repoID, 10)) @@ -129,13 +129,13 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l return nil, b.checkError(err) } - hits := make([]base.Match, 0, len(searchRes.Hits)) + hits := make([]internal.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { - hits = append(hits, base.Match{ + hits = append(hits, internal.Match{ ID: int64(hit.(map[string]interface{})["id"].(float64)), }) } - return &base.SearchResult{ + return &internal.SearchResult{ Total: searchRes.TotalHits, Hits: hits, }, nil From a1d48a604f5cc60e108efc25362f5139b722f20e Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 18:07:54 +0800 Subject: [PATCH 24/43] fix: indexer_internal for code --- modules/indexer/code/bleve/bleve.go | 30 ++++++++-------- .../code/elasticsearch/elasticsearch.go | 36 +++++++++---------- modules/indexer/code/git.go | 20 +++++------ modules/indexer/code/indexer.go | 22 ++++++------ modules/indexer/code/indexer_test.go | 4 +-- .../code/{base => internal}/indexer.go | 2 +- .../indexer/code/{base => internal}/model.go | 2 +- .../indexer/code/{base => internal}/util.go | 2 +- modules/indexer/code/search.go | 10 +++--- 9 files changed, 64 insertions(+), 64 deletions(-) rename modules/indexer/code/{base => internal}/indexer.go (97%) rename modules/indexer/code/{base => internal}/model.go (98%) rename modules/indexer/code/{base => internal}/util.go (97%) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 82d2a787cfb6c..2d14bdfe017a0 100644 --- 
a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -16,8 +16,8 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/indexer/code/base" - "code.gitea.io/gitea/modules/indexer/internal" + "code.gitea.io/gitea/modules/indexer/code/internal" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_bleve "code.gitea.io/gitea/modules/indexer/internal/bleve" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" @@ -116,12 +116,12 @@ func generateBleveIndexMapping() (mapping.IndexMapping, error) { return mapping, nil } -var _ base.Indexer = &Indexer{} +var _ internal.Indexer = &Indexer{} // Indexer represents a bleve indexer implementation type Indexer struct { - inner *inner_bleve.Indexer - internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much + inner *inner_bleve.Indexer + indexer_internal.Indexer // do not composite inner_bleve.Indexer directly to avoid exposing too much } // NewIndexer creates a new bleve local indexer @@ -134,7 +134,7 @@ func NewIndexer(indexDir string) *Indexer { } func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, commitSha string, - update base.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, + update internal.FileUpdate, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch, ) error { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { @@ -179,7 +179,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro if _, err = batchReader.Discard(1); err != nil { return err } - id := base.FilenameIndexerID(repo.ID, update.Filename) + id := internal.FilenameIndexerID(repo.ID, update.Filename) return batch.Index(id, &RepoIndexerData{ RepoID: repo.ID, CommitID: commitSha, @@ 
-190,12 +190,12 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro } func (b *Indexer) addDelete(filename string, repo *repo_model.Repository, batch *inner_bleve.FlushingBatch) error { - id := base.FilenameIndexerID(repo.ID, filename) + id := internal.FilenameIndexerID(repo.ID, filename) return batch.Delete(id) } // Index indexes the data -func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *base.RepoChanges) error { +func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) if len(changes.Updates) > 0 { @@ -242,7 +242,7 @@ func (b *Indexer) Delete(repoID int64) error { // Search searches for files in the specified repo. // Returns the matching file-paths -func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*base.SearchResult, []*base.SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { var ( indexerQuery query.Query keywordQuery query.Query @@ -302,7 +302,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword total := int64(result.Total) - searchResults := make([]*base.SearchResult, len(result.Hits)) + searchResults := make([]*internal.SearchResult, len(result.Hits)) for i, hit := range result.Hits { startIndex, endIndex := -1, -1 for _, locations := range hit.Locations["Content"] { @@ -321,11 +321,11 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword if t, err := time.Parse(time.RFC3339, hit.Fields["UpdatedAt"].(string)); err == nil { updatedUnix = timeutil.TimeStamp(t.Unix()) } - searchResults[i] = &base.SearchResult{ + 
searchResults[i] = &internal.SearchResult{ RepoID: int64(hit.Fields["RepoID"].(float64)), StartIndex: startIndex, EndIndex: endIndex, - Filename: base.FilenameOfIndexerID(hit.ID), + Filename: internal.FilenameOfIndexerID(hit.ID), Content: hit.Fields["Content"].(string), CommitID: hit.Fields["CommitID"].(string), UpdatedUnix: updatedUnix, @@ -334,7 +334,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword } } - searchResultLanguages := make([]*base.SearchResultLanguages, 0, 10) + searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10) if len(language) > 0 { // Use separate query to go get all language counts facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false) @@ -352,7 +352,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword if len(term.Term) == 0 { continue } - searchResultLanguages = append(searchResultLanguages, &base.SearchResultLanguages{ + searchResultLanguages = append(searchResultLanguages, &internal.SearchResultLanguages{ Language: term.Term, Color: enry.GetColor(term.Term), Count: term.Count, diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 4a5bd64c0e645..2b8128fa16388 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -16,8 +16,8 @@ import ( "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/graceful" - "code.gitea.io/gitea/modules/indexer/code/base" - "code.gitea.io/gitea/modules/indexer/internal" + "code.gitea.io/gitea/modules/indexer/code/internal" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" "code.gitea.io/gitea/modules/json" "code.gitea.io/gitea/modules/log" @@ -37,12 +37,12 @@ const ( esMultiMatchTypePhrasePrefix = "phrase_prefix" ) -var _ base.Indexer 
= &Indexer{} +var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { - inner *inner_elasticsearch.Indexer - internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much + inner *inner_elasticsearch.Indexer + indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much } // NewIndexer creates a new elasticsearch indexer @@ -85,7 +85,7 @@ const ( }` ) -func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update base.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { +func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserError, batchReader *bufio.Reader, sha string, update internal.FileUpdate, repo *repo_model.Repository) ([]elastic.BulkableRequest, error) { // Ignore vendored files in code search if setting.Indexer.ExcludeVendored && analyze.IsVendor(update.Filename) { return nil, nil @@ -128,7 +128,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro if _, err = batchReader.Discard(1); err != nil { return nil, err } - id := base.FilenameIndexerID(repo.ID, update.Filename) + id := internal.FilenameIndexerID(repo.ID, update.Filename) return []elastic.BulkableRequest{ elastic.NewBulkIndexRequest(). @@ -145,14 +145,14 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro } func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { - id := base.FilenameIndexerID(repo.ID, filename) + id := internal.FilenameIndexerID(repo.ID, filename) return elastic.NewBulkDeleteRequest(). Index(b.inner.IndexName()). 
Id(id) } // Index will save the index data -func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *base.RepoChanges) error { +func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *internal.RepoChanges) error { reqs := make([]elastic.BulkableRequest, 0) if len(changes.Updates) > 0 { // Now because of some insanity with git cat-file not immediately failing if not run in a valid git directory we need to run git rev-parse first! @@ -214,8 +214,8 @@ func indexPos(content, start, end string) (int, int) { return startIdx, startIdx + len(start) + endIdx + len(end) } -func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*base.SearchResult, []*base.SearchResultLanguages, error) { - hits := make([]*base.SearchResult, 0, pageSize) +func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { + hits := make([]*internal.SearchResult, 0, pageSize) for _, hit := range searchResult.Hits.Hits { // FIXME: There is no way to get the position the keyword on the content currently on the same request. // So we get it from content, this may made the query slower. 
See @@ -234,7 +234,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) panic(fmt.Sprintf("2===%#v", hit.Highlight)) } - repoID, fileName := base.ParseIndexerID(hit.Id) + repoID, fileName := internal.ParseIndexerID(hit.Id) res := make(map[string]interface{}) if err := json.Unmarshal(hit.Source, &res); err != nil { return 0, nil, nil, err @@ -242,7 +242,7 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) language := res["language"].(string) - hits = append(hits, &base.SearchResult{ + hits = append(hits, &internal.SearchResult{ RepoID: repoID, Filename: fileName, CommitID: res["commit_id"].(string), @@ -258,14 +258,14 @@ func convertResult(searchResult *elastic.SearchResult, kw string, pageSize int) return searchResult.TotalHits(), hits, extractAggs(searchResult), nil } -func extractAggs(searchResult *elastic.SearchResult) []*base.SearchResultLanguages { - var searchResultLanguages []*base.SearchResultLanguages +func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLanguages { + var searchResultLanguages []*internal.SearchResultLanguages agg, found := searchResult.Aggregations.Terms("language") if found { - searchResultLanguages = make([]*base.SearchResultLanguages, 0, 10) + searchResultLanguages = make([]*internal.SearchResultLanguages, 0, 10) for _, bucket := range agg.Buckets { - searchResultLanguages = append(searchResultLanguages, &base.SearchResultLanguages{ + searchResultLanguages = append(searchResultLanguages, &internal.SearchResultLanguages{ Language: bucket.Key.(string), Color: enry.GetColor(bucket.Key.(string)), Count: int(bucket.DocCount), @@ -276,7 +276,7 @@ func extractAggs(searchResult *elastic.SearchResult) []*base.SearchResultLanguag } // Search searches for codes and language stats by given conditions. 
-func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*base.SearchResult, []*base.SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { searchType := esMultiMatchTypeBestFields if isMatch { searchType = esMultiMatchTypePhrasePrefix diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index c1f623e142822..f6d1fc513ef28 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -10,7 +10,7 @@ import ( repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/indexer/code/base" + "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/setting" ) @@ -24,7 +24,7 @@ func getDefaultBranchSha(ctx context.Context, repo *repo_model.Repository) (stri } // getRepoChanges returns changes to repo since last indexer update -func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*base.RepoChanges, error) { +func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { status, err := repo_model.GetIndexerStatus(ctx, repo, repo_model.RepoIndexerTypeCode) if err != nil { return nil, err @@ -55,16 +55,16 @@ func isIndexable(entry *git.TreeEntry) bool { } // parseGitLsTreeOutput parses the output of a `git ls-tree -r --full-name` command -func parseGitLsTreeOutput(stdout []byte) ([]base.FileUpdate, error) { +func parseGitLsTreeOutput(stdout []byte) ([]internal.FileUpdate, error) { entries, err := git.ParseTreeEntries(stdout) if err != nil { return nil, err } idxCount := 0 - updates := make([]base.FileUpdate, len(entries)) + updates := make([]internal.FileUpdate, len(entries)) for _, entry := 
range entries { if isIndexable(entry) { - updates[idxCount] = base.FileUpdate{ + updates[idxCount] = internal.FileUpdate{ Filename: entry.Name(), BlobSha: entry.ID.String(), Size: entry.Size(), @@ -77,8 +77,8 @@ func parseGitLsTreeOutput(stdout []byte) ([]base.FileUpdate, error) { } // genesisChanges get changes to add repo to the indexer for the first time -func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*base.RepoChanges, error) { - var changes base.RepoChanges +func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { + var changes internal.RepoChanges stdout, _, runErr := git.NewCommand(ctx, "ls-tree", "--full-tree", "-l", "-r").AddDynamicArguments(revision).RunStdBytes(&git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { return nil, runErr @@ -90,20 +90,20 @@ func genesisChanges(ctx context.Context, repo *repo_model.Repository, revision s } // nonGenesisChanges get changes since the previous indexer update -func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*base.RepoChanges, error) { +func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revision string) (*internal.RepoChanges, error) { diffCmd := git.NewCommand(ctx, "diff", "--name-status").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) stdout, _, runErr := diffCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) if runErr != nil { // previous commit sha may have been removed by a force push, so // try rebuilding from scratch log.Warn("git diff: %v", runErr) - if err := holder.Get().(base.Indexer).Delete(repo.ID); err != nil { + if err := holder.Get().(internal.Indexer).Delete(repo.ID); err != nil { return nil, err } return genesisChanges(ctx, repo, revision) } - var changes base.RepoChanges + var changes internal.RepoChanges var err error updatedFilenames := make([]string, 0, 10) for _, line := range strings.Split(stdout, "\n") { 
diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index df61f13b28fb7..a70fda8757d76 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -12,10 +12,10 @@ import ( "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" - "code.gitea.io/gitea/modules/indexer/code/base" "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" - "code.gitea.io/gitea/modules/indexer/internal" + "code.gitea.io/gitea/modules/indexer/code/internal" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/queue" @@ -24,11 +24,11 @@ import ( ) var ( - indexerQueue *queue.WorkerPoolQueue[*base.IndexerData] - holder = internal.NewIndexerHolder() + indexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] + holder = indexer_internal.NewIndexerHolder() ) -func index(ctx context.Context, indexer base.Indexer, repoID int64) error { +func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { repo, err := repo_model.GetRepositoryByID(ctx, repoID) if repo_model.IsErrRepoNotExist(err) { return indexer.Delete(repoID) @@ -108,8 +108,8 @@ func Init() { // Create the Queue switch setting.Indexer.RepoType { case "bleve", "elasticsearch": - handler := func(items ...*base.IndexerData) (unhandled []*base.IndexerData) { - indexer := holder.Get().(base.Indexer) + handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) { + indexer := holder.Get().(internal.Indexer) if indexer == nil { log.Warn("Codes indexer handler: indexer is not ready, retry later.") return items @@ -158,7 +158,7 @@ func Init() { pprof.SetGoroutineLabels(ctx) start := time.Now() var ( - rIndexer base.Indexer + rIndexer internal.Indexer existed bool err error ) @@ -257,7 +257,7 @@ func Init() { // UpdateRepoIndexer 
update a repository's entries in the indexer func UpdateRepoIndexer(repo *repo_model.Repository) { - indexData := &base.IndexerData{RepoID: repo.ID} + indexData := &internal.IndexerData{RepoID: repo.ID} if err := indexerQueue.Push(indexData); err != nil { log.Error("Update repo index data %v failed: %v", indexData, err) } @@ -265,7 +265,7 @@ func UpdateRepoIndexer(repo *repo_model.Repository) { // IsAvailable checks if issue indexer is available func IsAvailable() bool { - idx := holder.Get().(base.Indexer) + idx := holder.Get().(internal.Indexer) if idx == nil { log.Error("IsAvailable(): unable to get indexer") return false @@ -322,7 +322,7 @@ func populateRepoIndexer(ctx context.Context) { return default: } - if err := indexerQueue.Push(&base.IndexerData{RepoID: id}); err != nil { + if err := indexerQueue.Push(&internal.IndexerData{RepoID: id}); err != nil { log.Error("indexerQueue.Push: %v", err) return } diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 1db788403c694..59b45fff31ff5 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -11,8 +11,8 @@ import ( "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/indexer/code/base" "code.gitea.io/gitea/modules/indexer/code/bleve" + "code.gitea.io/gitea/modules/indexer/code/internal" _ "code.gitea.io/gitea/models" @@ -25,7 +25,7 @@ func TestMain(m *testing.M) { }) } -func testIndexer(name string, t *testing.T, indexer base.Indexer) { +func testIndexer(name string, t *testing.T, indexer internal.Indexer) { t.Run(name, func(t *testing.T) { var repoID int64 = 1 err := index(git.DefaultContext, indexer, repoID) diff --git a/modules/indexer/code/base/indexer.go b/modules/indexer/code/internal/indexer.go similarity index 97% rename from modules/indexer/code/base/indexer.go rename to modules/indexer/code/internal/indexer.go index 8c38f21f7c475..2bb91d528f5d4 100644 --- 
a/modules/indexer/code/base/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -1,7 +1,7 @@ // Copyright 2023 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package base +package internal import ( "context" diff --git a/modules/indexer/code/base/model.go b/modules/indexer/code/internal/model.go similarity index 98% rename from modules/indexer/code/base/model.go rename to modules/indexer/code/internal/model.go index c60a31a5005ee..f75263c83cfe0 100644 --- a/modules/indexer/code/base/model.go +++ b/modules/indexer/code/internal/model.go @@ -1,7 +1,7 @@ // Copyright 2023 The Gitea Authors. All rights reserved. // SPDX-License-Identifier: MIT -package base +package internal import "code.gitea.io/gitea/modules/timeutil" diff --git a/modules/indexer/code/base/util.go b/modules/indexer/code/internal/util.go similarity index 97% rename from modules/indexer/code/base/util.go rename to modules/indexer/code/internal/util.go index d8eda3196e13c..689c4f4584b14 100644 --- a/modules/indexer/code/base/util.go +++ b/modules/indexer/code/internal/util.go @@ -1,7 +1,7 @@ // Copyright 2023 The Gitea Authors. All rights reserved. 
// SPDX-License-Identifier: MIT -package base +package internal import ( "strings" diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index cb662cdfda286..db2ab93fc74ab 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -9,7 +9,7 @@ import ( "strings" "code.gitea.io/gitea/modules/highlight" - "code.gitea.io/gitea/modules/indexer/code/base" + "code.gitea.io/gitea/modules/indexer/code/internal" "code.gitea.io/gitea/modules/timeutil" "code.gitea.io/gitea/modules/util" ) @@ -26,7 +26,7 @@ type Result struct { FormattedLines string } -type SearchResultLanguages = base.SearchResultLanguages +type SearchResultLanguages = internal.SearchResultLanguages func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) { startIndex := selectionStartIndex @@ -64,7 +64,7 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error { return nil } -func searchResult(result *base.SearchResult, startIndex, endIndex int) (*Result, error) { +func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) { startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n") var formattedLinesBuffer bytes.Buffer @@ -112,12 +112,12 @@ func searchResult(result *base.SearchResult, startIndex, endIndex int) (*Result, } // PerformSearch perform a search on a repository -func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*base.SearchResultLanguages, error) { +func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*internal.SearchResultLanguages, error) { if len(keyword) == 0 { return 0, nil, nil, nil } - total, results, resultLanguages, err := holder.Get().(base.Indexer).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) + total, results, resultLanguages, err := holder.Get().(internal.Indexer).Search(ctx, 
repoIDs, language, keyword, page, pageSize, isMatch) if err != nil { return 0, nil, nil, err } From 1bc8d3e31571379b0f1af13d12f9e1434a8814a8 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 18:32:02 +0800 Subject: [PATCH 25/43] fix: meilisearch --- .../code/elasticsearch/elasticsearch.go | 6 +- .../indexer/internal/meilisearch/indexer.go | 77 ++++++++++++ modules/indexer/internal/meilisearch/util.go | 60 +++++++++ .../issues/elasticsearch/elasticsearch.go | 6 +- modules/indexer/issues/indexer.go | 21 ++-- .../indexer/issues/meilisearch/meilisearch.go | 114 +++--------------- 6 files changed, 169 insertions(+), 115 deletions(-) create mode 100644 modules/indexer/internal/meilisearch/util.go diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 2b8128fa16388..6fe5b2a7e9d3a 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -47,10 +47,10 @@ type Indexer struct { // NewIndexer creates a new elasticsearch indexer func NewIndexer(url, indexerName string) *Indexer { - in := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) + inner := inner_elasticsearch.NewIndexer(url, indexerName, esRepoIndexerLatestVersion, defaultMapping) indexer := &Indexer{ - inner: in, - Indexer: in, + inner: inner, + Indexer: inner, } return indexer } diff --git a/modules/indexer/internal/meilisearch/indexer.go b/modules/indexer/internal/meilisearch/indexer.go index ccc0dfe6c8caf..971e3827b8901 100644 --- a/modules/indexer/internal/meilisearch/indexer.go +++ b/modules/indexer/internal/meilisearch/indexer.go @@ -2,3 +2,80 @@ // SPDX-License-Identifier: MIT package meilisearch + +import ( + "fmt" + "sync" + + "github.com/meilisearch/meilisearch-go" +) + +// Indexer represents a basic meilisearch indexer implementation +type Indexer struct { + Client *meilisearch.Client + + url, apiKey string + 
indexerName string + available bool + stopTimer chan struct{} + lock sync.RWMutex +} + +func NewIndexer(url, apiKey, indexerName string) *Indexer { + return &Indexer{ + url: url, + apiKey: apiKey, + indexerName: indexerName, + available: false, + stopTimer: make(chan struct{}), + } +} + +// Init initializes the indexer +func (i *Indexer) Init() (bool, error) { + if i == nil { + return false, fmt.Errorf("cannot init nil indexer") + } + + if err := i.initClient(); err != nil { + return false, err + } + _, err := i.Client.GetIndex(i.indexerName) + if err == nil { + return true, nil + } + _, err = i.Client.CreateIndex(&meilisearch.IndexConfig{ + Uid: i.indexerName, + PrimaryKey: "id", + }) + if err != nil { + return false, err + } + + // TODO support version ? + + _, err = i.Client.Index(i.indexerName).UpdateFilterableAttributes(&[]string{"repo_id"}) + return false, err +} + +// Ping checks if the indexer is available +func (i *Indexer) Ping() bool { + if i == nil { + return false + } + i.lock.RLock() + defer i.lock.RUnlock() + return i.available +} + +// Close closes the indexer +func (i *Indexer) Close() { + if i == nil { + return + } + select { + case <-i.stopTimer: + default: + close(i.stopTimer) + } +} diff --git a/modules/indexer/internal/meilisearch/util.go b/modules/indexer/internal/meilisearch/util.go new file mode 100644 index 0000000000000..1378e30ad02d6 --- /dev/null +++ b/modules/indexer/internal/meilisearch/util.go @@ -0,0 +1,60 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. 
+// SPDX-License-Identifier: MIT + +package meilisearch + +import ( + "time" + + "github.com/meilisearch/meilisearch-go" +) + +// IndexName returns the full index name with version +func (i *Indexer) IndexName() string { + return i.indexerName +} + +func (i *Indexer) initClient() error { + client := meilisearch.NewClient(meilisearch.ClientConfig{ + Host: i.url, + APIKey: i.apiKey, + }) + + i.Client = client + + i.available = true + ticker := time.NewTicker(10 * time.Second) + go func() { + for { + select { + case <-ticker.C: + i.checkAvailability() + case <-i.stopTimer: + ticker.Stop() + return + } + } + }() + + return nil +} + +func (i *Indexer) checkAvailability() { + _, err := i.Client.Health() + if err != nil { + i.setAvailability(false) + return + } + i.setAvailability(true) +} + +func (i *Indexer) setAvailability(available bool) { + i.lock.Lock() + defer i.lock.Unlock() + + if i.available == available { + return + } + + i.available = available +} diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index fa4dfc3fd61d4..7bf9018c0c8a9 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -30,10 +30,10 @@ type Indexer struct { // NewIndexer creates a new elasticsearch indexer func NewIndexer(url, indexerName string) *Indexer { - in := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) + inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping) indexer := &Indexer{ - inner: in, - Indexer: in, + inner: inner, + Indexer: inner, } return indexer } diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index e4ce550a9d7e0..bfaa5c29cdce4 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -90,7 +90,10 @@ func InitIssueIndexer(syncReindex bool) { pprof.SetGoroutineLabels(ctx) 
start := time.Now() log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType) - var populate bool + var ( + existed bool + err error + ) switch setting.Indexer.IssueType { case "bleve": defer func() { @@ -103,12 +106,11 @@ func InitIssueIndexer(syncReindex bool) { } }() issueIndexer := bleve.NewIndexer(setting.Indexer.IssuePath) - exist, err := issueIndexer.Init() + existed, err = issueIndexer.Init() if err != nil { holder.Set(nil) log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err) } - populate = !exist holder.Set(issueIndexer) graceful.GetManager().RunAtTerminate(func() { log.Debug("Closing issue indexer") @@ -121,25 +123,20 @@ func InitIssueIndexer(syncReindex bool) { log.Debug("Created Bleve Indexer") case "elasticsearch": issueIndexer := elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) - exist, err := issueIndexer.Init() + existed, err = issueIndexer.Init() if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } - populate = !exist holder.Set(issueIndexer) case "db": issueIndexer := db.NewIndexer() holder.Set(issueIndexer) case "meilisearch": - issueIndexer, err := meilisearch.NewMeilisearchIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) - if err != nil { - log.Fatal("Unable to initialize Meilisearch Issue Indexer at connection: %s Error: %v", setting.Indexer.IssueConnStr, err) - } - exist, err := issueIndexer.Init() + issueIndexer := meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) + existed, err = issueIndexer.Init() if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } - populate = !exist holder.Set(issueIndexer) default: holder.Set(nil) @@ -150,7 +147,7 @@ func 
InitIssueIndexer(syncReindex bool) { go graceful.GetManager().RunWithCancel(issueIndexerQueue) // Populate the index - if populate { + if !existed { if syncReindex { graceful.GetManager().RunWithShutdownContext(populateIssueIndexer) } else { diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 09199d91b7067..241932d95180d 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -7,9 +7,9 @@ import ( "context" "strconv" "strings" - "sync" - "time" + indexer_internal "code.gitea.io/gitea/modules/indexer/internal" + inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" "code.gitea.io/gitea/modules/indexer/issues/internal" "github.com/meilisearch/meilisearch-go" @@ -19,66 +19,19 @@ var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface type Indexer struct { - client *meilisearch.Client - indexerName string - available bool - stopTimer chan struct{} - lock sync.RWMutex -} - -// Indexer creates a new meilisearch indexer -func NewMeilisearchIndexer(url, apiKey, indexerName string) (*Indexer, error) { - client := meilisearch.NewClient(meilisearch.ClientConfig{ - Host: url, - APIKey: apiKey, - }) - - indexer := &Indexer{ - client: client, - indexerName: indexerName, - available: true, - stopTimer: make(chan struct{}), - } - - ticker := time.NewTicker(10 * time.Second) - go func() { - for { - select { - case <-ticker.C: - indexer.checkAvailability() - case <-indexer.stopTimer: - ticker.Stop() - return - } - } - }() + inner *inner_meilisearch.Indexer + indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much - return indexer, nil } -// Init will initialize the indexer -func (b *Indexer) Init() (bool, error) { - _, err := b.client.GetIndex(b.indexerName) - if err == nil { - return true, nil - } - _, err = b.client.CreateIndex(&meilisearch.IndexConfig{ - 
Uid: b.indexerName, - PrimaryKey: "id", - }) - if err != nil { - return false, b.checkError(err) +// NewIndexer creates a new meilisearch indexer +func NewIndexer(url, apiKey, indexerName string) *Indexer { + inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName) + indexer := &Indexer{ + inner: inner, + Indexer: inner, } - - _, err = b.client.Index(b.indexerName).UpdateFilterableAttributes(&[]string{"repo_id"}) - return false, b.checkError(err) -} - -// Ping checks if meilisearch is available -func (b *Indexer) Ping() bool { - b.lock.RLock() - defer b.lock.RUnlock() - return b.available + return indexer } // Index will save the index data @@ -87,9 +40,9 @@ func (b *Indexer) Index(issues []*internal.IndexerData) error { return nil } for _, issue := range issues { - _, err := b.client.Index(b.indexerName).AddDocuments(issue) + _, err := b.inner.Client.Index(b.inner.IndexName()).AddDocuments(issue) if err != nil { - return b.checkError(err) + return err } } // TODO: bulk send index data @@ -103,9 +56,9 @@ func (b *Indexer) Delete(ids ...int64) error { } for _, id := range ids { - _, err := b.client.Index(b.indexerName).DeleteDocument(strconv.FormatInt(id, 10)) + _, err := b.inner.Client.Index(b.inner.IndexName()).DeleteDocument(strconv.FormatInt(id, 10)) if err != nil { - return b.checkError(err) + return err } } // TODO: bulk send deletes @@ -120,13 +73,13 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l repoFilters = append(repoFilters, "repo_id = "+strconv.FormatInt(repoID, 10)) } filter := strings.Join(repoFilters, " OR ") - searchRes, err := b.client.Index(b.indexerName).Search(keyword, &meilisearch.SearchRequest{ + searchRes, err := b.inner.Client.Index(b.inner.IndexName()).Search(keyword, &meilisearch.SearchRequest{ Filter: filter, Limit: int64(limit), Offset: int64(start), }) if err != nil { - return nil, b.checkError(err) + return nil, err } hits := make([]internal.Match, 0, len(searchRes.Hits)) @@ -140,36 +93,3 @@ 
func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l Hits: hits, }, nil } - -// Close implements indexer -func (b *Indexer) Close() { - select { - case <-b.stopTimer: - default: - close(b.stopTimer) - } -} - -func (b *Indexer) checkError(err error) error { - return err -} - -func (b *Indexer) checkAvailability() { - _, err := b.client.Health() - if err != nil { - b.setAvailability(false) - return - } - b.setAvailability(true) -} - -func (b *Indexer) setAvailability(available bool) { - b.lock.Lock() - defer b.lock.Unlock() - - if b.available == available { - return - } - - b.available = available -} From 419ea3f46e92efb9fb9626aa8f44b671b3f7dbc5 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 20:00:06 +0800 Subject: [PATCH 26/43] fix: test --- modules/indexer/code/indexer_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 59b45fff31ff5..4231378201375 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -12,6 +12,7 @@ import ( "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/indexer/code/bleve" + "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" _ "code.gitea.io/gitea/models" @@ -116,7 +117,7 @@ func TestESIndexAndSearch(t *testing.T) { return } - indexer := NewElasticsearchIndexer(u, "gitea_codes") + indexer := elasticsearch.NewIndexer(u, "gitea_codes") if _, err := indexer.Init(); err != nil { assert.Fail(t, "Unable to init ES indexer Error: %v", err) if indexer != nil { From afc43376c3b6d7be635e1f616f976dee40ace88e Mon Sep 17 00:00:00 2001 From: Jason Song Date: Fri, 9 Jun 2023 20:08:23 +0800 Subject: [PATCH 27/43] fix: format code --- modules/indexer/issues/meilisearch/meilisearch.go | 1 - 1 file changed, 1 deletion(-) diff --git 
a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 241932d95180d..fe7ed6288c93e 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -21,7 +21,6 @@ var _ internal.Indexer = &Indexer{} type Indexer struct { inner *inner_meilisearch.Indexer indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much - } // NewIndexer creates a new meilisearch indexer From ff0ab3d06fcea1e2ad95dfe124def6215f379296 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Tue, 13 Jun 2023 17:20:50 +0800 Subject: [PATCH 28/43] fix: rewrite ping --- modules/context/repo.go | 2 +- .../code/elasticsearch/elasticsearch.go | 12 ++-- modules/indexer/code/indexer.go | 12 ++-- modules/indexer/internal/bleve/indexer.go | 35 ++++++---- modules/indexer/internal/db/indexer.go | 8 ++- .../indexer/internal/elasticsearch/indexer.go | 50 +++++++------- .../indexer/internal/elasticsearch/util.go | 66 +------------------ modules/indexer/internal/indexer.go | 6 +- .../indexer/internal/meilisearch/indexer.go | 45 ++++++++----- modules/indexer/internal/meilisearch/util.go | 51 -------------- .../issues/elasticsearch/elasticsearch.go | 10 +-- modules/indexer/issues/indexer.go | 18 ++--- routers/web/explore/code.go | 4 +- routers/web/repo/issue.go | 2 +- routers/web/repo/search.go | 4 +- routers/web/user/code.go | 4 +- 16 files changed, 121 insertions(+), 208 deletions(-) diff --git a/modules/context/repo.go b/modules/context/repo.go index fd5f20857663e..003309f1b01ec 100644 --- a/modules/context/repo.go +++ b/modules/context/repo.go @@ -593,7 +593,7 @@ func RepoAssignment(ctx *Context) (cancel context.CancelFunc) { ctx.Data["RepoSearchEnabled"] = setting.Indexer.RepoIndexerEnabled if setting.Indexer.RepoIndexerEnabled { - ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable() + ctx.Data["CodeIndexerUnavailable"] = 
!code_indexer.IsAvailable(ctx) } if ctx.IsSigned { diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 6fe5b2a7e9d3a..79dcabc865fc8 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -185,7 +185,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st Index(b.inner.IndexName()). Add(reqs...). Do(ctx) - return b.inner.CheckError(err) + return err } return nil } @@ -195,7 +195,7 @@ func (b *Indexer) Delete(repoID int64) error { _, err := b.inner.Client.DeleteByQuery(b.inner.IndexName()). Query(elastic.NewTermsQuery("repo_id", repoID)). Do(graceful.GetManager().HammerContext()) - return b.inner.CheckError(err) + return err } // indexPos find words positions for start and the following end on content. It will @@ -319,7 +319,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword From(start).Size(pageSize). Do(ctx) if err != nil { - return 0, nil, nil, b.inner.CheckError(err) + return 0, nil, nil, err } return convertResult(searchResult, kw, pageSize) @@ -330,10 +330,10 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword Index(b.inner.IndexName()). Aggregation("language", aggregation). Query(query). - Size(0). // We only needs stats information + Size(0). // We only need stats information Do(ctx) if err != nil { - return 0, nil, nil, b.inner.CheckError(err) + return 0, nil, nil, err } query = query.Must(langQuery) @@ -350,7 +350,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword From(start).Size(pageSize). 
Do(ctx) if err != nil { - return 0, nil, nil, b.inner.CheckError(err) + return 0, nil, nil, err } total, hits, _, err := convertResult(searchResult, kw, pageSize) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index a70fda8757d76..31e866ebe73ad 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -133,8 +133,8 @@ func Init() { code.gitea.io/gitea/modules/indexer/code.index(indexer.go:105) */ if err := index(ctx, indexer, indexerData.RepoID); err != nil { - if !indexer.Ping() { - log.Error("Code indexer handler: indexer is unavailable.") + if err := indexer.Ping(ctx); err != nil { + log.Error("Code indexer handler: indexer is unavailable: %v.", err) unhandled = append(unhandled, indexerData) continue } @@ -174,7 +174,7 @@ func Init() { }() rIndexer = bleve.NewIndexer(setting.Indexer.RepoPath) - existed, err = rIndexer.Init() + existed, err = rIndexer.Init(ctx) if err != nil { cancel() holder.Get().Close() @@ -198,7 +198,7 @@ func Init() { close(waitChannel) log.Fatal("PID: %d Unable to create the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } - existed, err = rIndexer.Init() + existed, err = rIndexer.Init(ctx) if err != nil { cancel() holder.Get().Close() @@ -264,14 +264,14 @@ func UpdateRepoIndexer(repo *repo_model.Repository) { } // IsAvailable checks if issue indexer is available -func IsAvailable() bool { +func IsAvailable(ctx context.Context) bool { idx := holder.Get().(internal.Indexer) if idx == nil { log.Error("IsAvailable(): unable to get indexer") return false } - return idx.Ping() + return idx.Ping(ctx) == nil } // populateRepoIndexer populate the repo indexer with pre-existing data. 
This diff --git a/modules/indexer/internal/bleve/indexer.go b/modules/indexer/internal/bleve/indexer.go index 44f8c48804d50..9392939698faf 100644 --- a/modules/indexer/internal/bleve/indexer.go +++ b/modules/indexer/internal/bleve/indexer.go @@ -4,6 +4,7 @@ package bleve import ( + "context" "fmt" "code.gitea.io/gitea/modules/indexer/internal" @@ -36,16 +37,21 @@ func NewIndexer(indexDir string, version int, mappingGetter func() (mapping.Inde } // Init initializes the indexer -func (i *Indexer) Init() (bool, error) { +func (i *Indexer) Init(_ context.Context) (bool, error) { if i == nil { return false, fmt.Errorf("cannot init nil indexer") } - var err error - i.Indexer, err = openIndexer(i.indexDir, i.version) + + if i.Indexer != nil { + return false, fmt.Errorf("indexer is already initialized") + } + + indexer, err := openIndexer(i.indexDir, i.version) if err != nil { return false, err } - if i.Indexer != nil { + if indexer != nil { + i.Indexer = indexer return true, nil } @@ -54,7 +60,7 @@ func (i *Indexer) Init() (bool, error) { return false, err } - i.Indexer, err = bleve.New(i.indexDir, indexMapping) + indexer, err = bleve.New(i.indexDir, indexMapping) if err != nil { return false, err } @@ -65,24 +71,29 @@ func (i *Indexer) Init() (bool, error) { return false, err } + i.Indexer = indexer + return false, nil } // Ping checks if the indexer is available -func (i *Indexer) Ping() bool { +func (i *Indexer) Ping(_ context.Context) error { if i == nil { - return false + return fmt.Errorf("cannot ping nil indexer") } - return i.Indexer != nil + if i.Indexer == nil { + return fmt.Errorf("indexer is not initialized") + } + return nil } func (i *Indexer) Close() { if i == nil { return } - if indexer := i.Indexer; indexer != nil { - if err := indexer.Close(); err != nil { - log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, err) - } + + if err := i.Indexer.Close(); err != nil { + log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, err) } + 
i.Indexer = nil } diff --git a/modules/indexer/internal/db/indexer.go b/modules/indexer/internal/db/indexer.go index c03c2a40e5bb4..3f7e00efbb715 100644 --- a/modules/indexer/internal/db/indexer.go +++ b/modules/indexer/internal/db/indexer.go @@ -4,6 +4,8 @@ package db import ( + "context" + "code.gitea.io/gitea/modules/indexer/internal" ) @@ -13,16 +15,16 @@ var _ internal.Indexer = &Indexer{} type Indexer struct{} // Init initializes the indexer -func (i *Indexer) Init() (bool, error) { +func (i *Indexer) Init(_ context.Context) (bool, error) { // nothing to do return false, nil } // Ping checks if the indexer is available -func (i *Indexer) Ping() bool { +func (i *Indexer) Ping(_ context.Context) error { // No need to ping database to check if it is available. // If the database goes down, Gitea will go down, so nobody will care if the indexer is available. - return true + return nil } // Close closes the indexer diff --git a/modules/indexer/internal/elasticsearch/indexer.go b/modules/indexer/internal/elasticsearch/indexer.go index 7398632cbcbb9..5013c17ea6714 100644 --- a/modules/indexer/internal/elasticsearch/indexer.go +++ b/modules/indexer/internal/elasticsearch/indexer.go @@ -4,10 +4,9 @@ package elasticsearch import ( + "context" "fmt" - "sync" - "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/internal" "github.com/olivere/elastic/v7" @@ -23,10 +22,6 @@ type Indexer struct { indexAliasName string version int mapping string - - available bool - stopTimer chan struct{} - lock sync.RWMutex } func NewIndexer(url, indexName string, version int, mapping string) *Indexer { @@ -35,46 +30,57 @@ func NewIndexer(url, indexName string, version int, mapping string) *Indexer { indexAliasName: indexName, version: version, mapping: mapping, - available: false, - stopTimer: make(chan struct{}), } } // Init initializes the indexer -func (i *Indexer) Init() (bool, error) { +func (i *Indexer) Init(ctx context.Context) (bool, error) { if i == nil { 
return false, fmt.Errorf("cannot init nil indexer") } + if i.Client != nil { + return false, fmt.Errorf("indexer is already initialized") + } - if err := i.initClient(); err != nil { + client, err := i.initClient() + if err != nil { return false, err } - - ctx := graceful.GetManager().HammerContext() + i.Client = client exists, err := i.Client.IndexExists(i.IndexName()).Do(ctx) if err != nil { - return false, i.CheckError(err) + return false, err } if exists { return true, nil } if err := i.createIndex(ctx); err != nil { - return false, i.CheckError(err) + return false, err } return exists, nil } // Ping checks if the indexer is available -func (i *Indexer) Ping() bool { +func (i *Indexer) Ping(ctx context.Context) error { if i == nil { - return false + return fmt.Errorf("cannot ping nil indexer") } - i.lock.RLock() - defer i.lock.RUnlock() - return i.available + if i.Client == nil { + return fmt.Errorf("indexer is not initialized") + } + + resp, err := i.Client.ClusterHealth().Do(ctx) + if err != nil { + return err + } + if resp.Status != "green" { + // see https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html + return fmt.Errorf("status of elasticsearch cluster is %s", resp.Status) + } + return nil } // Close closes the indexer @@ -82,9 +88,5 @@ func (i *Indexer) Close() { if i == nil { return } - select { - case <-i.stopTimer: - default: - close(i.stopTimer) - } + i.Client = nil } diff --git a/modules/indexer/internal/elasticsearch/util.go b/modules/indexer/internal/elasticsearch/util.go index 18d8c72265557..82b60dd5887cb 100644 --- a/modules/indexer/internal/elasticsearch/util.go +++ b/modules/indexer/internal/elasticsearch/util.go @@ -5,60 +5,19 @@ package elasticsearch import ( "context" - "errors" "fmt" - "net" "time" - "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" "github.com/olivere/elastic/v7" ) -// CheckError checks if the error is a connection error and sets the availability -func (i *Indexer) 
CheckError(err error) error { - var opErr *net.OpError - if !(elastic.IsConnErr(err) || (errors.As(err, &opErr) && (opErr.Op == "dial" || opErr.Op == "read"))) { - return err - } - - i.setAvailability(false) - - return err -} - // IndexName returns the full index name with version func (i *Indexer) IndexName() string { return fmt.Sprintf("%s.v%d", i.indexAliasName, i.version) } -func (i *Indexer) setAvailability(available bool) { - i.lock.Lock() - defer i.lock.Unlock() - - if i.available == available { - return - } - - i.available = available -} - -func (i *Indexer) checkAvailability() { - if i.Ping() { - return - } - - // Request cluster state to check if elastic is available again - _, err := i.Client.ClusterState().Do(graceful.GetManager().ShutdownContext()) - if err != nil { - i.setAvailability(false) - return - } - - i.setAvailability(true) -} - func (i *Indexer) createIndex(ctx context.Context) error { createIndex, err := i.Client.CreateIndex(i.IndexName()).BodyString(i.mapping).Do(ctx) if err != nil { @@ -103,7 +62,7 @@ func (i *Indexer) createIndex(ctx context.Context) error { return nil } -func (i *Indexer) initClient() error { +func (i *Indexer) initClient() (*elastic.Client, error) { opts := []elastic.ClientOptionFunc{ elastic.SetURL(i.url), elastic.SetSniff(false), @@ -117,26 +76,5 @@ func (i *Indexer) initClient() error { opts = append(opts, elastic.SetInfoLog(&log.PrintfLogger{Logf: logger.Info})) opts = append(opts, elastic.SetErrorLog(&log.PrintfLogger{Logf: logger.Error})) - client, err := elastic.NewClient(opts...) - if err != nil { - return err - } - - i.Client = client - - i.available = true - ticker := time.NewTicker(10 * time.Second) - go func() { - for { - select { - case <-ticker.C: - i.checkAvailability() - case <-i.stopTimer: - ticker.Stop() - return - } - } - }() - - return nil + return elastic.NewClient(opts...) 
} diff --git a/modules/indexer/internal/indexer.go b/modules/indexer/internal/indexer.go index c71fc86ce8cb2..4f50e0f722d3d 100644 --- a/modules/indexer/internal/indexer.go +++ b/modules/indexer/internal/indexer.go @@ -3,13 +3,15 @@ package internal +import "context" + // Indexer defines an basic indexer interface type Indexer interface { // Init initializes the indexer // returns true if the index was opened/existed (with data populated), false if it was created/not-existed (with no data) - Init() (bool, error) + Init(ctx context.Context) (bool, error) // Ping checks if the indexer is available - Ping() bool + Ping(ctx context.Context) error // Close closes the indexer Close() } diff --git a/modules/indexer/internal/meilisearch/indexer.go b/modules/indexer/internal/meilisearch/indexer.go index 971e3827b8901..a196ec717f58a 100644 --- a/modules/indexer/internal/meilisearch/indexer.go +++ b/modules/indexer/internal/meilisearch/indexer.go @@ -4,8 +4,8 @@ package meilisearch import ( + "context" "fmt" - "sync" "github.com/meilisearch/meilisearch-go" ) @@ -16,9 +16,6 @@ type Indexer struct { url, apiKey string indexerName string - available bool - stopTimer chan struct{} - lock sync.RWMutex } func NewIndexer(url, apiKey, indexerName string) *Indexer { @@ -26,20 +23,24 @@ func NewIndexer(url, apiKey, indexerName string) *Indexer { url: url, apiKey: apiKey, indexerName: indexerName, - available: false, - stopTimer: make(chan struct{}), } } // Init initializes the indexer -func (i *Indexer) Init() (bool, error) { +func (i *Indexer) Init(_ context.Context) (bool, error) { if i == nil { return false, fmt.Errorf("cannot init nil indexer") } - if err := i.initClient(); err != nil { - return false, err + if i.Client != nil { + return false, fmt.Errorf("indexer is already initialized") } + + i.Client = meilisearch.NewClient(meilisearch.ClientConfig{ + Host: i.url, + APIKey: i.apiKey, + }) + _, err := i.Client.GetIndex(i.indexerName) if err == nil { return true, nil @@ -59,13 
+60,22 @@ func (i *Indexer) Init() (bool, error) { } // Ping checks if the indexer is available -func (i *Indexer) Ping() bool { +func (i *Indexer) Ping(ctx context.Context) error { if i == nil { - return false + return fmt.Errorf("cannot ping nil indexer") + } + if i.Client == nil { + return fmt.Errorf("indexer is not initialized") } - i.lock.RLock() - defer i.lock.RUnlock() - return i.available + resp, err := i.Client.Health() + if err != nil { + return err + } + if resp.Status != "available" { + // See https://docs.meilisearch.com/reference/api/health.html#status + return fmt.Errorf("status of meilisearch is not available: %s", resp.Status) + } + return nil } // Close closes the indexer @@ -73,9 +83,8 @@ func (i *Indexer) Close() { if i == nil { return } - select { - case <-i.stopTimer: - default: - close(i.stopTimer) + if i.Client == nil { + return } + i.Client = nil } diff --git a/modules/indexer/internal/meilisearch/util.go b/modules/indexer/internal/meilisearch/util.go index 1378e30ad02d6..385511da2046a 100644 --- a/modules/indexer/internal/meilisearch/util.go +++ b/modules/indexer/internal/meilisearch/util.go @@ -3,58 +3,7 @@ package meilisearch -import ( - "time" - - "github.com/meilisearch/meilisearch-go" -) - // IndexName returns the full index name with version func (i *Indexer) IndexName() string { return i.indexerName } - -func (i *Indexer) initClient() error { - client := meilisearch.NewClient(meilisearch.ClientConfig{ - Host: i.url, - APIKey: i.apiKey, - }) - - i.Client = client - - i.available = true - ticker := time.NewTicker(10 * time.Second) - go func() { - for { - select { - case <-ticker.C: - i.checkAvailability() - case <-i.stopTimer: - ticker.Stop() - return - } - } - }() - - return nil -} - -func (i *Indexer) checkAvailability() { - _, err := i.Client.Health() - if err != nil { - i.setAvailability(false) - return - } - i.setAvailability(true) -} - -func (i *Indexer) setAvailability(available bool) { - i.lock.Lock() - defer i.lock.Unlock() - 
- if i.available == available { - return - } - - i.available = available -} diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 7bf9018c0c8a9..921250cdd8db1 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -84,7 +84,7 @@ func (b *Indexer) Index(issues []*internal.IndexerData) error { "comments": issue.Comments, }). Do(graceful.GetManager().HammerContext()) - return b.inner.CheckError(err) + return err } reqs := make([]elastic.BulkableRequest, 0) @@ -107,7 +107,7 @@ func (b *Indexer) Index(issues []*internal.IndexerData) error { Index(b.inner.IndexName()). Add(reqs...). Do(graceful.GetManager().HammerContext()) - return b.inner.CheckError(err) + return err } // Delete deletes indexes by ids @@ -119,7 +119,7 @@ func (b *Indexer) Delete(ids ...int64) error { Index(b.inner.IndexName()). Id(fmt.Sprintf("%d", ids[0])). Do(graceful.GetManager().HammerContext()) - return b.inner.CheckError(err) + return err } reqs := make([]elastic.BulkableRequest, 0) @@ -135,7 +135,7 @@ func (b *Indexer) Delete(ids ...int64) error { Index(b.inner.IndexName()). Add(reqs...). Do(graceful.GetManager().HammerContext()) - return b.inner.CheckError(err) + return err } // Search searches for issues by given conditions. @@ -159,7 +159,7 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l From(start).Size(limit). 
Do(ctx) if err != nil { - return nil, b.inner.CheckError(err) + return nil, err } hits := make([]internal.Match, 0, limit) diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index bfaa5c29cdce4..a003ce76ca2c5 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -55,8 +55,8 @@ func InitIssueIndexer(syncReindex bool) { if indexerData.IsDelete { if err := indexer.Delete(indexerData.IDs...); err != nil { log.Error("Issue indexer handler: failed to from index: %v Error: %v", indexerData.IDs, err) - if !indexer.Ping() { - log.Error("Issue indexer handler: indexer is unavailable when deleting") + if err := indexer.Ping(ctx); err != nil { + log.Error("Issue indexer handler: indexer is unavailable when deleting: %v", err) unhandled = append(unhandled, indexerData) } } @@ -66,8 +66,8 @@ func InitIssueIndexer(syncReindex bool) { } if err := indexer.Index(toIndex); err != nil { log.Error("Error whilst indexing: %v Error: %v", toIndex, err) - if !indexer.Ping() { - log.Error("Issue indexer handler: indexer is unavailable when indexing") + if err := indexer.Ping(ctx); err != nil { + log.Error("Issue indexer handler: indexer is unavailable when indexing: %v", err) unhandled = append(unhandled, toIndex...) 
} } @@ -106,7 +106,7 @@ func InitIssueIndexer(syncReindex bool) { } }() issueIndexer := bleve.NewIndexer(setting.Indexer.IssuePath) - existed, err = issueIndexer.Init() + existed, err = issueIndexer.Init(ctx) if err != nil { holder.Set(nil) log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err) @@ -123,7 +123,7 @@ func InitIssueIndexer(syncReindex bool) { log.Debug("Created Bleve Indexer") case "elasticsearch": issueIndexer := elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) - existed, err = issueIndexer.Init() + existed, err = issueIndexer.Init(ctx) if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } @@ -133,7 +133,7 @@ func InitIssueIndexer(syncReindex bool) { holder.Set(issueIndexer) case "meilisearch": issueIndexer := meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) - existed, err = issueIndexer.Init() + existed, err = issueIndexer.Init(ctx) if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } @@ -305,12 +305,12 @@ func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) } // IsAvailable checks if issue indexer is available -func IsAvailable() bool { +func IsAvailable(ctx context.Context) bool { indexer := holder.Get() if indexer == nil { log.Error("IsAvailable(): unable to get indexer!") return false } - return indexer.Ping() + return indexer.Ping(ctx) == nil } diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index 942b1f83789b8..94d83818fc635 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -79,13 +79,13 @@ func Code(ctx *context.Context) { if (len(repoIDs) > 0) || isAdmin { total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, 
page, setting.UI.RepoSearchPagingNum, isMatch) if err != nil { - if code_indexer.IsAvailable() { + if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) return } ctx.Data["CodeIndexerUnavailable"] = true } else { - ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable() + ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) } loadRepoIDs := make([]int64, 0, len(searchResults)) diff --git a/routers/web/repo/issue.go b/routers/web/repo/issue.go index 5ab8db2e057fe..18a466fd852f1 100644 --- a/routers/web/repo/issue.go +++ b/routers/web/repo/issue.go @@ -191,7 +191,7 @@ func issues(ctx *context.Context, milestoneID, projectID int64, isPullOption uti if len(keyword) > 0 { issueIDs, err = issue_indexer.SearchIssuesByKeyword(ctx, []int64{repo.ID}, keyword) if err != nil { - if issue_indexer.IsAvailable() { + if issue_indexer.IsAvailable(ctx) { ctx.ServerError("issueIndexer.Search", err) return } diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index a043198472ae6..3c0fa4bc00ea0 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -45,13 +45,13 @@ func Search(ctx *context.Context) { total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID}, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch) if err != nil { - if code_indexer.IsAvailable() { + if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) return } ctx.Data["CodeIndexerUnavailable"] = true } else { - ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable() + ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) } ctx.Data["SourcePath"] = ctx.Repo.Repository.Link() diff --git a/routers/web/user/code.go b/routers/web/user/code.go index b3adbcb8d3a8f..15524de7d651e 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ -71,13 +71,13 @@ func CodeSearch(ctx *context.Context) { if len(repoIDs) > 0 { total, 
searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch) if err != nil { - if code_indexer.IsAvailable() { + if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) return } ctx.Data["CodeIndexerUnavailable"] = true } else { - ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable() + ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx) } loadRepoIDs := make([]int64, 0, len(searchResults)) From 136fa3f2d793cdcf4c4d60bdc812cb544bdc9e2b Mon Sep 17 00:00:00 2001 From: Jason Song Date: Tue, 13 Jun 2023 17:25:12 +0800 Subject: [PATCH 29/43] fix: use context --- modules/indexer/code/bleve/bleve.go | 2 +- modules/indexer/code/elasticsearch/elasticsearch.go | 5 ++--- modules/indexer/code/git.go | 2 +- modules/indexer/code/indexer.go | 2 +- modules/indexer/code/internal/indexer.go | 2 +- modules/indexer/issues/bleve/bleve.go | 4 ++-- modules/indexer/issues/db/db.go | 4 ++-- modules/indexer/issues/elasticsearch/elasticsearch.go | 8 ++++---- modules/indexer/issues/indexer.go | 4 ++-- modules/indexer/issues/internal/indexer.go | 4 ++-- modules/indexer/issues/meilisearch/meilisearch.go | 4 ++-- 11 files changed, 20 insertions(+), 21 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 2d14bdfe017a0..33cc4e02b5149 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -224,7 +224,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st } // Delete deletes indexes by ids -func (b *Indexer) Delete(repoID int64) error { +func (b *Indexer) Delete(_ context.Context, repoID int64) error { query := numericEqualityQuery(repoID, "RepoID") searchRequest := bleve.NewSearchRequestOptions(query, 2147483647, 0, false) result, err := b.inner.Indexer.Search(searchRequest) diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go 
b/modules/indexer/code/elasticsearch/elasticsearch.go index 79dcabc865fc8..7c394f1a6c316 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -15,7 +15,6 @@ import ( "code.gitea.io/gitea/modules/analyze" "code.gitea.io/gitea/modules/charset" "code.gitea.io/gitea/modules/git" - "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/indexer/code/internal" indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch" @@ -191,10 +190,10 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st } // Delete deletes indexes by ids -func (b *Indexer) Delete(repoID int64) error { +func (b *Indexer) Delete(ctx context.Context, repoID int64) error { _, err := b.inner.Client.DeleteByQuery(b.inner.IndexName()). Query(elastic.NewTermsQuery("repo_id", repoID)). - Do(graceful.GetManager().HammerContext()) + Do(ctx) return err } diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index f6d1fc513ef28..36d2ed231ea39 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -97,7 +97,7 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio // previous commit sha may have been removed by a force push, so // try rebuilding from scratch log.Warn("git diff: %v", runErr) - if err := holder.Get().(internal.Indexer).Delete(repo.ID); err != nil { + if err := holder.Get().(internal.Indexer).Delete(ctx, repo.ID); err != nil { return nil, err } return genesisChanges(ctx, repo, revision) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 31e866ebe73ad..d6f42bef83097 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -31,7 +31,7 @@ var ( func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { repo, err := repo_model.GetRepositoryByID(ctx, repoID) 
if repo_model.IsErrRepoNotExist(err) { - return indexer.Delete(repoID) + return indexer.Delete(ctx, repoID) } if err != nil { return err diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index 2bb91d528f5d4..0358a24d542d3 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -14,6 +14,6 @@ import ( type Indexer interface { internal.Indexer Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error - Delete(repoID int64) error + Delete(ctx context.Context, repoID int64) error Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) } diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index f3a3daf139692..bb0bc4b04a414 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -113,7 +113,7 @@ func NewIndexer(indexDir string) *Indexer { } // Index will save the index data -func (b *Indexer) Index(issues []*internal.IndexerData) error { +func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, issue := range issues { if err := batch.Index(indexer_internal.Base36(issue.ID), struct { @@ -134,7 +134,7 @@ func (b *Indexer) Index(issues []*internal.IndexerData) error { } // Delete deletes indexes by ids -func (b *Indexer) Delete(ids ...int64) error { +func (b *Indexer) Delete(_ context.Context, ids ...int64) error { batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize) for _, id := range ids { if err := batch.Delete(indexer_internal.Base36(id)); err != nil { diff --git a/modules/indexer/issues/db/db.go b/modules/indexer/issues/db/db.go index 39a5d4fce7627..17ed426b384ba 100644 --- a/modules/indexer/issues/db/db.go +++ 
b/modules/indexer/issues/db/db.go @@ -26,12 +26,12 @@ func NewIndexer() *Indexer { } // Index dummy function -func (i *Indexer) Index(issue []*internal.IndexerData) error { +func (i *Indexer) Index(_ context.Context, _ []*internal.IndexerData) error { return nil } // Delete dummy function -func (i *Indexer) Delete(ids ...int64) error { +func (i *Indexer) Delete(_ context.Context, _ ...int64) error { return nil } diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 921250cdd8db1..8194c35bfe6d3 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -68,7 +68,7 @@ const ( ) // Index will save the index data -func (b *Indexer) Index(issues []*internal.IndexerData) error { +func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) error { if len(issues) == 0 { return nil } else if len(issues) == 1 { @@ -83,7 +83,7 @@ func (b *Indexer) Index(issues []*internal.IndexerData) error { "content": issue.Content, "comments": issue.Comments, }). - Do(graceful.GetManager().HammerContext()) + Do(ctx) return err } @@ -111,14 +111,14 @@ func (b *Indexer) Index(issues []*internal.IndexerData) error { } // Delete deletes indexes by ids -func (b *Indexer) Delete(ids ...int64) error { +func (b *Indexer) Delete(ctx context.Context, ids ...int64) error { if len(ids) == 0 { return nil } else if len(ids) == 1 { _, err := b.inner.Client.Delete(). Index(b.inner.IndexName()). Id(fmt.Sprintf("%d", ids[0])). 
- Do(graceful.GetManager().HammerContext()) + Do(ctx) return err } diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index a003ce76ca2c5..5c58eef2ec1a9 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -53,7 +53,7 @@ func InitIssueIndexer(syncReindex bool) { for _, indexerData := range items { log.Trace("IndexerData Process: %d %v %t", indexerData.ID, indexerData.IDs, indexerData.IsDelete) if indexerData.IsDelete { - if err := indexer.Delete(indexerData.IDs...); err != nil { + if err := indexer.Delete(ctx, indexerData.IDs...); err != nil { log.Error("Issue indexer handler: failed to from index: %v Error: %v", indexerData.IDs, err) if err := indexer.Ping(ctx); err != nil { log.Error("Issue indexer handler: indexer is unavailable when deleting: %v", err) @@ -64,7 +64,7 @@ func InitIssueIndexer(syncReindex bool) { } toIndex = append(toIndex, indexerData) } - if err := indexer.Index(toIndex); err != nil { + if err := indexer.Index(ctx, toIndex); err != nil { log.Error("Error whilst indexing: %v Error: %v", toIndex, err) if err := indexer.Ping(ctx); err != nil { log.Error("Issue indexer handler: indexer is unavailable when indexing: %v", err) diff --git a/modules/indexer/issues/internal/indexer.go b/modules/indexer/issues/internal/indexer.go index fb73de2f34def..cc676788d25d0 100644 --- a/modules/indexer/issues/internal/indexer.go +++ b/modules/indexer/issues/internal/indexer.go @@ -12,7 +12,7 @@ import ( // Indexer defines an interface to indexer issues contents type Indexer interface { internal.Indexer - Index(issue []*IndexerData) error - Delete(ids ...int64) error + Index(ctx context.Context, issue []*IndexerData) error + Delete(ctx context.Context, ids ...int64) error Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) } diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 
fe7ed6288c93e..fb5188f07b8d5 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -34,7 +34,7 @@ func NewIndexer(url, apiKey, indexerName string) *Indexer { } // Index will save the index data -func (b *Indexer) Index(issues []*internal.IndexerData) error { +func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error { if len(issues) == 0 { return nil } @@ -49,7 +49,7 @@ func (b *Indexer) Index(issues []*internal.IndexerData) error { } // Delete deletes indexes by ids -func (b *Indexer) Delete(ids ...int64) error { +func (b *Indexer) Delete(_ context.Context, ids ...int64) error { if len(ids) == 0 { return nil } From d2707dab9f00d8b0ad9dba409382f1f277198650 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Tue, 13 Jun 2023 17:39:02 +0800 Subject: [PATCH 30/43] fix: check all old versions index --- .../indexer/internal/elasticsearch/util.go | 52 +++++++------------ 1 file changed, 20 insertions(+), 32 deletions(-) diff --git a/modules/indexer/internal/elasticsearch/util.go b/modules/indexer/internal/elasticsearch/util.go index 82b60dd5887cb..a80301f6fc878 100644 --- a/modules/indexer/internal/elasticsearch/util.go +++ b/modules/indexer/internal/elasticsearch/util.go @@ -15,7 +15,11 @@ import ( // IndexName returns the full index name with version func (i *Indexer) IndexName() string { - return fmt.Sprintf("%s.v%d", i.indexAliasName, i.version) + return formatIndexName(i.indexAliasName, i.version) +} + +func formatIndexName(indexAliasName string, version int) string { + return fmt.Sprintf("%s.v%d", indexAliasName, version) } func (i *Indexer) createIndex(ctx context.Context) error { @@ -27,37 +31,7 @@ func (i *Indexer) createIndex(ctx context.Context) error { return fmt.Errorf("create index %s with %s failed", i.IndexName(), i.mapping) } - // check version - r, err := i.Client.Aliases().Do(ctx) - if err != nil { - return err - } - - realIndexerNames := 
r.IndicesByAlias(i.indexAliasName) - if len(realIndexerNames) < 1 { - res, err := i.Client.Alias(). - Add(i.IndexName(), i.indexAliasName). - Do(ctx) - if err != nil { - return err - } - if !res.Acknowledged { - return fmt.Errorf("create alias %s to index %s failed", i.indexAliasName, i.IndexName()) - } - } else if len(realIndexerNames) >= 1 && realIndexerNames[0] < i.IndexName() { - log.Warn("Found older gitea indexer named %s, but we will create a new one %s and keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", - realIndexerNames[0], i.IndexName()) - res, err := i.Client.Alias(). - Remove(realIndexerNames[0], i.indexAliasName). - Add(i.IndexName(), i.indexAliasName). - Do(ctx) - if err != nil { - return err - } - if !res.Acknowledged { - return fmt.Errorf("change alias %s to index %s failed", i.indexAliasName, i.IndexName()) - } - } + i.checkOldIndexes(ctx) return nil } @@ -78,3 +52,17 @@ func (i *Indexer) initClient() (*elastic.Client, error) { return elastic.NewClient(opts...) } + +func (i *Indexer) checkOldIndexes(ctx context.Context) { + i.checkOldIndex(ctx, i.indexAliasName) // Old index name without version + for v := 1; v < i.version; v++ { + i.checkOldIndex(ctx, formatIndexName(i.indexAliasName, v)) + } +} + +func (i *Indexer) checkOldIndex(ctx context.Context, indexName string) { + exists, err := i.Client.IndexExists(indexName).Do(ctx) + if err == nil && exists { + log.Warn("Found older elasticsearch index named %q, Gitea will keep the old NOT DELETED. 
You can delete the old version after the upgrade succeed.", indexName) + } +} From 77d788bf072002d84039294159ed5211d641d845 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Tue, 13 Jun 2023 17:45:07 +0800 Subject: [PATCH 31/43] fix: VersionedIndexName --- .../code/elasticsearch/elasticsearch.go | 14 +++++++------- .../indexer/internal/elasticsearch/indexer.go | 18 +++++++++--------- modules/indexer/internal/elasticsearch/util.go | 18 +++++++++--------- .../issues/elasticsearch/elasticsearch.go | 14 +++++++------- 4 files changed, 32 insertions(+), 32 deletions(-) diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 7c394f1a6c316..88054585cd28f 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -131,7 +131,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return []elastic.BulkableRequest{ elastic.NewBulkIndexRequest(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Id(id). Doc(map[string]interface{}{ "repo_id": repo.ID, @@ -146,7 +146,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro func (b *Indexer) addDelete(filename string, repo *repo_model.Repository) elastic.BulkableRequest { id := internal.FilenameIndexerID(repo.ID, filename) return elastic.NewBulkDeleteRequest(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Id(id) } @@ -181,7 +181,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st if len(reqs) > 0 { _, err := b.inner.Client.Bulk(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Add(reqs...). 
Do(ctx) return err @@ -191,7 +191,7 @@ func (b *Indexer) Index(ctx context.Context, repo *repo_model.Repository, sha st // Delete deletes indexes by ids func (b *Indexer) Delete(ctx context.Context, repoID int64) error { - _, err := b.inner.Client.DeleteByQuery(b.inner.IndexName()). + _, err := b.inner.Client.DeleteByQuery(b.inner.VersionedIndexName()). Query(elastic.NewTermsQuery("repo_id", repoID)). Do(ctx) return err @@ -305,7 +305,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword if len(language) == 0 { searchResult, err := b.inner.Client.Search(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Aggregation("language", aggregation). Query(query). Highlight( @@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword langQuery := elastic.NewMatchQuery("language", language) countResult, err := b.inner.Client.Search(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Aggregation("language", aggregation). Query(query). Size(0). // We only need stats information @@ -337,7 +337,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword query = query.Must(langQuery) searchResult, err := b.inner.Client.Search(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Query(query). Highlight( elastic.NewHighlight(). 
diff --git a/modules/indexer/internal/elasticsearch/indexer.go b/modules/indexer/internal/elasticsearch/indexer.go index 5013c17ea6714..2c60efad564fc 100644 --- a/modules/indexer/internal/elasticsearch/indexer.go +++ b/modules/indexer/internal/elasticsearch/indexer.go @@ -18,18 +18,18 @@ var _ internal.Indexer = &Indexer{} type Indexer struct { Client *elastic.Client - url string - indexAliasName string - version int - mapping string + url string + indexName string + version int + mapping string } func NewIndexer(url, indexName string, version int, mapping string) *Indexer { return &Indexer{ - url: url, - indexAliasName: indexName, - version: version, - mapping: mapping, + url: url, + indexName: indexName, + version: version, + mapping: mapping, } } @@ -48,7 +48,7 @@ func (i *Indexer) Init(ctx context.Context) (bool, error) { } i.Client = client - exists, err := i.Client.IndexExists(i.IndexName()).Do(ctx) + exists, err := i.Client.IndexExists(i.VersionedIndexName()).Do(ctx) if err != nil { return false, err } diff --git a/modules/indexer/internal/elasticsearch/util.go b/modules/indexer/internal/elasticsearch/util.go index a80301f6fc878..7656c5487f527 100644 --- a/modules/indexer/internal/elasticsearch/util.go +++ b/modules/indexer/internal/elasticsearch/util.go @@ -13,22 +13,22 @@ import ( "github.com/olivere/elastic/v7" ) -// IndexName returns the full index name with version -func (i *Indexer) IndexName() string { - return formatIndexName(i.indexAliasName, i.version) +// VersionedIndexName returns the full index name with version +func (i *Indexer) VersionedIndexName() string { + return versionedIndexName(i.indexName, i.version) } -func formatIndexName(indexAliasName string, version int) string { - return fmt.Sprintf("%s.v%d", indexAliasName, version) +func versionedIndexName(indexName string, version int) string { + return fmt.Sprintf("%s.v%d", indexName, version) } func (i *Indexer) createIndex(ctx context.Context) error { - createIndex, err := 
i.Client.CreateIndex(i.IndexName()).BodyString(i.mapping).Do(ctx) + createIndex, err := i.Client.CreateIndex(i.VersionedIndexName()).BodyString(i.mapping).Do(ctx) if err != nil { return err } if !createIndex.Acknowledged { - return fmt.Errorf("create index %s with %s failed", i.IndexName(), i.mapping) + return fmt.Errorf("create index %s with %s failed", i.VersionedIndexName(), i.mapping) } i.checkOldIndexes(ctx) @@ -54,9 +54,9 @@ func (i *Indexer) initClient() (*elastic.Client, error) { } func (i *Indexer) checkOldIndexes(ctx context.Context) { - i.checkOldIndex(ctx, i.indexAliasName) // Old index name without version + i.checkOldIndex(ctx, i.indexName) // Old index name without version for v := 1; v < i.version; v++ { - i.checkOldIndex(ctx, formatIndexName(i.indexAliasName, v)) + i.checkOldIndex(ctx, versionedIndexName(i.indexName, v)) } } diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 8194c35bfe6d3..9e2b4645cd0f1 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -74,7 +74,7 @@ func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) err } else if len(issues) == 1 { issue := issues[0] _, err := b.inner.Client.Index(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Id(fmt.Sprintf("%d", issue.ID)). BodyJson(map[string]interface{}{ "id": issue.ID, @@ -91,7 +91,7 @@ func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) err for _, issue := range issues { reqs = append(reqs, elastic.NewBulkIndexRequest(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Id(fmt.Sprintf("%d", issue.ID)). Doc(map[string]interface{}{ "id": issue.ID, @@ -104,7 +104,7 @@ func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) err } _, err := b.inner.Client.Bulk(). - Index(b.inner.IndexName()). 
+ Index(b.inner.VersionedIndexName()). Add(reqs...). Do(graceful.GetManager().HammerContext()) return err @@ -116,7 +116,7 @@ func (b *Indexer) Delete(ctx context.Context, ids ...int64) error { return nil } else if len(ids) == 1 { _, err := b.inner.Client.Delete(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Id(fmt.Sprintf("%d", ids[0])). Do(ctx) return err @@ -126,13 +126,13 @@ func (b *Indexer) Delete(ctx context.Context, ids ...int64) error { for _, id := range ids { reqs = append(reqs, elastic.NewBulkDeleteRequest(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Id(fmt.Sprintf("%d", id)), ) } _, err := b.inner.Client.Bulk(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Add(reqs...). Do(graceful.GetManager().HammerContext()) return err @@ -153,7 +153,7 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l query = query.Must(repoQuery) } searchResult, err := b.inner.Client.Search(). - Index(b.inner.IndexName()). + Index(b.inner.VersionedIndexName()). Query(query). Sort("_score", false). From(start).Size(limit). 
From 45c26bc349aee5ad3bbbcbd9794bf4f23db74ad6 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Tue, 13 Jun 2023 18:01:36 +0800 Subject: [PATCH 32/43] feat: meilisearch index version --- .../indexer/internal/meilisearch/indexer.go | 20 +++++++------ modules/indexer/internal/meilisearch/util.go | 30 +++++++++++++++++-- .../indexer/issues/meilisearch/meilisearch.go | 12 +++++--- 3 files changed, 46 insertions(+), 16 deletions(-) diff --git a/modules/indexer/internal/meilisearch/indexer.go b/modules/indexer/internal/meilisearch/indexer.go index a196ec717f58a..06747ff7e07ae 100644 --- a/modules/indexer/internal/meilisearch/indexer.go +++ b/modules/indexer/internal/meilisearch/indexer.go @@ -15,14 +15,16 @@ type Indexer struct { Client *meilisearch.Client url, apiKey string - indexerName string + indexName string + version int } -func NewIndexer(url, apiKey, indexerName string) *Indexer { +func NewIndexer(url, apiKey, indexName string, version int) *Indexer { return &Indexer{ - url: url, - apiKey: apiKey, - indexerName: indexerName, + url: url, + apiKey: apiKey, + indexName: indexName, + version: version, } } @@ -41,21 +43,21 @@ func (i *Indexer) Init(_ context.Context) (bool, error) { APIKey: i.apiKey, }) - _, err := i.Client.GetIndex(i.indexerName) + _, err := i.Client.GetIndex(i.VersionedIndexName()) if err == nil { return true, nil } _, err = i.Client.CreateIndex(&meilisearch.IndexConfig{ - Uid: i.indexerName, + Uid: i.VersionedIndexName(), PrimaryKey: "id", }) if err != nil { return false, err } - // TODO support version ? 
+ i.checkOldIndexes() - _, err = i.Client.Index(i.indexerName).UpdateFilterableAttributes(&[]string{"repo_id"}) + _, err = i.Client.Index(i.VersionedIndexName()).UpdateFilterableAttributes(&[]string{"repo_id"}) return false, err } diff --git a/modules/indexer/internal/meilisearch/util.go b/modules/indexer/internal/meilisearch/util.go index 385511da2046a..34df93e13e8dd 100644 --- a/modules/indexer/internal/meilisearch/util.go +++ b/modules/indexer/internal/meilisearch/util.go @@ -3,7 +3,31 @@ package meilisearch -// IndexName returns the full index name with version -func (i *Indexer) IndexName() string { - return i.indexerName +import ( + "fmt" + + "code.gitea.io/gitea/modules/log" +) + +// VersionedIndexName returns the full index name with version +func (i *Indexer) VersionedIndexName() string { + return versionedIndexName(i.indexName, i.version) +} + +func versionedIndexName(indexName string, version int) string { + return fmt.Sprintf("%s.v%d", indexName, version) +} + +func (i *Indexer) checkOldIndexes() { + i.checkOldIndex(i.indexName) // Old index name without version + for v := 1; v < i.version; v++ { + i.checkOldIndex(versionedIndexName(i.indexName, v)) + } +} + +func (i *Indexer) checkOldIndex(indexName string) { + _, err := i.Client.GetIndex(indexName) + if err == nil { + log.Warn("Found older meilisearch index named %q, Gitea will keep the old NOT DELETED. 
You can delete the old version after the upgrade succeed.", indexName) + } } diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index fb5188f07b8d5..09ab806cc87ae 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -15,6 +15,10 @@ import ( "github.com/meilisearch/meilisearch-go" ) +const ( + issueIndexerLatestVersion = 1 +) + var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface @@ -25,7 +29,7 @@ type Indexer struct { // NewIndexer creates a new meilisearch indexer func NewIndexer(url, apiKey, indexerName string) *Indexer { - inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName) + inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion) indexer := &Indexer{ inner: inner, Indexer: inner, @@ -39,7 +43,7 @@ func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error return nil } for _, issue := range issues { - _, err := b.inner.Client.Index(b.inner.IndexName()).AddDocuments(issue) + _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue) if err != nil { return err } @@ -55,7 +59,7 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error { } for _, id := range ids { - _, err := b.inner.Client.Index(b.inner.IndexName()).DeleteDocument(strconv.FormatInt(id, 10)) + _, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10)) if err != nil { return err } @@ -72,7 +76,7 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l repoFilters = append(repoFilters, "repo_id = "+strconv.FormatInt(repoID, 10)) } filter := strings.Join(repoFilters, " OR ") - searchRes, err := b.inner.Client.Index(b.inner.IndexName()).Search(keyword, &meilisearch.SearchRequest{ + searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, 
&meilisearch.SearchRequest{ Filter: filter, Limit: int64(limit), Offset: int64(start), From aa7a79565a7d39cd7b80aec60f9b721ada5c9b0d Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 11:02:08 +0800 Subject: [PATCH 33/43] test: fix --- modules/indexer/code/indexer_test.go | 6 +++--- modules/indexer/issues/bleve/bleve_test.go | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 4231378201375..55616a0361e21 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -85,7 +85,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }) } - assert.NoError(t, indexer.Delete(repoID)) + assert.NoError(t, indexer.Delete(context.Background(), repoID)) }) } @@ -95,7 +95,7 @@ func TestBleveIndexAndSearch(t *testing.T) { dir := t.TempDir() idx := bleve.NewIndexer(dir) - _, err := idx.Init() + _, err := idx.Init(context.Background()) if err != nil { assert.Fail(t, "Unable to create bleve indexer Error: %v", err) if idx != nil { @@ -118,7 +118,7 @@ func TestESIndexAndSearch(t *testing.T) { } indexer := elasticsearch.NewIndexer(u, "gitea_codes") - if _, err := indexer.Init(); err != nil { + if _, err := indexer.Init(context.Background()); err != nil { assert.Fail(t, "Unable to init ES indexer Error: %v", err) if indexer != nil { indexer.Close() diff --git a/modules/indexer/issues/bleve/bleve_test.go b/modules/indexer/issues/bleve/bleve_test.go index 8b4dc9ab095aa..f890f8eb488fe 100644 --- a/modules/indexer/issues/bleve/bleve_test.go +++ b/modules/indexer/issues/bleve/bleve_test.go @@ -17,12 +17,12 @@ func TestBleveIndexAndSearch(t *testing.T) { indexer := NewIndexer(dir) defer indexer.Close() - if _, err := indexer.Init(); err != nil { + if _, err := indexer.Init(context.Background()); err != nil { assert.Fail(t, "Unable to initialize bleve indexer: %v", err) return } - err := 
indexer.Index([]*internal.IndexerData{ + err := indexer.Index(context.Background(), []*internal.IndexerData{ { ID: 1, RepoID: 2, From 01f6235d69cc1358df13882fb32f5dec3bf0aa4b Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 11:11:25 +0800 Subject: [PATCH 34/43] feat: support version 0 --- .../indexer/internal/elasticsearch/util.go | 20 +++++++++---------- modules/indexer/internal/meilisearch/util.go | 20 +++++++++---------- .../issues/elasticsearch/elasticsearch.go | 2 +- .../indexer/issues/meilisearch/meilisearch.go | 2 +- 4 files changed, 22 insertions(+), 22 deletions(-) diff --git a/modules/indexer/internal/elasticsearch/util.go b/modules/indexer/internal/elasticsearch/util.go index 7656c5487f527..9e034bd553095 100644 --- a/modules/indexer/internal/elasticsearch/util.go +++ b/modules/indexer/internal/elasticsearch/util.go @@ -19,6 +19,10 @@ func (i *Indexer) VersionedIndexName() string { } func versionedIndexName(indexName string, version int) string { + if version == 0 { + // Old index name without version + return indexName + } return fmt.Sprintf("%s.v%d", indexName, version) } @@ -54,15 +58,11 @@ func (i *Indexer) initClient() (*elastic.Client, error) { } func (i *Indexer) checkOldIndexes(ctx context.Context) { - i.checkOldIndex(ctx, i.indexName) // Old index name without version - for v := 1; v < i.version; v++ { - i.checkOldIndex(ctx, versionedIndexName(i.indexName, v)) - } -} - -func (i *Indexer) checkOldIndex(ctx context.Context, indexName string) { - exists, err := i.Client.IndexExists(indexName).Do(ctx) - if err == nil && exists { - log.Warn("Found older elasticsearch index named %q, Gitea will keep the old NOT DELETED. 
You can delete the old version after the upgrade succeed.", indexName) + for v := 0; v < i.version; v++ { + indexName := versionedIndexName(i.indexName, v) + exists, err := i.Client.IndexExists(indexName).Do(ctx) + if err == nil && exists { + log.Warn("Found older elasticsearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName) + } } } diff --git a/modules/indexer/internal/meilisearch/util.go b/modules/indexer/internal/meilisearch/util.go index 34df93e13e8dd..44d42e2f7c9e6 100644 --- a/modules/indexer/internal/meilisearch/util.go +++ b/modules/indexer/internal/meilisearch/util.go @@ -15,19 +15,19 @@ func (i *Indexer) VersionedIndexName() string { } func versionedIndexName(indexName string, version int) string { + if version == 0 { + // Old index name without version + return indexName + } return fmt.Sprintf("%s.v%d", indexName, version) } func (i *Indexer) checkOldIndexes() { - i.checkOldIndex(i.indexName) // Old index name without version - for v := 1; v < i.version; v++ { - i.checkOldIndex(versionedIndexName(i.indexName, v)) - } -} - -func (i *Indexer) checkOldIndex(indexName string) { - _, err := i.Client.GetIndex(indexName) - if err == nil { - log.Warn("Found older meilisearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName) + for v := 0; v < i.version; v++ { + indexName := versionedIndexName(i.indexName, v) + _, err := i.Client.GetIndex(indexName) + if err == nil { + log.Warn("Found older meilisearch index named %q, Gitea will keep the old NOT DELETED. 
You can delete the old version after the upgrade succeed.", indexName) + } } } diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 9e2b4645cd0f1..33a7dfc21e0c7 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -17,7 +17,7 @@ import ( ) const ( - issueIndexerLatestVersion = 2 + issueIndexerLatestVersion = 0 ) var _ internal.Indexer = &Indexer{} diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 09ab806cc87ae..877c04f1dcb2d 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -16,7 +16,7 @@ import ( ) const ( - issueIndexerLatestVersion = 1 + issueIndexerLatestVersion = 0 ) var _ internal.Indexer = &Indexer{} From 847dac13a50a1d4ed47186cc0cc1af238d6382d5 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 11:30:41 +0800 Subject: [PATCH 35/43] chore: update comments --- modules/indexer/stats/indexer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/stats/indexer.go b/modules/indexer/stats/indexer.go index d8f355e3f4d20..6bfa8bdedb975 100644 --- a/modules/indexer/stats/indexer.go +++ b/modules/indexer/stats/indexer.go @@ -11,7 +11,7 @@ import ( ) // Indexer defines an interface to index repository stats -// TODO: this indexer is quite different from the others, maybe it should be moved out for module/indexer +// TODO: this indexer is quite different from the others, maybe this package should be moved out from module/indexer type Indexer interface { Index(id int64) error Close() From fd48e00efb23a710a96988bd9eebf519a3f37ca5 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 12:33:38 +0800 Subject: [PATCH 36/43] fix: give up IndexerHolder --- modules/indexer/code/git.go | 2 +- modules/indexer/code/indexer.go | 27 
+++++++-------- modules/indexer/code/internal/indexer.go | 24 +++++++++++++ modules/indexer/code/search.go | 2 +- modules/indexer/internal/holder.go | 38 -------------------- modules/indexer/internal/indexer.go | 22 +++++++++++- modules/indexer/issues/indexer.go | 40 ++++++++++------------ modules/indexer/issues/internal/indexer.go | 24 +++++++++++++ 8 files changed, 102 insertions(+), 77 deletions(-) delete mode 100644 modules/indexer/internal/holder.go diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 36d2ed231ea39..30662d8bc964d 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -97,7 +97,7 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio // previous commit sha may have been removed by a force push, so // try rebuilding from scratch log.Warn("git diff: %v", runErr) - if err := holder.Get().(internal.Indexer).Delete(ctx, repo.ID); err != nil { + if err := globalIndexer.Delete(ctx, repo.ID); err != nil { return nil, err } return genesisChanges(ctx, repo, revision) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index d6f42bef83097..b1a607ce1fc1e 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -15,7 +15,6 @@ import ( "code.gitea.io/gitea/modules/indexer/code/bleve" "code.gitea.io/gitea/modules/indexer/code/elasticsearch" "code.gitea.io/gitea/modules/indexer/code/internal" - indexer_internal "code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/process" "code.gitea.io/gitea/modules/queue" @@ -24,8 +23,8 @@ import ( ) var ( - indexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] - holder = indexer_internal.NewIndexerHolder() + indexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] + globalIndexer = internal.NewDummyIndexer() ) func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { @@ -84,7 +83,7 @@ func index(ctx 
context.Context, indexer internal.Indexer, repoID int64) error { // Init initialize the repo indexer func Init() { if !setting.Indexer.RepoIndexerEnabled { - holder.Get().Close() + globalIndexer.Close() return } @@ -98,7 +97,7 @@ func Init() { } cancel() log.Debug("Closing repository indexer") - holder.Get().Close() + globalIndexer.Close() log.Info("PID: %d Repository Indexer closed", os.Getpid()) finished() }) @@ -109,7 +108,7 @@ func Init() { switch setting.Indexer.RepoType { case "bleve", "elasticsearch": handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) { - indexer := holder.Get().(internal.Indexer) + indexer := globalIndexer if indexer == nil { log.Warn("Codes indexer handler: indexer is not ready, retry later.") return items @@ -177,7 +176,7 @@ func Init() { existed, err = rIndexer.Init(ctx) if err != nil { cancel() - holder.Get().Close() + globalIndexer.Close() close(waitChannel) log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err) } @@ -194,14 +193,14 @@ func Init() { rIndexer = elasticsearch.NewIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) if err != nil { cancel() - holder.Get().Close() + globalIndexer.Close() close(waitChannel) log.Fatal("PID: %d Unable to create the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } existed, err = rIndexer.Init(ctx) if err != nil { cancel() - holder.Get().Close() + globalIndexer.Close() close(waitChannel) log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } @@ -210,7 +209,7 @@ func Init() { log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType) } - holder.Set(rIndexer) + globalIndexer = rIndexer // Start processing the queue go graceful.GetManager().RunWithCancel(indexerQueue) @@ -237,18 +236,18 @@ func 
Init() { case <-graceful.GetManager().IsShutdown(): log.Warn("Shutdown before Repository Indexer completed initialization") cancel() - holder.Get().Close() + globalIndexer.Close() case duration, ok := <-waitChannel: if !ok { log.Warn("Repository Indexer Initialization failed") cancel() - holder.Get().Close() + globalIndexer.Close() return } log.Info("Repository Indexer Initialization took %v", duration) case <-time.After(timeout): cancel() - holder.Get().Close() + globalIndexer.Close() log.Fatal("Repository Indexer Initialization Timed-Out after: %v", timeout) } }() @@ -265,7 +264,7 @@ func UpdateRepoIndexer(repo *repo_model.Repository) { // IsAvailable checks if issue indexer is available func IsAvailable(ctx context.Context) bool { - idx := holder.Get().(internal.Indexer) + idx := globalIndexer if idx == nil { log.Error("IsAvailable(): unable to get indexer") return false diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index 0358a24d542d3..da3ac3623c92f 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -5,6 +5,7 @@ package internal import ( "context" + "fmt" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/indexer/internal" @@ -17,3 +18,26 @@ type Indexer interface { Delete(ctx context.Context, repoID int64) error Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) } + +// NewDummyIndexer returns a dummy indexer +func NewDummyIndexer() Indexer { + return &dummyIndexer{ + Indexer: internal.NewDummyIndexer(), + } +} + +type dummyIndexer struct { + internal.Indexer +} + +func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error { + return fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { + return 
fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { + return 0, nil, nil, fmt.Errorf("indexer is not ready") +} diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index db2ab93fc74ab..93a7fb791fa8d 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -117,7 +117,7 @@ func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword strin return 0, nil, nil, nil } - total, results, resultLanguages, err := holder.Get().(internal.Indexer).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) + total, results, resultLanguages, err := globalIndexer.Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) if err != nil { return 0, nil, nil, err } diff --git a/modules/indexer/internal/holder.go b/modules/indexer/internal/holder.go deleted file mode 100644 index 3ff529eaa24b4..0000000000000 --- a/modules/indexer/internal/holder.go +++ /dev/null @@ -1,38 +0,0 @@ -// Copyright 2023 The Gitea Authors. All rights reserved. 
-// SPDX-License-Identifier: MIT - -package internal - -import ( - "sync" -) - -type IndexerHolder struct { - indexer Indexer - mutex sync.RWMutex - cond *sync.Cond -} - -func NewIndexerHolder() *IndexerHolder { - h := &IndexerHolder{} - h.cond = sync.NewCond(h.mutex.RLocker()) - return h -} - -func (h *IndexerHolder) Set(indexer Indexer) { - h.mutex.Lock() - defer h.mutex.Unlock() - h.indexer = indexer - h.cond.Broadcast() -} - -// Get returns the indexer, blocking until it is set -// It never returns nil -func (h *IndexerHolder) Get() Indexer { - h.mutex.RLock() - defer h.mutex.RUnlock() - for h.indexer == nil { // make sure it never return nil even called Set(nil) - h.cond.Wait() - } - return h.indexer -} diff --git a/modules/indexer/internal/indexer.go b/modules/indexer/internal/indexer.go index 4f50e0f722d3d..c7f356da1efeb 100644 --- a/modules/indexer/internal/indexer.go +++ b/modules/indexer/internal/indexer.go @@ -3,7 +3,10 @@ package internal -import "context" +import ( + "context" + "fmt" +) // Indexer defines an basic indexer interface type Indexer interface { @@ -15,3 +18,20 @@ type Indexer interface { // Close closes the indexer Close() } + +// NewDummyIndexer returns a dummy indexer +func NewDummyIndexer() Indexer { + return &dummyIndexer{} +} + +type dummyIndexer struct{} + +func (d *dummyIndexer) Init(ctx context.Context) (bool, error) { + return false, fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Ping(ctx context.Context) error { + return fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Close() {} diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index 5c58eef2ec1a9..09596c5db6954 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -14,7 +14,6 @@ import ( issues_model "code.gitea.io/gitea/models/issues" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/graceful" - indexer_internal 
"code.gitea.io/gitea/modules/indexer/internal" "code.gitea.io/gitea/modules/indexer/issues/bleve" "code.gitea.io/gitea/modules/indexer/issues/db" "code.gitea.io/gitea/modules/indexer/issues/elasticsearch" @@ -30,7 +29,7 @@ import ( var ( // issueIndexerQueue queue of issue ids to be updated issueIndexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] - holder = indexer_internal.NewIndexerHolder() + globalIndexer = internal.NewDummyIndexer() ) // InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until @@ -44,7 +43,7 @@ func InitIssueIndexer(syncReindex bool) { switch setting.Indexer.IssueType { case "bleve", "elasticsearch", "meilisearch": handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) { - indexer := holder.Get().(internal.Indexer) + indexer := globalIndexer if indexer == nil { log.Warn("Issue indexer handler: indexer is not ready, retry later.") return items @@ -101,48 +100,45 @@ func InitIssueIndexer(syncReindex bool) { log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2)) log.Error("The indexer files are likely corrupted and may need to be deleted") log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath) - holder.Set(nil) + globalIndexer = internal.NewDummyIndexer() log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err) } }() issueIndexer := bleve.NewIndexer(setting.Indexer.IssuePath) existed, err = issueIndexer.Init(ctx) if err != nil { - holder.Set(nil) + globalIndexer = internal.NewDummyIndexer() log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err) } - holder.Set(issueIndexer) - graceful.GetManager().RunAtTerminate(func() { - log.Debug("Closing issue indexer") - issueIndexer := holder.Get() - if issueIndexer != nil { - issueIndexer.Close() - } - log.Info("PID: %d 
Issue Indexer closed", os.Getpid()) - }) - log.Debug("Created Bleve Indexer") + globalIndexer = issueIndexer case "elasticsearch": issueIndexer := elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) existed, err = issueIndexer.Init(ctx) if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } - holder.Set(issueIndexer) + globalIndexer = issueIndexer case "db": issueIndexer := db.NewIndexer() - holder.Set(issueIndexer) + globalIndexer = issueIndexer case "meilisearch": issueIndexer := meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) existed, err = issueIndexer.Init(ctx) if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } - holder.Set(issueIndexer) + globalIndexer = issueIndexer default: - holder.Set(nil) + globalIndexer = internal.NewDummyIndexer() log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType) } + graceful.GetManager().RunAtTerminate(func() { + log.Debug("Closing issue indexer") + globalIndexer.Close() + log.Info("PID: %d Issue Indexer closed", os.Getpid()) + }) + // Start processing the queue go graceful.GetManager().RunWithCancel(issueIndexerQueue) @@ -288,7 +284,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { // WARNNING: You have to ensure user have permission to visit repoIDs' issues func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) ([]int64, error) { var issueIDs []int64 - indexer := holder.Get().(internal.Indexer) + indexer := globalIndexer if indexer == nil { log.Error("SearchIssuesByKeyword(): unable to get indexer!") @@ -306,11 +302,11 @@ func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) // IsAvailable checks if issue indexer is available func IsAvailable(ctx context.Context) bool { - indexer := 
holder.Get() + indexer := globalIndexer if indexer == nil { log.Error("IsAvailable(): unable to get indexer!") return false } - return indexer.Ping(ctx) == nil + return globalIndexer.Ping(ctx) == nil } diff --git a/modules/indexer/issues/internal/indexer.go b/modules/indexer/issues/internal/indexer.go index cc676788d25d0..553c8a573cdcb 100644 --- a/modules/indexer/issues/internal/indexer.go +++ b/modules/indexer/issues/internal/indexer.go @@ -5,6 +5,7 @@ package internal import ( "context" + "fmt" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -16,3 +17,26 @@ type Indexer interface { Delete(ctx context.Context, ids ...int64) error Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) } + +// NewDummyIndexer returns a dummy indexer +func NewDummyIndexer() Indexer { + return &dummyIndexer{ + Indexer: internal.NewDummyIndexer(), + } +} + +type dummyIndexer struct { + internal.Indexer +} + +func (d *dummyIndexer) Index(ctx context.Context, issue []*IndexerData) error { + return fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Delete(ctx context.Context, ids ...int64) error { + return fmt.Errorf("indexer is not ready") +} + +func (d *dummyIndexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) { + return nil, fmt.Errorf("indexer is not ready") +} From b36dbac78322229fa37a8d7942179f9ea3366af5 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 14:42:16 +0800 Subject: [PATCH 37/43] test: fix --- modules/indexer/issues/indexer_test.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go index 9821e793729f2..0e26471923a9e 100644 --- a/modules/indexer/issues/indexer_test.go +++ b/modules/indexer/issues/indexer_test.go @@ -43,8 +43,7 @@ func TestBleveSearchIssues(t *testing.T) { setting.LoadQueueSettings() InitIssueIndexer(true) defer func() { - indexer := 
holder.Get() - if bleveIndexer, ok := indexer.(*bleve.Indexer); ok { + if bleveIndexer, ok := globalIndexer.(*bleve.Indexer); ok { bleveIndexer.Close() } }() From 8882f2eb886c57f80e6e47a707368333aa2a4cb6 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 16:14:01 +0800 Subject: [PATCH 38/43] fix: return unhandled data when index failed --- modules/indexer/issues/indexer.go | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index 09596c5db6954..d3d1c40ceb4c6 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -54,10 +54,7 @@ func InitIssueIndexer(syncReindex bool) { if indexerData.IsDelete { if err := indexer.Delete(ctx, indexerData.IDs...); err != nil { log.Error("Issue indexer handler: failed to from index: %v Error: %v", indexerData.IDs, err) - if err := indexer.Ping(ctx); err != nil { - log.Error("Issue indexer handler: indexer is unavailable when deleting: %v", err) - unhandled = append(unhandled, indexerData) - } + unhandled = append(unhandled, indexerData) } continue } @@ -65,10 +62,7 @@ func InitIssueIndexer(syncReindex bool) { } if err := indexer.Index(ctx, toIndex); err != nil { log.Error("Error whilst indexing: %v Error: %v", toIndex, err) - if err := indexer.Ping(ctx); err != nil { - log.Error("Issue indexer handler: indexer is unavailable when indexing: %v", err) - unhandled = append(unhandled, toIndex...) - } + unhandled = append(unhandled, toIndex...) 
} return unhandled } From 54a019527e364d75307b02ec55f3f0b60f5a5eb1 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 17:19:48 +0800 Subject: [PATCH 39/43] docs: update config --- custom/conf/app.example.ini | 6 +++--- docs/content/doc/administration/config-cheat-sheet.en-us.md | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/custom/conf/app.example.ini b/custom/conf/app.example.ini index f53d9ee089000..a6ab4b98a406d 100644 --- a/custom/conf/app.example.ini +++ b/custom/conf/app.example.ini @@ -1337,10 +1337,10 @@ LEVEL = Info ;; Issue indexer storage path, available when ISSUE_INDEXER_TYPE is bleve ;ISSUE_INDEXER_PATH = indexers/issues.bleve ; Relative paths will be made absolute against _`AppWorkPath`_. ;; -;; Issue indexer connection string, available when ISSUE_INDEXER_TYPE is elasticsearch or meilisearch -;ISSUE_INDEXER_CONN_STR = http://elastic:changeme@localhost:9200 +;; Issue indexer connection string, available when ISSUE_INDEXER_TYPE is elasticsearch (e.g. http://elastic:password@localhost:9200) or meilisearch (e.g. http://:apikey@localhost:7700) +;ISSUE_INDEXER_CONN_STR = ;; -;; Issue indexer name, available when ISSUE_INDEXER_TYPE is elasticsearch +;; Issue indexer name, available when ISSUE_INDEXER_TYPE is elasticsearch or meilisearch. ;ISSUE_INDEXER_NAME = gitea_issues ;; ;; Timeout the indexer if it takes longer than this to start. diff --git a/docs/content/doc/administration/config-cheat-sheet.en-us.md b/docs/content/doc/administration/config-cheat-sheet.en-us.md index 7b94c7a4882e1..79e2167442553 100644 --- a/docs/content/doc/administration/config-cheat-sheet.en-us.md +++ b/docs/content/doc/administration/config-cheat-sheet.en-us.md @@ -459,15 +459,15 @@ relation to port exhaustion. ## Indexer (`indexer`) - `ISSUE_INDEXER_TYPE`: **bleve**: Issue indexer type, currently supported: `bleve`, `db`, `elasticsearch` or `meilisearch`. 
-- `ISSUE_INDEXER_CONN_STR`: ****: Issue indexer connection string, available when ISSUE_INDEXER_TYPE is elasticsearch, or meilisearch. i.e. http://elastic:changeme@localhost:9200 -- `ISSUE_INDEXER_NAME`: **gitea_issues**: Issue indexer name, available when ISSUE_INDEXER_TYPE is elasticsearch +- `ISSUE_INDEXER_CONN_STR`: ****: Issue indexer connection string, available when ISSUE_INDEXER_TYPE is elasticsearch (e.g. http://elastic:password@localhost:9200) or meilisearch (e.g. http://:apikey@localhost:7700) +- `ISSUE_INDEXER_NAME`: **gitea_issues**: Issue indexer name, available when ISSUE_INDEXER_TYPE is elasticsearch or meilisearch. - `ISSUE_INDEXER_PATH`: **indexers/issues.bleve**: Index file used for issue search; available when ISSUE_INDEXER_TYPE is bleve and elasticsearch. Relative paths will be made absolute against _`AppWorkPath`_. - `REPO_INDEXER_ENABLED`: **false**: Enables code search (uses a lot of disk space, about 6 times more than the repository size). - `REPO_INDEXER_REPO_TYPES`: **sources,forks,mirrors,templates**: Repo indexer units. The items to index could be `sources`, `forks`, `mirrors`, `templates` or any combination of them separated by a comma. If empty then it defaults to `sources` only, as if you'd like to disable fully please see `REPO_INDEXER_ENABLED`. - `REPO_INDEXER_TYPE`: **bleve**: Code search engine type, could be `bleve` or `elasticsearch`. - `REPO_INDEXER_PATH`: **indexers/repos.bleve**: Index file used for code search. -- `REPO_INDEXER_CONN_STR`: ****: Code indexer connection string, available when `REPO_INDEXER_TYPE` is elasticsearch. i.e. http://elastic:changeme@localhost:9200 +- `REPO_INDEXER_CONN_STR`: ****: Code indexer connection string, available when `REPO_INDEXER_TYPE` is elasticsearch. i.e. 
http://elastic:password@localhost:9200 - `REPO_INDEXER_NAME`: **gitea_codes**: Code indexer name, available when `REPO_INDEXER_TYPE` is elasticsearch - `REPO_INDEXER_INCLUDE`: **empty**: A comma separated list of glob patterns (see https://github.com/gobwas/glob) to **include** in the index. Use `**.txt` to match any files with .txt extension. An empty list means include all files. From c5f4f9baf5a0b524d6fc0a4b0ff2638c37fa6643 Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 17:38:02 +0800 Subject: [PATCH 40/43] fix: requeue tasks --- modules/indexer/code/indexer.go | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index b1a607ce1fc1e..980da92f8c6c8 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -132,11 +132,7 @@ func Init() { code.gitea.io/gitea/modules/indexer/code.index(indexer.go:105) */ if err := index(ctx, indexer, indexerData.RepoID); err != nil { - if err := indexer.Ping(ctx); err != nil { - log.Error("Code indexer handler: indexer is unavailable: %v.", err) - unhandled = append(unhandled, indexerData) - continue - } + unhandled = append(unhandled, indexerData) if !setting.IsInTesting { log.Error("Codes indexer handler: index error for repo %v: %v", indexerData.RepoID, err) } From 15e4518aa8a16c2d85a8ea884be3bad8f7656c3c Mon Sep 17 00:00:00 2001 From: Jason Song Date: Wed, 14 Jun 2023 18:28:36 +0800 Subject: [PATCH 41/43] fix: format of meilisearch index --- modules/indexer/internal/meilisearch/util.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/modules/indexer/internal/meilisearch/util.go b/modules/indexer/internal/meilisearch/util.go index 44d42e2f7c9e6..e6d8fefadeb8c 100644 --- a/modules/indexer/internal/meilisearch/util.go +++ b/modules/indexer/internal/meilisearch/util.go @@ -19,7 +19,12 @@ func versionedIndexName(indexName string, version int) string { // Old index name without 
version return indexName } - return fmt.Sprintf("%s.v%d", indexName, version) + + // The format of the index name is _v, not .v like elasticsearch. + // Because meilisearch does not support "." in index name, it should contain only alphanumeric characters, hyphens (-) and underscores (_). + // See https://www.meilisearch.com/docs/learn/core_concepts/indexes#index-uid + + return fmt.Sprintf("%s_v%d", indexName, version) } func (i *Indexer) checkOldIndexes() { From e30d5c15dc4285cee425645481eec43ac780e5ea Mon Sep 17 00:00:00 2001 From: Jason Song Date: Thu, 15 Jun 2023 11:17:34 +0800 Subject: [PATCH 42/43] feat: warn before rebuild bleve index --- modules/indexer/internal/bleve/indexer.go | 6 +++++- modules/indexer/internal/bleve/util.go | 26 ++++++++++++----------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/modules/indexer/internal/bleve/indexer.go b/modules/indexer/internal/bleve/indexer.go index 9392939698faf..ce06b5afcb788 100644 --- a/modules/indexer/internal/bleve/indexer.go +++ b/modules/indexer/internal/bleve/indexer.go @@ -46,7 +46,7 @@ func (i *Indexer) Init(_ context.Context) (bool, error) { return false, fmt.Errorf("indexer is already initialized") } - indexer, err := openIndexer(i.indexDir, i.version) + indexer, version, err := openIndexer(i.indexDir, i.version) if err != nil { return false, err } @@ -55,6 +55,10 @@ func (i *Indexer) Init(_ context.Context) (bool, error) { return true, nil } + if version != 0 { + log.Warn("Found older bleve index with version %d, Gitea will remove it and rebuild", version) + } + indexMapping, err := i.mappingGetter() if err != nil { return false, err diff --git a/modules/indexer/internal/bleve/util.go b/modules/indexer/internal/bleve/util.go index 94dbbce4bcd8d..43a7c3c5ec1bd 100644 --- a/modules/indexer/internal/bleve/util.go +++ b/modules/indexer/internal/bleve/util.go @@ -4,8 +4,10 @@ package bleve import ( + "errors" "os" + "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/util" 
"github.com/blevesearch/bleve/v2" @@ -16,32 +18,32 @@ import ( // openIndexer open the index at the specified path, checking for metadata // updates and bleve version updates. If index needs to be created (or // re-created), returns (nil, nil) -func openIndexer(path string, latestVersion int) (bleve.Index, error) { +func openIndexer(path string, latestVersion int) (bleve.Index, int, error) { _, err := os.Stat(path) if err != nil && os.IsNotExist(err) { - return nil, nil + return nil, 0, nil } else if err != nil { - return nil, err + return nil, 0, err } metadata, err := rupture.ReadIndexMetadata(path) if err != nil { - return nil, err + return nil, 0, err } if metadata.Version < latestVersion { // the indexer is using a previous version, so we should delete it and // re-populate - return nil, util.RemoveAll(path) + return nil, metadata.Version, util.RemoveAll(path) } index, err := bleve.Open(path) - if err != nil && err == upsidedown.IncompatibleVersion { - // the indexer was built with a previous version of bleve, so we should - // delete it and re-populate - return nil, util.RemoveAll(path) - } else if err != nil { - return nil, err + if err != nil { + if errors.Is(err, upsidedown.IncompatibleVersion) { + log.Warn("Indexer was built with a previous version of bleve, deleting and rebuilding") + return nil, 0, util.RemoveAll(path) + } + return nil, 0, err } - return index, nil + return index, 0, nil } From 11fd0f9895770973fbb1697f53c390a0d398dfbe Mon Sep 17 00:00:00 2001 From: Jason Song Date: Sun, 18 Jun 2023 19:33:42 +0800 Subject: [PATCH 43/43] fix: globalIndexer atomic --- modules/indexer/code/git.go | 2 +- modules/indexer/code/indexer.go | 48 ++++++++++----------- modules/indexer/code/search.go | 2 +- modules/indexer/issues/indexer.go | 59 +++++++++++--------------- modules/indexer/issues/indexer_test.go | 2 +- 5 files changed, 52 insertions(+), 61 deletions(-) diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index 
30662d8bc964d..1ba6b849d11db 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -97,7 +97,7 @@ func nonGenesisChanges(ctx context.Context, repo *repo_model.Repository, revisio // previous commit sha may have been removed by a force push, so // try rebuilding from scratch log.Warn("git diff: %v", runErr) - if err := globalIndexer.Delete(ctx, repo.ID); err != nil { + if err := (*globalIndexer.Load()).Delete(ctx, repo.ID); err != nil { return nil, err } return genesisChanges(ctx, repo, revision) diff --git a/modules/indexer/code/indexer.go b/modules/indexer/code/indexer.go index 980da92f8c6c8..13d06874c96dc 100644 --- a/modules/indexer/code/indexer.go +++ b/modules/indexer/code/indexer.go @@ -7,6 +7,7 @@ import ( "context" "os" "runtime/pprof" + "sync/atomic" "time" "code.gitea.io/gitea/models/db" @@ -23,10 +24,20 @@ import ( ) var ( - indexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] - globalIndexer = internal.NewDummyIndexer() + indexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] + // globalIndexer is the global indexer, it cannot be nil. + // When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready. + // So it's always safe use it as *globalIndexer.Load() and call its methods. 
+ globalIndexer atomic.Pointer[internal.Indexer] + dummyIndexer *internal.Indexer ) +func init() { + i := internal.NewDummyIndexer() + dummyIndexer = &i + globalIndexer.Store(dummyIndexer) +} + func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { repo, err := repo_model.GetRepositoryByID(ctx, repoID) if repo_model.IsErrRepoNotExist(err) { @@ -83,7 +94,7 @@ func index(ctx context.Context, indexer internal.Indexer, repoID int64) error { // Init initialize the repo indexer func Init() { if !setting.Indexer.RepoIndexerEnabled { - globalIndexer.Close() + (*globalIndexer.Load()).Close() return } @@ -97,7 +108,7 @@ func Init() { } cancel() log.Debug("Closing repository indexer") - globalIndexer.Close() + (*globalIndexer.Load()).Close() log.Info("PID: %d Repository Indexer closed", os.Getpid()) finished() }) @@ -108,12 +119,7 @@ func Init() { switch setting.Indexer.RepoType { case "bleve", "elasticsearch": handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) { - indexer := globalIndexer - if indexer == nil { - log.Warn("Codes indexer handler: indexer is not ready, retry later.") - return items - } - + indexer := *globalIndexer.Load() for _, indexerData := range items { log.Trace("IndexerData Process Repo: %d", indexerData.RepoID) @@ -172,7 +178,7 @@ func Init() { existed, err = rIndexer.Init(ctx) if err != nil { cancel() - globalIndexer.Close() + (*globalIndexer.Load()).Close() close(waitChannel) log.Fatal("PID: %d Unable to initialize the bleve Repository Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.RepoPath, err) } @@ -189,14 +195,14 @@ func Init() { rIndexer = elasticsearch.NewIndexer(setting.Indexer.RepoConnStr, setting.Indexer.RepoIndexerName) if err != nil { cancel() - globalIndexer.Close() + (*globalIndexer.Load()).Close() close(waitChannel) log.Fatal("PID: %d Unable to create the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } existed, 
err = rIndexer.Init(ctx) if err != nil { cancel() - globalIndexer.Close() + (*globalIndexer.Load()).Close() close(waitChannel) log.Fatal("PID: %d Unable to initialize the elasticsearch Repository Indexer connstr: %s Error: %v", os.Getpid(), setting.Indexer.RepoConnStr, err) } @@ -205,7 +211,7 @@ func Init() { log.Fatal("PID: %d Unknown Indexer type: %s", os.Getpid(), setting.Indexer.RepoType) } - globalIndexer = rIndexer + globalIndexer.Store(&rIndexer) // Start processing the queue go graceful.GetManager().RunWithCancel(indexerQueue) @@ -232,18 +238,18 @@ func Init() { case <-graceful.GetManager().IsShutdown(): log.Warn("Shutdown before Repository Indexer completed initialization") cancel() - globalIndexer.Close() + (*globalIndexer.Load()).Close() case duration, ok := <-waitChannel: if !ok { log.Warn("Repository Indexer Initialization failed") cancel() - globalIndexer.Close() + (*globalIndexer.Load()).Close() return } log.Info("Repository Indexer Initialization took %v", duration) case <-time.After(timeout): cancel() - globalIndexer.Close() + (*globalIndexer.Load()).Close() log.Fatal("Repository Indexer Initialization Timed-Out after: %v", timeout) } }() @@ -260,13 +266,7 @@ func UpdateRepoIndexer(repo *repo_model.Repository) { // IsAvailable checks if issue indexer is available func IsAvailable(ctx context.Context) bool { - idx := globalIndexer - if idx == nil { - log.Error("IsAvailable(): unable to get indexer") - return false - } - - return idx.Ping(ctx) == nil + return (*globalIndexer.Load()).Ping(ctx) == nil } // populateRepoIndexer populate the repo indexer with pre-existing data. 
This diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 93a7fb791fa8d..1f9bddff7b105 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -117,7 +117,7 @@ func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword strin return 0, nil, nil, nil } - total, results, resultLanguages, err := globalIndexer.Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) + total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) if err != nil { return 0, nil, nil, err } diff --git a/modules/indexer/issues/indexer.go b/modules/indexer/issues/indexer.go index d3d1c40ceb4c6..9e2f13371e4a3 100644 --- a/modules/indexer/issues/indexer.go +++ b/modules/indexer/issues/indexer.go @@ -5,9 +5,9 @@ package issues import ( "context" - "fmt" "os" "runtime/pprof" + "sync/atomic" "time" db_model "code.gitea.io/gitea/models/db" @@ -29,9 +29,19 @@ import ( var ( // issueIndexerQueue queue of issue ids to be updated issueIndexerQueue *queue.WorkerPoolQueue[*internal.IndexerData] - globalIndexer = internal.NewDummyIndexer() + // globalIndexer is the global indexer, it cannot be nil. + // When the real indexer is not ready, it will be a dummy indexer which will return error to explain it's not ready. + // So it's always safe use it as *globalIndexer.Load() and call its methods. + globalIndexer atomic.Pointer[internal.Indexer] + dummyIndexer *internal.Indexer ) +func init() { + i := internal.NewDummyIndexer() + dummyIndexer = &i + globalIndexer.Store(dummyIndexer) +} + // InitIssueIndexer initialize issue indexer, syncReindex is true then reindex until // all issue index done. 
func InitIssueIndexer(syncReindex bool) { @@ -43,11 +53,7 @@ func InitIssueIndexer(syncReindex bool) { switch setting.Indexer.IssueType { case "bleve", "elasticsearch", "meilisearch": handler := func(items ...*internal.IndexerData) (unhandled []*internal.IndexerData) { - indexer := globalIndexer - if indexer == nil { - log.Warn("Issue indexer handler: indexer is not ready, retry later.") - return items - } + indexer := *globalIndexer.Load() toIndex := make([]*internal.IndexerData, 0, len(items)) for _, indexerData := range items { log.Trace("IndexerData Process: %d %v %t", indexerData.ID, indexerData.IDs, indexerData.IsDelete) @@ -84,8 +90,9 @@ func InitIssueIndexer(syncReindex bool) { start := time.Now() log.Info("PID %d: Initializing Issue Indexer: %s", os.Getpid(), setting.Indexer.IssueType) var ( - existed bool - err error + issueIndexer internal.Indexer + existed bool + err error ) switch setting.Indexer.IssueType { case "bleve": @@ -94,42 +101,37 @@ func InitIssueIndexer(syncReindex bool) { log.Error("PANIC whilst initializing issue indexer: %v\nStacktrace: %s", err, log.Stack(2)) log.Error("The indexer files are likely corrupted and may need to be deleted") log.Error("You can completely remove the %q directory to make Gitea recreate the indexes", setting.Indexer.IssuePath) - globalIndexer = internal.NewDummyIndexer() + globalIndexer.Store(dummyIndexer) log.Fatal("PID: %d Unable to initialize the Bleve Issue Indexer at path: %s Error: %v", os.Getpid(), setting.Indexer.IssuePath, err) } }() - issueIndexer := bleve.NewIndexer(setting.Indexer.IssuePath) + issueIndexer = bleve.NewIndexer(setting.Indexer.IssuePath) existed, err = issueIndexer.Init(ctx) if err != nil { - globalIndexer = internal.NewDummyIndexer() log.Fatal("Unable to initialize Bleve Issue Indexer at path: %s Error: %v", setting.Indexer.IssuePath, err) } - globalIndexer = issueIndexer case "elasticsearch": - issueIndexer := elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, 
setting.Indexer.IssueIndexerName) + issueIndexer = elasticsearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueIndexerName) existed, err = issueIndexer.Init(ctx) if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } - globalIndexer = issueIndexer case "db": - issueIndexer := db.NewIndexer() - globalIndexer = issueIndexer + issueIndexer = db.NewIndexer() case "meilisearch": - issueIndexer := meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) + issueIndexer = meilisearch.NewIndexer(setting.Indexer.IssueConnStr, setting.Indexer.IssueConnAuth, setting.Indexer.IssueIndexerName) existed, err = issueIndexer.Init(ctx) if err != nil { log.Fatal("Unable to issueIndexer.Init with connection %s Error: %v", setting.Indexer.IssueConnStr, err) } - globalIndexer = issueIndexer default: - globalIndexer = internal.NewDummyIndexer() log.Fatal("Unknown issue indexer type: %s", setting.Indexer.IssueType) } + globalIndexer.Store(&issueIndexer) graceful.GetManager().RunAtTerminate(func() { log.Debug("Closing issue indexer") - globalIndexer.Close() + (*globalIndexer.Load()).Close() log.Info("PID: %d Issue Indexer closed", os.Getpid()) }) @@ -278,12 +280,7 @@ func DeleteRepoIssueIndexer(ctx context.Context, repo *repo_model.Repository) { // WARNNING: You have to ensure user have permission to visit repoIDs' issues func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) ([]int64, error) { var issueIDs []int64 - indexer := globalIndexer - - if indexer == nil { - log.Error("SearchIssuesByKeyword(): unable to get indexer!") - return nil, fmt.Errorf("unable to get issue indexer") - } + indexer := *globalIndexer.Load() res, err := indexer.Search(ctx, keyword, repoIDs, 50, 0) if err != nil { return nil, err @@ -296,11 +293,5 @@ func SearchIssuesByKeyword(ctx context.Context, repoIDs []int64, keyword string) // IsAvailable 
checks if issue indexer is available func IsAvailable(ctx context.Context) bool { - indexer := globalIndexer - if indexer == nil { - log.Error("IsAvailable(): unable to get indexer!") - return false - } - - return globalIndexer.Ping(ctx) == nil + return (*globalIndexer.Load()).Ping(ctx) == nil } diff --git a/modules/indexer/issues/indexer_test.go b/modules/indexer/issues/indexer_test.go index 0e26471923a9e..5962a4ee9cb76 100644 --- a/modules/indexer/issues/indexer_test.go +++ b/modules/indexer/issues/indexer_test.go @@ -43,7 +43,7 @@ func TestBleveSearchIssues(t *testing.T) { setting.LoadQueueSettings() InitIssueIndexer(true) defer func() { - if bleveIndexer, ok := globalIndexer.(*bleve.Indexer); ok { + if bleveIndexer, ok := (*globalIndexer.Load()).(*bleve.Indexer); ok { bleveIndexer.Close() } }()