Skip to content

Refactor code_indexer to use an SearchOptions struct for PerformSearch #29724

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Mar 16, 2024
26 changes: 13 additions & 13 deletions modules/indexer/code/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro
return err
}
if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil {
return fmt.Errorf("Misformatted git cat-file output: %w", err)
return fmt.Errorf("misformatted git cat-file output: %w", err)
}
}

Expand Down Expand Up @@ -233,26 +233,26 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {

// Search searches for files in the specified repo.
// Returns the matching file-paths
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var (
indexerQuery query.Query
keywordQuery query.Query
)

if isFuzzy {
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
if opts.IsKeywordFuzzy {
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
} else {
prefixQuery := bleve.NewPrefixQuery(keyword)
prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery
}

if len(repoIDs) > 0 {
repoQueries := make([]query.Query, 0, len(repoIDs))
for _, repoID := range repoIDs {
if len(opts.RepoIDs) > 0 {
repoQueries := make([]query.Query, 0, len(opts.RepoIDs))
for _, repoID := range opts.RepoIDs {
repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID"))
}

Expand All @@ -266,8 +266,8 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword

// Save for reuse without language filter
facetQuery := indexerQuery
if len(language) > 0 {
languageQuery := bleve.NewMatchQuery(language)
if len(opts.Language) > 0 {
languageQuery := bleve.NewMatchQuery(opts.Language)
languageQuery.FieldVal = "Language"
languageQuery.Analyzer = analyzer_keyword.Name

Expand All @@ -277,12 +277,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
)
}

from := (page - 1) * pageSize
from, pageSize := opts.GetSkipTake()
searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false)
searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
searchRequest.IncludeLocations = true

if len(language) == 0 {
if len(opts.Language) == 0 {
searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10))
}

Expand Down Expand Up @@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
}

searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10)
if len(language) > 0 {
if len(opts.Language) > 0 {
// Use separate query to go get all language counts
facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false)
facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"}
Expand Down
26 changes: 11 additions & 15 deletions modules/indexer/code/elasticsearch/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,35 +281,31 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
}

// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
searchType := esMultiMatchTypePhrasePrefix
if isFuzzy {
if opts.IsKeywordFuzzy {
searchType = esMultiMatchTypeBestFields
}

kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)
kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType)
query := elastic.NewBoolQuery()
query = query.Must(kwQuery)
if len(repoIDs) > 0 {
repoStrs := make([]any, 0, len(repoIDs))
for _, repoID := range repoIDs {
if len(opts.RepoIDs) > 0 {
repoStrs := make([]any, 0, len(opts.RepoIDs))
for _, repoID := range opts.RepoIDs {
repoStrs = append(repoStrs, repoID)
}
repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
query = query.Must(repoQuery)
}

var (
start int
kw = "<em>" + keyword + "</em>"
aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
start, pageSize = opts.GetSkipTake()
kw = "<em>" + opts.Keyword + "</em>"
aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc()
)

if page > 0 {
start = (page - 1) * pageSize
}

if len(language) == 0 {
if len(opts.Language) == 0 {
searchResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()).
Aggregation("language", aggregation).
Expand All @@ -330,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword
return convertResult(searchResult, kw, pageSize)
}

langQuery := elastic.NewMatchQuery("language", language)
langQuery := elastic.NewMatchQuery("language", opts.Language)
countResult, err := b.inner.Client.Search().
Index(b.inner.VersionedIndexName()).
Aggregation("language", aggregation).
Expand Down
2 changes: 1 addition & 1 deletion modules/indexer/code/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s

needGenesis := len(status.CommitSha) == 0
if !needGenesis {
hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision)
hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision)
stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()})
needGenesis = len(stdout) == 0
}
Expand Down
11 changes: 10 additions & 1 deletion modules/indexer/code/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"os"
"testing"

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/models/unittest"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/indexer/code/bleve"
Expand Down Expand Up @@ -70,7 +71,15 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {

for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) {
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{
RepoIDs: kw.RepoIDs,
Keyword: kw.Keyword,
Paginator: &db.ListOptions{
Page: 1,
PageSize: 10,
},
IsKeywordFuzzy: true,
})
assert.NoError(t, err)
assert.Len(t, kw.IDs, int(total))
assert.Len(t, langs, kw.Langs)
Expand Down
15 changes: 13 additions & 2 deletions modules/indexer/code/internal/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"context"
"fmt"

"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/indexer/internal"
)
Expand All @@ -16,7 +17,17 @@ type Indexer interface {
internal.Indexer
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
Delete(ctx context.Context, repoID int64) error
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error)
}

type SearchOptions struct {
RepoIDs []int64
Keyword string
Language string

IsKeywordFuzzy bool

db.Paginator
}

// NewDummyIndexer returns a dummy indexer
Expand All @@ -38,6 +49,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
return fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) {
return 0, nil, nil, fmt.Errorf("indexer is not ready")
}
8 changes: 5 additions & 3 deletions modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ type ResultLine struct {

type SearchResultLanguages = internal.SearchResultLanguages

type SearchOptions = internal.SearchOptions

func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) {
startIndex := selectionStartIndex
numLinesBefore := 0
Expand Down Expand Up @@ -125,12 +127,12 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res

// PerformSearch perform a search on a repository
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
if len(keyword) == 0 {
func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) {
if opts == nil || len(opts.Keyword) == 0 {
Copy link
Contributor

@wxiaoguang wxiaoguang Mar 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I do not think opts == nil makes sense, how could it be nil? If "nil" opts means a bug, it should be handled in development stage.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should not be nil ... but better not panic

Copy link
Contributor

@wxiaoguang wxiaoguang Mar 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should not be nil ... but better not panic

If it shouldn't be nil, why not report the errors in development stage? Just by a panic.

I really dislike hiding errors. It makes it more difficult to debug some bugs.

Copy link
Member Author

@6543 6543 Mar 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NO we should not panic if we can arange it. To hope for stacktraces to get reported as "linting" methode is wrong !!!

But what we can do is create a proper error to check against

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NO, why it would panic if there is no bug in code?

Golang library itself panics a lot if something is totally wrong.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I also don't think we should check opts == nil here. It should not be nil if our code is right. Otherwise, you need to check another thousand places.

Copy link
Member Author

@6543 6543 Mar 16, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me check (again) what comsumers this func has ... and also introduce a proper error instead of a ignore on keyword==""

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm ... I only mean opts == nil check doesn't make sense.

if keyword == "" looks good to me, I do not think it's worth to introduce to many "errors".

Golang has a very bad error system, TBH I have been tired of writing a lot of if err != nil { ctx.ServerError }

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you wana create a pull?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since it has been merged, I think it could be left there as-is, no good no harm.

return 0, nil, nil, nil
}

total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts)
if err != nil {
return 0, nil, nil, err
}
Expand Down
12 changes: 11 additions & 1 deletion routers/web/explore/code.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package explore
import (
"net/http"

"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/base"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
Expand Down Expand Up @@ -76,7 +77,16 @@ func Code(ctx *context.Context) {
)

if (len(repoIDs) > 0) || isAdmin {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: repoIDs,
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
Expand Down
13 changes: 11 additions & 2 deletions routers/web/repo/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package repo
import (
"net/http"

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/base"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting"
Expand Down Expand Up @@ -41,8 +42,16 @@ func Search(ctx *context.Context) {
page = 1
}

total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
Expand Down
12 changes: 11 additions & 1 deletion routers/web/user/code.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ package user
import (
"net/http"

"code.gitea.io/gitea/models/db"
repo_model "code.gitea.io/gitea/models/repo"
"code.gitea.io/gitea/modules/base"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
Expand Down Expand Up @@ -74,7 +75,16 @@ func CodeSearch(ctx *context.Context) {
)

if len(repoIDs) > 0 {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: repoIDs,
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
Expand Down