From edad606ce50592c52af39b50efa20cd607a471f6 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 8 Mar 2024 02:28:24 +0100 Subject: [PATCH 01/13] use "isFuzzy" instead of "isMatch" because then we can use same name for issue search where default should be false --- modules/indexer/code/bleve/bleve.go | 4 ++-- modules/indexer/code/elasticsearch/elasticsearch.go | 8 ++++---- modules/indexer/code/indexer_test.go | 2 +- modules/indexer/code/internal/indexer.go | 4 ++-- modules/indexer/code/search.go | 5 +++-- modules/indexer/issues/internal/model.go | 2 ++ routers/web/explore/code.go | 4 ++-- routers/web/repo/search.go | 4 ++-- routers/web/user/code.go | 4 ++-- 9 files changed, 20 insertions(+), 17 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 8ba50ed77c938..7cec539b5881e 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -233,13 +233,13 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error { // Search searches for files in the specified repo. // Returns the matching file-paths -func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { var ( indexerQuery query.Query keywordQuery query.Query ) - if isMatch { + if !isFuzzy { prefixQuery := bleve.NewPrefixQuery(keyword) prefixQuery.FieldVal = "Content" keywordQuery = prefixQuery diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 0f70f1348552c..065b0b20618e7 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -281,10 +281,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan } // Search searches for codes and language stats by given conditions. -func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { - searchType := esMultiMatchTypeBestFields - if isMatch { - searchType = esMultiMatchTypePhrasePrefix +func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { + searchType := esMultiMatchTypePhrasePrefix + if isFuzzy { + searchType = esMultiMatchTypeBestFields } kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType) diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 5eb8e61e3db4d..23dbd63410541 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -70,7 +70,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { - total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, false) + total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true) assert.NoError(t, err) assert.Len(t, kw.IDs, int(total)) assert.Len(t, langs, kw.Langs) diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index da3ac3623c92f..6b37aee239f00 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -16,7 +16,7 @@ type Indexer interface { internal.Indexer Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Delete(ctx context.Context, repoID int64) error - Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) + Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) } // NewDummyIndexer returns a dummy indexer @@ -38,6 +38,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { return fmt.Errorf("indexer is not ready") } -func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { +func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, ifFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { return 0, nil, nil, fmt.Errorf("indexer is not ready") } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 2ddc2397fa191..89a62a8d3e2dd 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -124,12 +124,13 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res } // PerformSearch perform a search on a repository -func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*internal.SearchResultLanguages, error) { +// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 +func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) { if len(keyword) == 0 { return 0, nil, nil, nil } - total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch) + total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy) if err != nil { return 0, nil, nil, err } diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go index 947335d8ce9cb..5beb6c0d2f5fc 100644 --- a/modules/indexer/issues/internal/model.go +++ b/modules/indexer/issues/internal/model.go @@ -74,6 +74,8 @@ type SearchResult struct { type SearchOptions struct { Keyword string // keyword to search + IsFuzzy bool // if true set the Damerau-Levenshtein distance from 0 to 2 + RepoIDs []int64 // repository IDs which the issues belong to AllPublic bool // if include all public repositories diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index 2cde8b655ee7b..a6bc71ac9cde0 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -35,7 +35,7 @@ func Code(ctx *context.Context) { keyword := ctx.FormTrim("q") queryType := ctx.FormTrim("t") - isMatch := queryType == "match" + isFuzzy := queryType != "match" ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language @@ -77,7 +77,7 @@ func Code(ctx *context.Context) { ) if (len(repoIDs) > 0) || isAdmin { - total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch) + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index c53d8fd918a1c..766dd5726aa8d 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -25,7 +25,7 @@ func Search(ctx *context.Context) { keyword := ctx.FormTrim("q") queryType := ctx.FormTrim("t") - isMatch := queryType == "match" + isFuzzy := queryType != "match" ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language @@ -43,7 +43,7 @@ func Search(ctx *context.Context) { } total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID}, - language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch) + language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) diff --git a/routers/web/user/code.go b/routers/web/user/code.go index eb711b76ebb08..8613d38b65a66 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ -40,7 +40,7 @@ func CodeSearch(ctx *context.Context) { keyword := ctx.FormTrim("q") queryType := ctx.FormTrim("t") - isMatch := queryType == "match" + isFuzzy := queryType != "match" ctx.Data["Keyword"] = keyword ctx.Data["Language"] = language @@ -75,7 +75,7 @@ func CodeSearch(ctx *context.Context) { ) if len(repoIDs) > 0 { - total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch) + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) From f6c89b9b4e7e1807df59301b031d56148898ffa0 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 8 Mar 2024 03:15:52 +0100 Subject: [PATCH 02/13] meilisearch has only global setting to enable/disable fuzzy search, so we have to post-filter the hits --- modules/indexer/code/bleve/bleve.go | 10 +++++----- modules/indexer/internal/bleve/query.go | 7 +++++++ modules/indexer/issues/bleve/bleve.go | 17 ++++++++++++----- .../issues/elasticsearch/elasticsearch.go | 12 +++++++++++- modules/indexer/issues/internal/model.go | 2 +- .../indexer/issues/meilisearch/meilisearch.go | 19 ++++++++++++++++++- 6 files changed, 54 insertions(+), 13 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 7cec539b5881e..107dd23598d1b 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -239,15 +239,15 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword keywordQuery query.Query ) - if !isFuzzy { - prefixQuery := bleve.NewPrefixQuery(keyword) - prefixQuery.FieldVal = "Content" - keywordQuery = prefixQuery - } else { + if isFuzzy { phraseQuery := bleve.NewMatchPhraseQuery(keyword) phraseQuery.FieldVal = "Content" phraseQuery.Analyzer = repoIndexerAnalyzer keywordQuery = phraseQuery + } else { + prefixQuery := bleve.NewPrefixQuery(keyword) + prefixQuery.FieldVal = "Content" + keywordQuery = prefixQuery } if len(repoIDs) > 0 { diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index c7d66538c1263..31415273568dd 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -25,6 +25,13 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQue return q } +// PrefixQuery generates a match prefix query for the given phrase and field +func PrefixQuery(matchPhrase, field string) *query.PrefixQuery { + q := bleve.NewPrefixQuery(matchPhrase) + q.FieldVal = field + return q +} + // BoolFieldQuery generates a bool field query for the given value and field func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery { q := bleve.NewBoolFieldQuery(value) diff --git a/modules/indexer/issues/bleve/bleve.go b/modules/indexer/issues/bleve/bleve.go index 6a5d65cb665d0..aaea854efa031 100644 --- a/modules/indexer/issues/bleve/bleve.go +++ b/modules/indexer/issues/bleve/bleve.go @@ -156,12 +156,19 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( var queries []query.Query if options.Keyword != "" { - keywordQueries := []query.Query{ - inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer), - inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer), - inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer), + if options.IsFuzzyKeyword { + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer), + inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer), + inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer), + }...)) + } else { + queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{ + inner_bleve.PrefixQuery(options.Keyword, "title"), + inner_bleve.PrefixQuery(options.Keyword, "content"), + inner_bleve.PrefixQuery(options.Keyword, "comments"), + }...)) } - queries = append(queries, bleve.NewDisjunctionQuery(keywordQueries...)) } if len(options.RepoIDs) > 0 || options.AllPublic { diff --git a/modules/indexer/issues/elasticsearch/elasticsearch.go b/modules/indexer/issues/elasticsearch/elasticsearch.go index 3acd3ade71528..0077da263a7cc 100644 --- a/modules/indexer/issues/elasticsearch/elasticsearch.go +++ b/modules/indexer/issues/elasticsearch/elasticsearch.go @@ -19,6 +19,10 @@ import ( const ( issueIndexerLatestVersion = 1 + // multi-match-types, currently only 2 types are used + // Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types + esMultiMatchTypeBestFields = "best_fields" + esMultiMatchTypePhrasePrefix = "phrase_prefix" ) var _ internal.Indexer = &Indexer{} @@ -141,7 +145,13 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( query := elastic.NewBoolQuery() if options.Keyword != "" { - query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments")) + + searchType := esMultiMatchTypePhrasePrefix + if options.IsFuzzyKeyword { + searchType = esMultiMatchTypeBestFields + } + + query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType)) } if len(options.RepoIDs) > 0 { diff --git a/modules/indexer/issues/internal/model.go b/modules/indexer/issues/internal/model.go index 5beb6c0d2f5fc..d41fec4aba87d 100644 --- a/modules/indexer/issues/internal/model.go +++ b/modules/indexer/issues/internal/model.go @@ -74,7 +74,7 @@ type SearchResult struct { type SearchOptions struct { Keyword string // keyword to search - IsFuzzy bool // if true set the Damerau-Levenshtein distance from 0 to 2 + IsFuzzyKeyword bool // if false the levenshtein distance is 0 RepoIDs []int64 // repository IDs which the issues belong to AllPublic bool // if include all public repositories diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 325883196bb27..91212e4360d0f 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -16,7 +16,7 @@ import ( ) const ( - issueIndexerLatestVersion = 2 + issueIndexerLatestVersion = 3 // TODO: make this configurable if necessary maxTotalHits = 10000 @@ -47,6 +47,9 @@ func NewIndexer(url, apiKey, indexerName string) *Indexer { }, DisplayedAttributes: []string{ "id", + "title", + "content", + "comments", }, FilterableAttributes: []string{ "repo_id", @@ -223,6 +226,20 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( hits := make([]internal.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { + if !options.IsFuzzyKeyword { + // as meilisearch does not have a non-fuzzy search and you can only change "typo tolerance" per index we have to post-filter the results + // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance + title, _ := hit.(map[string]any)["title"].(string) + if !strings.Contains(title, options.Keyword) { + content, _ := hit.(map[string]any)["content"].(string) + if !strings.Contains(content, options.Keyword) { + comments, _ := hit.(map[string]any)["comments"].(string) + if !strings.Contains(comments, options.Keyword) { + continue + } + } + } + } hits = append(hits, internal.Match{ ID: int64(hit.(map[string]any)["id"].(float64)), }) From cb0dbffee6e121ddee7371a65ff5248603d74dfb Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 8 Mar 2024 15:12:11 +0100 Subject: [PATCH 03/13] add workaround for meilisearch --- .../indexer/issues/meilisearch/meilisearch.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 91212e4360d0f..aea808307e894 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -229,12 +229,23 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( if !options.IsFuzzyKeyword { // as meilisearch does not have a non-fuzzy search and you can only change "typo tolerance" per index we have to post-filter the results // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance + // TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed + keyword := strings.ToLower(options.Keyword) title, _ := hit.(map[string]any)["title"].(string) - if !strings.Contains(title, options.Keyword) { + if !strings.Contains(strings.ToLower(title), keyword) { content, _ := hit.(map[string]any)["content"].(string) - if !strings.Contains(content, options.Keyword) { - comments, _ := hit.(map[string]any)["comments"].(string) - if !strings.Contains(comments, options.Keyword) { + if !strings.Contains(strings.ToLower(content), keyword) { + comments, _ := hit.(map[string]any)["comments"].([]any) + found := false + for i := range comments { + comment, _ := comments[i].(string) + if strings.Contains(strings.ToLower(comment), keyword) { + found = true + break + } + } + if !found { + // we could not find it move on ... continue } } From 1222cbac2081a41025529ed81c5d2f1bf96440b2 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 8 Mar 2024 15:18:45 +0100 Subject: [PATCH 04/13] Update modules/indexer/internal/bleve/query.go --- modules/indexer/internal/bleve/query.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/indexer/internal/bleve/query.go b/modules/indexer/internal/bleve/query.go index 31415273568dd..2a427c402026a 100644 --- a/modules/indexer/internal/bleve/query.go +++ b/modules/indexer/internal/bleve/query.go @@ -25,9 +25,9 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQue return q } -// PrefixQuery generates a match prefix query for the given phrase and field -func PrefixQuery(matchPhrase, field string) *query.PrefixQuery { - q := bleve.NewPrefixQuery(matchPhrase) +// PrefixQuery generates a match prefix query for the given prefix and field +func PrefixQuery(matchPrefix, field string) *query.PrefixQuery { + q := bleve.NewPrefixQuery(matchPrefix) q.FieldVal = field return q } From 0d38b47b2598f8b3627cff60346bad35bd731bfb Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 8 Mar 2024 15:22:13 +0100 Subject: [PATCH 05/13] just some wordings --- modules/indexer/issues/meilisearch/meilisearch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index aea808307e894..1caa44499e6f0 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -245,7 +245,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( } } if !found { - // we could not find it move on ... + // we could not have a direct match, so ignore that hit and move on ... continue } } From af30719d4d5c2c3dff1631c9fa25d9c97f5df039 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 8 Mar 2024 19:51:09 +0100 Subject: [PATCH 06/13] Update modules/indexer/issues/meilisearch/meilisearch.go Co-authored-by: delvh --- modules/indexer/issues/meilisearch/meilisearch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 1caa44499e6f0..da5a636bf8253 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -227,7 +227,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( hits := make([]internal.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { if !options.IsFuzzyKeyword { - // as meilisearch does not have a non-fuzzy search and you can only change "typo tolerance" per index we have to post-filter the results + // as meilisearch does not have an exact search and you can only change "typo tolerance" per index we have to post-filter the results // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance // TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed keyword := strings.ToLower(options.Keyword) From 5a7d34888b9a7d41c004cfbca08ea3cb97660de1 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 8 Mar 2024 19:55:00 +0100 Subject: [PATCH 07/13] Update modules/indexer/code/internal/indexer.go --- modules/indexer/code/internal/indexer.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index 6b37aee239f00..c92419deb22f7 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -38,6 +38,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { return fmt.Errorf("indexer is not ready") } -func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, ifFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { +func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { return 0, nil, nil, fmt.Errorf("indexer is not ready") } From a94d250f0e694402167d58aea6c67abc458b1f84 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 8 Mar 2024 20:32:07 +0100 Subject: [PATCH 08/13] add error handling --- .../indexer/issues/meilisearch/meilisearch.go | 81 +++++++++++++------ 1 file changed, 58 insertions(+), 23 deletions(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index da5a636bf8253..ca708df567058 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -5,6 +5,7 @@ package meilisearch import ( "context" + "errors" "strconv" "strings" @@ -22,6 +23,10 @@ const ( maxTotalHits = 10000 ) +var ( + ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content") +) + var _ internal.Indexer = &Indexer{} // Indexer implements Indexer interface @@ -224,21 +229,59 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( return nil, err } + hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword) + if err != nil { + return nil, err + } + + return &internal.SearchResult{ + Total: searchRes.EstimatedTotalHits, + Hits: hits, + }, nil +} + +func parseSortBy(sortBy internal.SortBy) string { + field := strings.TrimPrefix(string(sortBy), "-") + if strings.HasPrefix(string(sortBy), "-") { + return field + ":desc" + } + return field + ":asc" +} + +// nonFuzzyWorkaround is needed as meilisearch does not have an exact search +// and you can only change "typo tolerance" per index we have to post-filter the results +// https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance +// TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed +func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) { hits := make([]internal.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { - if !options.IsFuzzyKeyword { - // as meilisearch does not have an exact search and you can only change "typo tolerance" per index we have to post-filter the results - // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance - // TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed - keyword := strings.ToLower(options.Keyword) - title, _ := hit.(map[string]any)["title"].(string) + hit, ok := hit.(map[string]any) + if !ok { + return nil, ErrMalformedResponse + } + + if !isFuzzy { + keyword = strings.ToLower(keyword) + title, ok := hit["title"].(string) + if !ok { + return nil, ErrMalformedResponse + } if !strings.Contains(strings.ToLower(title), keyword) { - content, _ := hit.(map[string]any)["content"].(string) + content, ok := hit["content"].(string) + if !ok { + return nil, ErrMalformedResponse + } if !strings.Contains(strings.ToLower(content), keyword) { - comments, _ := hit.(map[string]any)["comments"].([]any) + comments, ok := hit["comments"].([]any) + if !ok { + return nil, ErrMalformedResponse + } found := false for i := range comments { - comment, _ := comments[i].(string) + comment, ok := comments[i].(string) + if !ok { + return nil, ErrMalformedResponse + } if strings.Contains(strings.ToLower(comment), keyword) { found = true break @@ -251,21 +294,13 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( } } } + issueID, ok := hit["id"].(float64) + if !ok { + return nil, ErrMalformedResponse + } hits = append(hits, internal.Match{ - ID: int64(hit.(map[string]any)["id"].(float64)), + ID: int64(issueID), }) } - - return &internal.SearchResult{ - Total: searchRes.EstimatedTotalHits, - Hits: hits, - }, nil -} - -func parseSortBy(sortBy internal.SortBy) string { - field := strings.TrimPrefix(string(sortBy), "-") - if strings.HasPrefix(string(sortBy), "-") { - return field + ":desc" - } - return field + ":asc" + return hits, nil } From 02e0c4152350f4e5a588a5805a526c6a34519233 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 8 Mar 2024 20:33:08 +0100 Subject: [PATCH 09/13] just some documentation --- modules/indexer/issues/meilisearch/meilisearch.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index ca708df567058..ea83e3dc7f912 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -24,6 +24,7 @@ const ( ) var ( + // ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types. ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content") ) From be6530b27a3427fd8bbd3ad3419f3ee34079c704 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 8 Mar 2024 20:50:05 +0100 Subject: [PATCH 10/13] unit-tests --- .../indexer/issues/meilisearch/meilisearch.go | 6 +-- .../issues/meilisearch/meilisearch_test.go | 45 +++++++++++++++++++ 2 files changed, 47 insertions(+), 4 deletions(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index ea83e3dc7f912..648a582a6cb2a 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -23,10 +23,8 @@ const ( maxTotalHits = 10000 ) -var ( - // ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types. - ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content") -) +// ErrMalformedResponse is never expected as we initialize the indexer ourself and so define the types. +var ErrMalformedResponse = errors.New("meilisearch returned unexpected malformed content") var _ internal.Indexer = &Indexer{} diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go index 3d7237268e1bd..ecce704236b99 100644 --- a/modules/indexer/issues/meilisearch/meilisearch_test.go +++ b/modules/indexer/issues/meilisearch/meilisearch_test.go @@ -10,7 +10,11 @@ import ( "testing" "time" + "code.gitea.io/gitea/modules/indexer/issues/internal" "code.gitea.io/gitea/modules/indexer/issues/internal/tests" + + "github.com/meilisearch/meilisearch-go" + "github.com/stretchr/testify/assert" ) func TestMeilisearchIndexer(t *testing.T) { @@ -48,3 +52,44 @@ func TestMeilisearchIndexer(t *testing.T) { tests.TestIndexer(t, indexer) } + +func TestNonFuzzyWorkaround(t *testing.T) { + // get unexpected return + _, err := nonFuzzyWorkaround(&meilisearch.SearchResponse{ + Hits: []any{"aa", "bb", "cc", "dd"}, + }, "bowling", false) + assert.ErrorIs(t, err, ErrMalformedResponse) + + validResponse := &meilisearch.SearchResponse{ + Hits: []any{ + map[string]any{ + "id": float64(11), + "title": "a title", + "content": "issue body with no match", + "comments": []any{"hey whats up?", "I'm currently bowling", "nice"}, + }, + map[string]any{ + "id": float64(22), + "title": "Bowling as title", + "content": "", + "comments": []any{}, + }, + map[string]any{ + "id": float64(33), + "title": "Bowl-ing as fuzzy match", + "content": "", + "comments": []any{}, + }, + }, + } + + // nonFuzzy + hits, err := nonFuzzyWorkaround(validResponse, "bowling", false) + assert.NoError(t, err) + assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}}, hits) + + // fuzzy + hits, err = nonFuzzyWorkaround(validResponse, "bowling", true) + assert.NoError(t, err) + assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits) +} From c77e46babd15d605f1d8772893efb4f9bc376bc0 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Fri, 8 Mar 2024 20:59:01 +0100 Subject: [PATCH 11/13] Update modules/indexer/issues/meilisearch/meilisearch.go --- modules/indexer/issues/meilisearch/meilisearch.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 648a582a6cb2a..35dd6c38bee96 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -248,7 +248,7 @@ func parseSortBy(sortBy internal.SortBy) string { } // nonFuzzyWorkaround is needed as meilisearch does not have an exact search -// and you can only change "typo tolerance" per index we have to post-filter the results +// and you can only change "typo tolerance" per index. So we have to post-filter the results // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance // TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) { From fcbab85841363f216a2d718876d1626507b928de Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Sat, 9 Mar 2024 01:37:42 +0100 Subject: [PATCH 12/13] flat it out --- .../indexer/issues/meilisearch/meilisearch.go | 62 ++++++++++++------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 35dd6c38bee96..72e91e9d41008 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -261,36 +261,52 @@ func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, i if !isFuzzy { keyword = strings.ToLower(keyword) - title, ok := hit["title"].(string) - if !ok { - return nil, ErrMalformedResponse - } - if !strings.Contains(strings.ToLower(title), keyword) { + + // declare a anon func to check if the title, content or at least one comment contains the keyword + found, err := func() (bool, error) { + // check if title match first + title, ok := hit["title"].(string) + if !ok { + return false, ErrMalformedResponse + } else if strings.Contains(strings.ToLower(title), keyword) { + return true, nil + } + + // check if content has a match content, ok := hit["content"].(string) if !ok { - return nil, ErrMalformedResponse + return false, ErrMalformedResponse + } else if strings.Contains(strings.ToLower(content), keyword) { + return true, nil } - if !strings.Contains(strings.ToLower(content), keyword) { - comments, ok := hit["comments"].([]any) + + // now check for each comment if one has a match + // so we first try to cast and skip if there are no comments + comments, ok := hit["comments"].([]any) + if !ok { + return false, ErrMalformedResponse + } else if len(comments) == 0 { + return false, nil + } + // now we iterate over all and report as soon as we detect one match + for i := range comments { + comment, ok := comments[i].(string) if !ok { - return nil, ErrMalformedResponse - } - found := false - for i := range comments { - comment, ok := comments[i].(string) - if !ok { - return nil, ErrMalformedResponse - } - if strings.Contains(strings.ToLower(comment), keyword) { - found = true - break - } + return false, ErrMalformedResponse } - if !found { - // we could not have a direct match, so ignore that hit and move on ... - continue + if strings.Contains(strings.ToLower(comment), keyword) { + return true, nil } } + + // we got no match + return false, nil + }() + + if err != nil { + return nil, err + } else if !found { + continue } } issueID, ok := hit["id"].(float64) From 56d7770474b8c156ee76c05c278a93916c85065d Mon Sep 17 00:00:00 2001 From: silverwind Date: Sat, 9 Mar 2024 02:13:16 +0100 Subject: [PATCH 13/13] Update modules/indexer/issues/meilisearch/meilisearch.go --- modules/indexer/issues/meilisearch/meilisearch.go | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 72e91e9d41008..c429920065308 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -288,6 +288,7 @@ func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, i } else if len(comments) == 0 { return false, nil } + // now we iterate over all and report as soon as we detect one match for i := range comments { comment, ok := comments[i].(string)