From 12a488c9fa457ef3a609ecc40090fc3646035ad4 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Tue, 12 Mar 2024 14:15:26 +0100 Subject: [PATCH 1/5] meilisearch double quote on "match" query --- .../indexer/issues/meilisearch/meilisearch.go | 20 ++++++++++++++++++- .../issues/meilisearch/meilisearch_test.go | 6 ++++++ 2 files changed, 25 insertions(+), 1 deletion(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index c429920065308..e1f8e59fa675b 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -6,6 +6,7 @@ package meilisearch import ( "context" "errors" + "fmt" "strconv" "strings" @@ -217,7 +218,12 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( skip, limit := indexer_internal.ParsePaginator(options.Paginator, maxTotalHits) - searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(options.Keyword, &meilisearch.SearchRequest{ + keyword := options.Keyword + if !options.IsFuzzyKeyword { + keyword = doubleQuoteKeyword(keyword) + } + + searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{ Filter: query.Statement(), Limit: int64(limit), Offset: int64(skip), @@ -247,6 +253,18 @@ func parseSortBy(sortBy internal.SortBy) string { return field + ":asc" } +func doubleQuoteKeyword(k string) string { + kp := strings.Split(k, " ") + parts := 0 + for i := range kp { + if kp[i] != "" { + kp[parts] = fmt.Sprintf(`"%s"`, kp[i]) + parts++ + } + } + return strings.Join(kp[:parts], " ") +} + // nonFuzzyWorkaround is needed as meilisearch does not have an exact search // and you can only change "typo tolerance" per index. So we have to post-filter the results // https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go index ecce704236b99..8c451b43d9ff3 100644 --- a/modules/indexer/issues/meilisearch/meilisearch_test.go +++ b/modules/indexer/issues/meilisearch/meilisearch_test.go @@ -93,3 +93,9 @@ func TestNonFuzzyWorkaround(t *testing.T) { assert.NoError(t, err) assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits) } + +func TestDoubleQuoteKeyword(t *testing.T) { + assert.EqualValues(t, "", doubleQuoteKeyword("")) + assert.EqualValues(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c")) + assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) +} From 069b136d1be321da6973d5b1e1979d1013dfa4ff Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Tue, 12 Mar 2024 16:32:46 +0100 Subject: [PATCH 2/5] debug if the filter still do have to do work --- modules/indexer/issues/meilisearch/meilisearch.go | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index e1f8e59fa675b..822419937fb40 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -13,6 +13,7 @@ import ( indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" "code.gitea.io/gitea/modules/indexer/issues/internal" + "code.gitea.io/gitea/modules/log" "github.com/meilisearch/meilisearch-go" ) @@ -319,6 +320,8 @@ func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, i } // we got no match + issueID, _ := hit["id"].(float64) + log.Debug("filtered out hit with id %d by nonFuzzyWorkaround", issueID) return false, nil }() From a47c1784309e92a76fefaf70b3151455c47abc1d Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Tue, 12 Mar 2024 16:56:58 +0100 Subject: [PATCH 3/5] remove workaround --- .../indexer/issues/meilisearch/meilisearch.go | 63 +------------------ .../issues/meilisearch/meilisearch_test.go | 16 ++--- 2 files changed, 7 insertions(+), 72 deletions(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 822419937fb40..b5335c07fb3c6 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -13,7 +13,6 @@ import ( indexer_internal "code.gitea.io/gitea/modules/indexer/internal" inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch" "code.gitea.io/gitea/modules/indexer/issues/internal" - "code.gitea.io/gitea/modules/log" "github.com/meilisearch/meilisearch-go" ) @@ -235,7 +234,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( return nil, err } - hits, err := nonFuzzyWorkaround(searchRes, options.Keyword, options.IsFuzzyKeyword) + hits, err := convertHits(searchRes) if err != nil { return nil, err } @@ -266,11 +265,7 @@ func doubleQuoteKeyword(k string) string { return strings.Join(kp[:parts], " ") } -// nonFuzzyWorkaround is needed as meilisearch does not have an exact search -// and you can only change "typo tolerance" per index. So we have to post-filter the results -// https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance -// TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed -func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, isFuzzy bool) ([]internal.Match, error) { +func convertHits(searchRes *meilisearch.SearchResponse) ([]internal.Match, error) { hits := make([]internal.Match, 0, len(searchRes.Hits)) for _, hit := range searchRes.Hits { hit, ok := hit.(map[string]any) @@ -278,63 +273,11 @@ func nonFuzzyWorkaround(searchRes *meilisearch.SearchResponse, keyword string, i return nil, ErrMalformedResponse } - if !isFuzzy { - keyword = strings.ToLower(keyword) - - // declare a anon func to check if the title, content or at least one comment contains the keyword - found, err := func() (bool, error) { - // check if title match first - title, ok := hit["title"].(string) - if !ok { - return false, ErrMalformedResponse - } else if strings.Contains(strings.ToLower(title), keyword) { - return true, nil - } - - // check if content has a match - content, ok := hit["content"].(string) - if !ok { - return false, ErrMalformedResponse - } else if strings.Contains(strings.ToLower(content), keyword) { - return true, nil - } - - // now check for each comment if one has a match - // so we first try to cast and skip if there are no comments - comments, ok := hit["comments"].([]any) - if !ok { - return false, ErrMalformedResponse - } else if len(comments) == 0 { - return false, nil - } - - // now we iterate over all and report as soon as we detect one match - for i := range comments { - comment, ok := comments[i].(string) - if !ok { - return false, ErrMalformedResponse - } - if strings.Contains(strings.ToLower(comment), keyword) { - return true, nil - } - } - - // we got no match - issueID, _ := hit["id"].(float64) - log.Debug("filtered out hit with id %d by nonFuzzyWorkaround", issueID) - return false, nil - }() - - if err != nil { - return nil, err - } else if !found { - continue - } - } issueID, ok := hit["id"].(float64) if !ok { return nil, ErrMalformedResponse } + hits = append(hits, internal.Match{ ID: int64(issueID), }) diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go index 8c451b43d9ff3..7ce41e87dabec 100644 --- a/modules/indexer/issues/meilisearch/meilisearch_test.go +++ b/modules/indexer/issues/meilisearch/meilisearch_test.go @@ -53,11 +53,10 @@ func TestMeilisearchIndexer(t *testing.T) { tests.TestIndexer(t, indexer) } -func TestNonFuzzyWorkaround(t *testing.T) { - // get unexpected return - _, err := nonFuzzyWorkaround(&meilisearch.SearchResponse{ +func TestConvertHits(t *testing.T) { + _, err := convertHits(&meilisearch.SearchResponse{ Hits: []any{"aa", "bb", "cc", "dd"}, - }, "bowling", false) + }) assert.ErrorIs(t, err, ErrMalformedResponse) validResponse := &meilisearch.SearchResponse{ @@ -82,14 +81,7 @@ func TestNonFuzzyWorkaround(t *testing.T) { }, }, } - - // nonFuzzy - hits, err := nonFuzzyWorkaround(validResponse, "bowling", false) - assert.NoError(t, err) - assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}}, hits) - - // fuzzy - hits, err = nonFuzzyWorkaround(validResponse, "bowling", true) + hits, err := convertHits(validResponse) assert.NoError(t, err) assert.EqualValues(t, []internal.Match{{ID: 11}, {ID: 22}, {ID: 33}}, hits) } From d161622b2495988bd8e1f05b3ff308118bf97ce8 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Thu, 14 Mar 2024 13:46:56 +0100 Subject: [PATCH 4/5] add code comment --- modules/indexer/issues/meilisearch/meilisearch.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index b5335c07fb3c6..484a3938c6ebe 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -220,6 +220,8 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) ( keyword := options.Keyword if !options.IsFuzzyKeyword { + // to make it non fuzzy ("typo tolerance" in meilisearch terms), we have to quote the keyword(s) + // https://www.meilisearch.com/docs/reference/api/search#phrase-search keyword = doubleQuoteKeyword(keyword) } From 6b9f4ebe8bc8a21805fc20513b63a85225801ed5 Mon Sep 17 00:00:00 2001 From: "m.huber" Date: Fri, 15 Mar 2024 23:59:17 +0100 Subject: [PATCH 5/5] handle quotes in input for doubleQuoteKeyword() --- modules/indexer/issues/meilisearch/meilisearch.go | 5 +++-- modules/indexer/issues/meilisearch/meilisearch_test.go | 2 ++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/modules/indexer/issues/meilisearch/meilisearch.go b/modules/indexer/issues/meilisearch/meilisearch.go index 01f4736b6e73f..b735c26968525 100644 --- a/modules/indexer/issues/meilisearch/meilisearch.go +++ b/modules/indexer/issues/meilisearch/meilisearch.go @@ -259,8 +259,9 @@ func doubleQuoteKeyword(k string) string { kp := strings.Split(k, " ") parts := 0 for i := range kp { - if kp[i] != "" { - kp[parts] = fmt.Sprintf(`"%s"`, kp[i]) + part := strings.Trim(kp[i], "\"") + if part != "" { + kp[parts] = fmt.Sprintf(`"%s"`, part) parts++ } } diff --git a/modules/indexer/issues/meilisearch/meilisearch_test.go b/modules/indexer/issues/meilisearch/meilisearch_test.go index 7ce41e87dabec..4666df136a057 100644 --- a/modules/indexer/issues/meilisearch/meilisearch_test.go +++ b/modules/indexer/issues/meilisearch/meilisearch_test.go @@ -90,4 +90,6 @@ func TestDoubleQuoteKeyword(t *testing.T) { assert.EqualValues(t, "", doubleQuoteKeyword("")) assert.EqualValues(t, `"a" "b" "c"`, doubleQuoteKeyword("a b c")) assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) + assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword("a d g")) + assert.EqualValues(t, `"a" "d" "g"`, doubleQuoteKeyword(`a "" "d" """g`)) }