Skip to content

Commit

Permalink
fix go-gitea#4479: add fuzzy keyword search to title
Browse files Browse the repository at this point in the history
  • Loading branch information
silkentrance committed Jul 1, 2024
1 parent e82f3ca commit 87bdec2
Show file tree
Hide file tree
Showing 13 changed files with 202 additions and 21 deletions.
6 changes: 6 additions & 0 deletions models/fixtures/access.yml
Original file line number Diff line number Diff line change
Expand Up @@ -171,3 +171,9 @@
user_id: 40
repo_id: 61
mode: 4

-
id: 30
user_id: 40
repo_id: 62
mode: 4
108 changes: 105 additions & 3 deletions models/fixtures/issue.yml
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@
poster_id: 39
original_author_id: 0
name: repo60 pull1
content: content for the 1st issue
content: content for the 1st pr
milestone_id: 0
priority: 0
is_closed: false
Expand All @@ -360,10 +360,10 @@
id: 22
repo_id: 61
index: 1
poster_id: 40
poster_id: 39
original_author_id: 0
name: repo61 pull1
content: content for the 1st issue
content: content for the 1st pr
milestone_id: 0
priority: 0
is_closed: false
Expand All @@ -372,3 +372,105 @@
created_unix: 1707270422
updated_unix: 1707270422
is_locked: false

-
id: 23
repo_id: 62
index: 1
poster_id: 40
original_author_id: 0
name: Ökononmie der UI
content: die ökonomie der UI muß dringend verbessert werden
milestone_id: 0
priority: 0
is_closed: false
is_pull: false
num_comments: 0
created_unix: 1707270422
updated_unix: 1707270422
is_locked: false

-
id: 24
repo_id: 62
index: 2
poster_id: 40
original_author_id: 0
name: 갃갃갃
content: 啊啊啊
milestone_id: 0
priority: 0
is_closed: false
is_pull: false
num_comments: 0
created_unix: 1707270422
updated_unix: 1707270422
is_locked: false

-
id: 25
repo_id: 62
index: 3
poster_id: 40
original_author_id: 0
name: 啊啊啊
content: 갃갃갃
milestone_id: 0
priority: 0
is_closed: false
is_pull: false
num_comments: 0
created_unix: 1707270422
updated_unix: 1707270422
is_locked: false

-
id: 26
repo_id: 62
index: 4
poster_id: 40
original_author_id: 0
name: repo62 pull1 - Ökononmie der UI
content: die ökonomie der UI muß dringend verbessert werden
milestone_id: 0
priority: 0
is_closed: false
is_pull: true
num_comments: 0
created_unix: 1707270412
updated_unix: 1707270412
is_locked: false

-
id: 27
repo_id: 62
index: 5
poster_id: 40
original_author_id: 0
name: repo62 pull2 - 갃갃갃
content: 啊啊啊
milestone_id: 0
priority: 0
is_closed: false
is_pull: true
num_comments: 0
created_unix: 1707270413
updated_unix: 1707270413
is_locked: false

-
id: 28
repo_id: 62
index: 6
poster_id: 40
original_author_id: 0
name: repo62 pull3 - 啊啊啊
content: 갃갃갃
milestone_id: 0
priority: 0
is_closed: false
is_pull: true
num_comments: 0
created_unix: 1707270414
updated_unix: 1707270414
is_locked: false
4 changes: 4 additions & 0 deletions models/fixtures/issue_index.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,7 @@
-
group_id: 51
max_index: 1

-
group_id: 62
max_index: 6
27 changes: 27 additions & 0 deletions models/fixtures/pull_request.yml
Original file line number Diff line number Diff line change
Expand Up @@ -117,3 +117,30 @@
index: 1
head_repo_id: 61
base_repo_id: 61

-
id: 11
type: 0 # gitea pull request
status: 2 # mergeable
issue_id: 26
index: 4
head_repo_id: 62
base_repo_id: 62

-
id: 12
type: 0 # gitea pull request
status: 2 # mergeable
issue_id: 27
index: 5
head_repo_id: 62
base_repo_id: 62

-
id: 13
type: 0 # gitea pull request
status: 2 # mergeable
issue_id: 28
index: 6
head_repo_id: 62
base_repo_id: 62
31 changes: 31 additions & 0 deletions models/fixtures/repository.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1768,3 +1768,34 @@
size: 0
is_fsck_enabled: true
close_issues_via_commit_in_any_branch: false

-
id: 62
owner_id: 41
owner_name: org41
lower_name: repo62
name: repo62
default_branch: main
num_watches: 0
num_stars: 0
num_forks: 0
num_issues: 3
num_closed_issues: 0
num_pulls: 3
num_closed_pulls: 0
num_milestones: 0
num_closed_milestones: 0
num_projects: 0
num_closed_projects: 0
is_private: false
is_empty: false
is_archived: false
is_mirror: false
status: 0
is_fork: false
fork_id: 0
is_template: false
template_id: 0
size: 0
is_fsck_enabled: true
close_issues_via_commit_in_any_branch: false
2 changes: 1 addition & 1 deletion models/fixtures/user.yml
Original file line number Diff line number Diff line change
Expand Up @@ -1510,7 +1510,7 @@
num_followers: 0
num_following: 0
num_stars: 0
num_repos: 1
num_repos: 2
num_teams: 2
num_members: 3
visibility: 0
Expand Down
2 changes: 1 addition & 1 deletion models/issues/issue_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -379,7 +379,7 @@ func TestCountIssues(t *testing.T) {
assert.NoError(t, unittest.PrepareTestDatabase())
count, err := issues_model.CountIssues(db.DefaultContext, &issues_model.IssuesOptions{})
assert.NoError(t, err)
assert.EqualValues(t, 22, count)
assert.EqualValues(t, 28, count)
}

func TestIssueLoadAttributes(t *testing.T) {
Expand Down
6 changes: 3 additions & 3 deletions models/repo/repo_list_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -138,12 +138,12 @@ func getTestCases() []struct {
{
name: "AllPublic/PublicRepositoriesOfUserIncludingCollaborative",
opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, AllPublic: true, Template: optional.Some(false)},
count: 33,
count: 34,
},
{
name: "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborative",
opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 15, Private: true, AllPublic: true, AllLimited: true, Template: optional.Some(false)},
count: 38,
count: 39,
},
{
name: "AllPublic/PublicAndPrivateRepositoriesOfUserIncludingCollaborativeByName",
Expand All @@ -158,7 +158,7 @@ func getTestCases() []struct {
{
name: "AllPublic/PublicRepositoriesOfOrganization",
opts: &repo_model.SearchRepoOptions{ListOptions: db.ListOptions{Page: 1, PageSize: 10}, OwnerID: 17, AllPublic: true, Collaborate: optional.Some(false), Template: optional.Some(false)},
count: 33,
count: 34,
},
{
name: "AllTemplates",
Expand Down
8 changes: 8 additions & 0 deletions modules/indexer/internal/bleve/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,14 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query
return q
}

// FuzzyQuery builds a bleve fuzzy query that looks for matchPhrase in the
// given field, tolerating up to fuzziness edits.
//
// NOTE(review): bleve fuzzy queries appear to be term-level and not run
// through an analyzer, unlike MatchPhraseQuery — confirm that multi-word or
// mixed-case keywords behave as intended for callers.
func FuzzyQuery(matchPhrase, field string, fuzziness int) *query.FuzzyQuery {
	fuzzy := bleve.NewFuzzyQuery(matchPhrase)
	// Restrict the search to a single field and apply the caller's edit distance.
	fuzzy.SetField(field)
	fuzzy.SetFuzziness(fuzziness)
	return fuzzy
}

// BoolFieldQuery generates a bool field query for the given value and field
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
q := bleve.NewBoolFieldQuery(value)
Expand Down
10 changes: 6 additions & 4 deletions modules/indexer/internal/bleve/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,14 @@ func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {

func GuessFuzzinessByKeyword(s string) int {
// according to https://github.com/blevesearch/bleve/issues/1563, the supported max fuzziness is 2
// magic number 4 was chosen to determine the levenshtein distance per each character of a keyword
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot.
// BUT, when using CJK (eg: `갃갃갃` `啊啊啊`), it mismatches a lot
// which we need to live with, as we need to support not just ASCII
// in case of code points >= 128 we will increase the fuzziness to 2
// the standard is 1
for _, r := range s {
if r >= 128 {
return 0
return 2
}
}
return min(2, len(s)/4)
return 1
}
2 changes: 1 addition & 1 deletion modules/indexer/issues/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
}

queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.FuzzyQuery(options.Keyword, "title", fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
}...))
Expand Down
16 changes: 8 additions & 8 deletions modules/indexer/issues/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -209,13 +209,13 @@ func searchIssueIsPull(t *testing.T) {
SearchOptions{
IsPull: optional.Some(false),
},
[]int64{17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
[]int64{25, 24, 23, 17, 16, 15, 14, 13, 6, 5, 18, 10, 7, 4, 1},
},
{
SearchOptions{
IsPull: optional.Some(true),
},
[]int64{22, 21, 12, 11, 20, 19, 9, 8, 3, 2},
[]int64{22, 21, 28, 27, 26, 12, 11, 20, 19, 9, 8, 3, 2},
},
}
for _, test := range tests {
Expand All @@ -236,7 +236,7 @@ func searchIssueIsClosed(t *testing.T) {
SearchOptions{
IsClosed: optional.Some(false),
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 19, 18, 10, 7, 9, 8, 3, 2, 1},
},
{
SearchOptions{
Expand Down Expand Up @@ -302,7 +302,7 @@ func searchIssueByLabelID(t *testing.T) {
SearchOptions{
ExcludedLabelIDs: []int64{1},
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3},
},
}
for _, test := range tests {
Expand All @@ -323,7 +323,7 @@ func searchIssueByTime(t *testing.T) {
SearchOptions{
UpdatedAfterUnix: optional.Some(int64(0)),
},
[]int64{22, 21, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
[]int64{25, 24, 23, 22, 21, 28, 27, 26, 17, 16, 15, 14, 13, 12, 11, 20, 6, 5, 19, 18, 10, 7, 4, 9, 8, 3, 2, 1},
},
}
for _, test := range tests {
Expand All @@ -344,7 +344,7 @@ func searchIssueWithOrder(t *testing.T) {
SearchOptions{
SortBy: internal.SortByCreatedAsc,
},
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 21, 22},
[]int64{1, 2, 3, 8, 9, 4, 7, 10, 18, 19, 5, 6, 20, 11, 12, 13, 14, 15, 16, 17, 26, 27, 28, 21, 22, 23, 24, 25},
},
}
for _, test := range tests {
Expand Down Expand Up @@ -401,8 +401,8 @@ func searchIssueWithPaginator(t *testing.T) {
PageSize: 5,
},
},
[]int64{22, 21, 17, 16, 15},
22,
[]int64{25, 24, 23, 22, 21},
28,
},
}
for _, test := range tests {
Expand Down
1 change: 1 addition & 0 deletions routers/web/repo/issue.go
Original file line number Diff line number Diff line change
Expand Up @@ -2677,6 +2677,7 @@ func SearchIssues(ctx *context.Context) {
MilestoneIDs: includedMilestones,
ProjectID: projectID,
SortBy: issue_indexer.SortByCreatedDesc,
IsFuzzyKeyword: true,
}

if since != 0 {
Expand Down

0 comments on commit 87bdec2

Please sign in to comment.