Skip to content

Commit

Permalink
Fix issues indexer document mapping (#25619)
Browse files Browse the repository at this point in the history
Fix regression of #5363 (so long ago).

The old code definded a document mapping for `issueIndexerDocType`, and
assigned it to `BleveIndexerData` as its type. (`BleveIndexerData` has
been renamed to `IndexerData` in #25174, but nothing more.) But the old
code never used `BleveIndexerData`, it wrote the index with an anonymous
struct type. Nonetheless, bleve would use the default auto-mapping for
struct it didn't know, so the indexer still worked. This means the
custom document mapping was always dead code.

The custom document mapping is not useless, it can reduce index storage,
this PR brings it back and disable default mapping to prevent it from
happening again. Since `IndexerData`(`BleveIndexerData`) has JSON tags,
and bleve uses them first, so we should use `repo_id` as the field name
instead of `RepoID`.

I did a test to compare the storage size before and after this, with
about 3k real comments that were migrated from some public repos.

Before:

```text
[ 160]  .
├── [  42]  index_meta.json
├── [  13]  rupture_meta.json
└── [ 128]  store
    ├── [6.9M]  00000000005d.zap
    └── [256K]  root.bolt
```

After:

```text
[ 160]  .
├── [  42]  index_meta.json
├── [  13]  rupture_meta.json
└── [ 128]  store
    ├── [3.5M]  000000000065.zap
    └── [256K]  root.bolt
```

It saves about half the storage space.

---------

Co-authored-by: Giteabot <teabot@gitea.io>
  • Loading branch information
wolfogre and GiteaBot committed Jul 4, 2023
1 parent dae022a commit 9958642
Showing 1 changed file with 12 additions and 20 deletions.
32 changes: 12 additions & 20 deletions modules/indexer/issues/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import (
const (
issueIndexerAnalyzer = "issueIndexer"
issueIndexerDocType = "issueIndexerDocType"
issueIndexerLatestVersion = 2
issueIndexerLatestVersion = 3
)

// numericEqualityQuery a numeric equality query for the given value and field
Expand Down Expand Up @@ -67,15 +67,16 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
docMapping := bleve.NewDocumentMapping()

numericFieldMapping := bleve.NewNumericFieldMapping()
numericFieldMapping.Store = false
numericFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("RepoID", numericFieldMapping)
docMapping.AddFieldMappingsAt("repo_id", numericFieldMapping)

textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Store = false
textFieldMapping.IncludeInAll = false
docMapping.AddFieldMappingsAt("Title", textFieldMapping)
docMapping.AddFieldMappingsAt("Content", textFieldMapping)
docMapping.AddFieldMappingsAt("Comments", textFieldMapping)
docMapping.AddFieldMappingsAt("title", textFieldMapping)
docMapping.AddFieldMappingsAt("content", textFieldMapping)
docMapping.AddFieldMappingsAt("comments", textFieldMapping)

if err := addUnicodeNormalizeTokenFilter(mapping); err != nil {
return nil, err
Expand All @@ -91,6 +92,7 @@ func generateIssueIndexMapping() (mapping.IndexMapping, error) {
mapping.DefaultAnalyzer = issueIndexerAnalyzer
mapping.AddDocumentMapping(issueIndexerDocType, docMapping)
mapping.AddDocumentMapping("_all", bleve.NewDocumentDisabledMapping())
mapping.DefaultMapping = bleve.NewDocumentDisabledMapping() // disable default mapping, avoid indexing unexpected structs

return mapping, nil
}
Expand All @@ -116,17 +118,7 @@ func NewIndexer(indexDir string) *Indexer {
func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error {
batch := inner_bleve.NewFlushingBatch(b.inner.Indexer, maxBatchSize)
for _, issue := range issues {
if err := batch.Index(indexer_internal.Base36(issue.ID), struct {
RepoID int64
Title string
Content string
Comments []string
}{
RepoID: issue.RepoID,
Title: issue.Title,
Content: issue.Content,
Comments: issue.Comments,
}); err != nil {
if err := batch.Index(indexer_internal.Base36(issue.ID), (*IndexerData)(issue)); err != nil {
return err
}
}
Expand All @@ -149,7 +141,7 @@ func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) {
var repoQueriesP []*query.NumericRangeQuery
for _, repoID := range repoIDs {
repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "RepoID"))
repoQueriesP = append(repoQueriesP, numericEqualityQuery(repoID, "repo_id"))
}
repoQueries := make([]query.Query, len(repoQueriesP))
for i, v := range repoQueriesP {
Expand All @@ -159,9 +151,9 @@ func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, l
indexerQuery := bleve.NewConjunctionQuery(
bleve.NewDisjunctionQuery(repoQueries...),
bleve.NewDisjunctionQuery(
newMatchPhraseQuery(keyword, "Title", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Content", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "Comments", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "title", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "content", issueIndexerAnalyzer),
newMatchPhraseQuery(keyword, "comments", issueIndexerAnalyzer),
))
search := bleve.NewSearchRequestOptions(indexerQuery, limit, start, false)
search.SortBy([]string{"-_score"})
Expand Down

0 comments on commit 9958642

Please sign in to comment.