Skip to content

Commit

Permalink
Normalize scores, closes #401
Browse files Browse the repository at this point in the history
  • Loading branch information
davidmezzetti committed Dec 27, 2022
1 parent 2726cfa commit e609f8c
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 0 deletions.
12 changes: 12 additions & 0 deletions src/python/txtai/scoring/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ def __init__(self, config=None):
self.documents = {} if self.config.get("content") else None
self.docterms = {} if self.config.get("terms") else None

# Normalize scores
self.normalize = self.config.get("normalize")

# Word frequency
self.docfreq = Counter()
self.wordfreq = Counter()
Expand Down Expand Up @@ -210,6 +213,15 @@ def search(self, query, limit=3):

scores[x] += self.docterms[token][x]

# Check if score normalization enabled
if self.normalize:
# Calculate max score = 4 * average score
maxscore = 4 * self.score(self.avgfreq, self.avgidf, self.avgdl)

# Normalize scores between 0 - 1 using maxscore
for x in scores:
scores[x] = min(scores[x] / maxscore, 1.0)

# Sort and get topn results
topn = sorted(scores.items(), key=lambda x: x[1], reverse=True)[:limit]

Expand Down
17 changes: 17 additions & 0 deletions test/python/testscoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ def runTests(self, method):
self.index(config)
self.weights(config)
self.search(config)
self.normalize(config)
self.content(config)
self.empty(config)

Expand Down Expand Up @@ -172,6 +173,22 @@ def search(self, config):
index, _ = scoring.search("bear", 1)[0]
self.assertEqual(index, 3)

def normalize(self, config):
    """
    Test that a scoring search returns normalized scores when the "normalize" option is set.

    Args:
        config: scoring configuration, extended here with "terms" and "normalize" enabled
    """

    # Layer the term index and score normalization flags on top of the supplied config
    settings = {**config, "terms": True, "normalize": True}

    model = ScoringFactory.create(settings)
    model.index(self.data)

    # Query with element 1 of data row 3 and request only the single best match
    query = self.data[3][1]
    uid, score = model.search(query, 1)[0]

    # The top hit must be row 3 and its normalized score must equal 1.0
    self.assertEqual(uid, 3)
    self.assertEqual(score, 1.0)

def content(self, config):
"""
Test scoring search with content.
Expand Down

0 comments on commit e609f8c

Please sign in to comment.