Skip to content

Commit 3dfa93e

Browse files
Improve explanation in rescore (#30629)
Currently, in a rescore request, if window_size is smaller than the number of top N documents returned (N=size), the explanation of scores could be incorrect for documents that were part of the top N but not part of rescoring. This PR corrects this by saving in RescoreContext the docIDs of the documents for which rescoring was applied, and adding the rescoring explanation only for these docIDs. Closes #28725
1 parent b634065 commit 3dfa93e

File tree

3 files changed

+69
-15
lines changed

3 files changed

+69
-15
lines changed
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
---
2+
"Score should match explanation in rescore":
3+
- skip:
4+
version: " - 6.99.99"
5+
reason: Explanation for rescoring was corrected after these versions
6+
- do:
7+
bulk:
8+
refresh: true
9+
body:
10+
- '{"index": {"_index": "test_index", "_type": "_doc", "_id": "1"}}'
11+
- '{"f1": "1"}'
12+
- '{"index": {"_index": "test_index", "_type": "_doc", "_id": "2"}}'
13+
- '{"f1": "2"}'
14+
- '{"index": {"_index": "test_index", "_type": "_doc", "_id": "3"}}'
15+
- '{"f1": "3"}'
16+
17+
- do:
18+
search:
19+
index: test_index
20+
body:
21+
explain: true
22+
query:
23+
match_all: {}
24+
rescore:
25+
window_size: 2
26+
query:
27+
rescore_query:
28+
match_all: {}
29+
query_weight: 5
30+
rescore_query_weight: 10
31+
32+
- match: { hits.hits.0._score: 15 }
33+
- match: { hits.hits.0._explanation.value: 15 }
34+
35+
- match: { hits.hits.1._score: 15 }
36+
- match: { hits.hits.1._explanation.value: 15 }
37+
38+
- match: { hits.hits.2._score: 5 }
39+
- match: { hits.hits.2._explanation.value: 5 }

server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,8 @@
3030
import java.util.Arrays;
3131
import java.util.Comparator;
3232
import java.util.Set;
33+
import java.util.Collections;
34+
import static java.util.stream.Collectors.toSet;
3335

3436
public final class QueryRescorer implements Rescorer {
3537

@@ -61,6 +63,11 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s
6163
// First take top slice of incoming docs, to be rescored:
6264
TopDocs topNFirstPass = topN(topDocs, rescoreContext.getWindowSize());
6365

66+
// Save doc IDs for which rescoring was applied to be used in score explanation
67+
Set<Integer> topNDocIDs = Collections.unmodifiableSet(
68+
Arrays.stream(topNFirstPass.scoreDocs).map(scoreDoc -> scoreDoc.doc).collect(toSet()));
69+
rescoreContext.setRescoredDocs(topNDocIDs);
70+
6471
// Rescore them:
6572
TopDocs rescored = rescorer.rescore(searcher, topNFirstPass, rescoreContext.getWindowSize());
6673

@@ -71,16 +78,12 @@ protected float combine(float firstPassScore, boolean secondPassMatches, float s
7178
@Override
7279
public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext,
7380
Explanation sourceExplanation) throws IOException {
74-
QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
7581
if (sourceExplanation == null) {
7682
// this should not happen but just in case
7783
return Explanation.noMatch("nothing matched");
7884
}
79-
// TODO: this isn't right? I.e., we are incorrectly pretending all first pass hits were rescored? If the requested docID was
80-
// beyond the top rescoreContext.window() in the first pass hits, we don't rescore it now?
81-
Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
85+
QueryRescoreContext rescore = (QueryRescoreContext) rescoreContext;
8286
float primaryWeight = rescore.queryWeight();
83-
8487
Explanation prim;
8588
if (sourceExplanation.isMatch()) {
8689
prim = Explanation.match(
@@ -89,23 +92,24 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon
8992
} else {
9093
prim = Explanation.noMatch("First pass did not match", sourceExplanation);
9194
}
92-
93-
// NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used. Maybe
94-
// we should add QueryRescorer.explainCombine to Lucene?
95-
if (rescoreExplain != null && rescoreExplain.isMatch()) {
96-
float secondaryWeight = rescore.rescoreQueryWeight();
97-
Explanation sec = Explanation.match(
95+
if (rescoreContext.isRescored(topLevelDocId)){
96+
Explanation rescoreExplain = searcher.explain(rescore.query(), topLevelDocId);
97+
// NOTE: we don't use Lucene's Rescorer.explain because we want to insert our own description with which ScoreMode was used.
98+
// Maybe we should add QueryRescorer.explainCombine to Lucene?
99+
if (rescoreExplain != null && rescoreExplain.isMatch()) {
100+
float secondaryWeight = rescore.rescoreQueryWeight();
101+
Explanation sec = Explanation.match(
98102
rescoreExplain.getValue() * secondaryWeight,
99103
"product of:",
100104
rescoreExplain, Explanation.match(secondaryWeight, "secondaryWeight"));
101-
QueryRescoreMode scoreMode = rescore.scoreMode();
102-
return Explanation.match(
105+
QueryRescoreMode scoreMode = rescore.scoreMode();
106+
return Explanation.match(
103107
scoreMode.combine(prim.getValue(), sec.getValue()),
104108
scoreMode + " of:",
105109
prim, sec);
106-
} else {
107-
return prim;
110+
}
108111
}
112+
return prim;
109113
}
110114

111115
private static final Comparator<ScoreDoc> SCORE_DOC_COMPARATOR = new Comparator<ScoreDoc>() {

server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
package org.elasticsearch.search.rescore;
2121

22+
import java.util.Set;
23+
2224
/**
2325
* Context available to the rescore while it is running. Rescore
2426
* implementations should extend this with any additional resources that
@@ -27,6 +29,7 @@
2729
public class RescoreContext {
2830
private final int windowSize;
2931
private final Rescorer rescorer;
32+
private Set<Integer> recroredDocs; //doc Ids for which rescoring was applied
3033

3134
/**
3235
* Build the context.
@@ -50,4 +53,12 @@ public Rescorer rescorer() {
5053
public int getWindowSize() {
5154
return windowSize;
5255
}
56+
57+
public void setRescoredDocs(Set<Integer> docIds) {
58+
recroredDocs = docIds;
59+
}
60+
61+
public boolean isRescored(int docId) {
62+
return recroredDocs.contains(docId);
63+
}
5364
}

0 commit comments

Comments
 (0)