Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Bulk Scorer For ToParentBlockJoinQuery #13697

Merged
merged 47 commits into from
Sep 11, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
6e398eb
Added BlockJoinBulkScorer
Mikep86 Aug 26, 2024
79ca811
BlockJoinBulkScorer development
Mikep86 Aug 26, 2024
fbd6ca5
Fix assertion failures
Mikep86 Aug 27, 2024
5d55d19
Added TestBlockJoinBulkScorer
Mikep86 Aug 27, 2024
1f39771
Test development
Mikep86 Aug 27, 2024
7e648ad
Compute expected scores and compare them to actual scores
Mikep86 Aug 28, 2024
8bba0db
Randomize score mode. Score multiple random indices.
Mikep86 Aug 28, 2024
57cb4c0
Filter out empty child docs
Mikep86 Aug 28, 2024
c1dce29
Randomize search score mode
Mikep86 Aug 28, 2024
9be3a20
Updated approach to handle when scoring in multiple batches
Mikep86 Aug 28, 2024
e15105f
Increase test iterations, fix assertion error
Mikep86 Aug 28, 2024
3e57ff9
fix assertion error
Mikep86 Aug 28, 2024
3a1859f
Handle when score supplier is null
Mikep86 Aug 28, 2024
25079b7
Fix min score computation
Mikep86 Aug 28, 2024
8f5a0b0
Add license
Mikep86 Aug 28, 2024
0f93801
Change batching approach
Mikep86 Aug 29, 2024
ec3d967
Remove unnecessary null check
Mikep86 Aug 30, 2024
cfd780e
Scoring computation adjustments
Mikep86 Aug 30, 2024
ac14952
Remove unnecessary scorer null checks
Mikep86 Aug 30, 2024
8786e58
Check that there are no matches when score supplier is null
Mikep86 Aug 30, 2024
4768012
Stop scoring once we've scored the last parent
Mikep86 Aug 30, 2024
2ce93ca
Calculate scores using doubles
Mikep86 Aug 30, 2024
b0dd8cd
Remove currentMin
Mikep86 Aug 30, 2024
bb72ece
Fix test failure
Mikep86 Aug 30, 2024
50e0db7
Simplify scoring reset
Mikep86 Sep 3, 2024
58b9868
Increased bulk scorer test iterations and parent doc count
Mikep86 Sep 3, 2024
ef49457
Updated bulk join scorer to use common score accumulator class
Mikep86 Sep 3, 2024
f26cb70
End scoring early if we've scored the last parent in the bit set
Mikep86 Sep 5, 2024
ab1fe69
Encapsulate ScoreMode.None handling
Mikep86 Sep 5, 2024
e58cb26
Delegate setMinCompetitiveScore call to child scorer
Mikep86 Sep 5, 2024
e83ba94
Resolve TODO
Mikep86 Sep 5, 2024
e19d594
Optimize scoring when score mode is ScoreMode.None
Mikep86 Sep 6, 2024
2ef6c37
change arg order
Mikep86 Sep 6, 2024
ca0cd07
Check if min == max
Mikep86 Sep 6, 2024
51cf4fd
Add dynamic pruning test with score mode set to Max
Mikep86 Sep 6, 2024
f558d23
Add dynamic pruning test with score mode set to None
Mikep86 Sep 6, 2024
d3b7d5c
Updated CHANGES.txt
Mikep86 Sep 6, 2024
9d4cc56
Fix test
Mikep86 Sep 9, 2024
e154b43
Scoring optimizations
Mikep86 Sep 9, 2024
f76db73
Add/improve comments
Mikep86 Sep 9, 2024
12343b7
Move error check into ParentApproximation#advance
Mikep86 Sep 10, 2024
5a993e3
ParentApproximation#advance logic adjustments
Mikep86 Sep 10, 2024
64672fa
Revert ParentApproximation#advance error-checking logic
Mikep86 Sep 11, 2024
f287a3b
Fix test
Mikep86 Sep 11, 2024
a62d87c
Merge branch 'main' into nested-query_bulk-scorer
Mikep86 Sep 11, 2024
5a41c63
Fix build error
Mikep86 Sep 11, 2024
448af12
Improve comment
Mikep86 Sep 11, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,25 @@
import java.util.Locale;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.FilterLeafCollector;
import org.apache.lucene.search.FilterWeight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Matches;
import org.apache.lucene.search.MatchesUtils;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryVisitor;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.ScorerSupplier;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;

/**
* This query requires that you index children and parent docs as a single block, using the {@link
Expand Down Expand Up @@ -156,6 +161,15 @@ public Scorer get(long leadCost) throws IOException {
return new BlockJoinScorer(childScorerSupplier.get(leadCost), parents, scoreMode);
}

@Override
public BulkScorer bulkScorer() throws IOException {
final BulkScorer innerBulkScorer = childScorerSupplier.bulkScorer();
if (innerBulkScorer == null) {
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
return null;
}
return new BlockJoinBulkScorer(innerBulkScorer, scoreMode, parents);
}

@Override
public long cost() {
return childScorerSupplier.cost();
Expand Down Expand Up @@ -275,6 +289,51 @@ public float matchCost() {
}
}

private static class Score extends Scorable {
private final ScoreMode scoreMode;
private Float score;
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
private int freq;

public Score(ScoreMode scoreMode) {
this.scoreMode = scoreMode;
reset();
}

public void reset() {
score = null;
freq = 0;
}

public void addChildScore(float childScore) {
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
freq += 1;
switch (scoreMode) {
case Total:
case Avg:
score = score == null ? childScore : score + childScore;
break;
case Min:
score = score == null ? childScore : Math.min(score, childScore);
break;
case Max:
score = score == null ? childScore : Math.max(score, childScore);
break;
case None:
break;
default:
throw new AssertionError();
}
}

@Override
public float score() {
float score = this.score != null ? this.score : 0;
if (scoreMode == ScoreMode.Avg && freq > 0) {
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
score /= freq;
}
return score;
}
}

static class BlockJoinScorer extends Scorer {
private final Scorer childScorer;
private final BitSet parentBits;
Expand Down Expand Up @@ -357,6 +416,7 @@ private void setScoreAndFreq() throws IOException {
while (childApproximation.nextDoc() < parentApproximation.docID()) {
if (childTwoPhase == null || childTwoPhase.matches()) {
final float childScore = scoreMode == ScoreMode.None ? 0 : childScorer.score();
// TODO: Refactor
freq += 1;
switch (scoreMode) {
case Total:
Expand Down Expand Up @@ -440,6 +500,83 @@ private String formatScoreExplanation(int matches, int start, int end, ScoreMode
}
}

/**
 * A {@link FilterLeafCollector} with an extra {@link #endBatch(int)} hook. The hook does nothing
 * here; subclasses override it to act once a batch has been handed to the collector.
 */
private abstract static class BatchAwareLeafCollector extends FilterLeafCollector {
  /**
   * @param delegate the collector being wrapped
   */
  public BatchAwareLeafCollector(LeafCollector delegate) {
    super(delegate);
  }

  /**
   * Hook invoked at the end of a batch. The default implementation is a no-op.
   *
   * @param doc the doc ID passed by the caller when the batch ends
   * @throws IOException declared so that overriding implementations may collect documents
   */
  public void endBatch(int doc) throws IOException {
    // Intentionally empty.
  }
}

private static class BlockJoinBulkScorer extends BulkScorer {
private final BulkScorer childBulkScorer;
private final ScoreMode scoreMode;
private final BitSet parents;
private final Score currentParentScore;
private Integer currentParent;

public BlockJoinBulkScorer(BulkScorer childBulkScorer, ScoreMode scoreMode, BitSet parents) {
this.childBulkScorer = childBulkScorer;
this.scoreMode = scoreMode;
this.parents = parents;
this.currentParentScore = new Score(scoreMode);
this.currentParent = null;
}

@Override
public int score(LeafCollector collector, Bits acceptDocs, int min, int max)
throws IOException {
BatchAwareLeafCollector wrappedCollector = wrapCollector(collector);
childBulkScorer.score(wrappedCollector, acceptDocs, min, max);
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
wrappedCollector.endBatch(max);
return max;
}

@Override
public long cost() {
return childBulkScorer.cost();
}

// TODO: Need to resolve parent doc IDs in multi-reader space?
private BatchAwareLeafCollector wrapCollector(LeafCollector collector) {
return new BatchAwareLeafCollector(collector) {
private Scorable scorer = null;

@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
super.setScorer(scorer != null ? currentParentScore : null);
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
}

@Override
public void collect(int doc) throws IOException {
if (currentParent == null) {
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
currentParent = parents.nextSetBit(doc);
} else if (doc > currentParent) {
in.collect(currentParent); // Emit the current parent

// Get the next parent and reset the score
currentParent = parents.nextSetBit(doc);
currentParentScore.reset();
}

if (scorer != null && scoreMode != ScoreMode.None) {
currentParentScore.addChildScore(scorer.score());
}
}

@Override
public void endBatch(int doc) throws IOException {
if (currentParent != null && doc > currentParent) {
Mikep86 marked this conversation as resolved.
Show resolved Hide resolved
in.collect(currentParent);
currentParent = null;
currentParentScore.reset();
}
}
};
}
}

@Override
public Query rewrite(IndexSearcher indexSearcher) throws IOException {
final Query childRewrite = childQuery.rewrite(indexSearcher);
Expand Down
Loading
Loading