Avoid double term construction in DfsPhase #38716
@@ -19,14 +19,11 @@
 package org.elasticsearch.search.dfs;

-import com.carrotsearch.hppc.ObjectHashSet;
 import com.carrotsearch.hppc.ObjectObjectHashMap;
-import com.carrotsearch.hppc.cursors.ObjectCursor;

-import org.apache.lucene.index.IndexReaderContext;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.TermStates;
 import org.apache.lucene.search.CollectionStatistics;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.ScoreMode;
 import org.apache.lucene.search.TermStatistics;
 import org.elasticsearch.common.collect.HppcMaps;

@@ -36,9 +33,8 @@
 import org.elasticsearch.tasks.TaskCancelledException;

 import java.io.IOException;
-import java.util.AbstractSet;
-import java.util.Collection;
-import java.util.Iterator;
+import java.util.HashMap;
+import java.util.Map;

 /**
  * Dfs phase of a search request, used to make scoring 100% accurate by collecting additional info from each shard before the query phase.
@@ -52,101 +48,51 @@ public void preProcess(SearchContext context) {

     @Override
     public void execute(SearchContext context) {
-        final ObjectHashSet<Term> termsSet = new ObjectHashSet<>();
         try {
-            context.searcher().createWeight(context.searcher().rewrite(context.query()), ScoreMode.COMPLETE, 1f)
-                .extractTerms(new DelegateSet(termsSet));
-            for (RescoreContext rescoreContext : context.rescore()) {
-                try {
-                    rescoreContext.rescorer().extractTerms(context.searcher(), rescoreContext, new DelegateSet(termsSet));
-                } catch (IOException e) {
-                    throw new IllegalStateException("Failed to extract terms", e);
-                }
-            }
-
-            Term[] terms = termsSet.toArray(Term.class);
-            TermStatistics[] termStatistics = new TermStatistics[terms.length];
-            IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext();
-            for (int i = 0; i < terms.length; i++) {
-                if(context.isCancelled()) {
-                    throw new TaskCancelledException("cancelled");
-                }
-                // LUCENE 4 UPGRADE: cache TermStates?
-                TermStates termContext = TermStates.build(indexReaderContext, terms[i], true);
-                termStatistics[i] = context.searcher().termStatistics(terms[i], termContext);
-            }
-
             ObjectObjectHashMap<String, CollectionStatistics> fieldStatistics = HppcMaps.newNoNullKeysMap();
-            for (Term term : terms) {
-                assert term.field() != null : "field is null";
-                if (fieldStatistics.containsKey(term.field()) == false) {
-                    final CollectionStatistics collectionStatistics = context.searcher().collectionStatistics(term.field());
-                    if (collectionStatistics != null) {
-                        fieldStatistics.put(term.field(), collectionStatistics);
-                    }
-                }
-            }

[Review comment] I wonder if this is really equivalent. Some queries are going to build term statistics even though they don't add terms in […]

[Reply] I think this is incorrect - queries will only build term statistics if they actually need them, whereas […]

[Reply] I agree about the API change on rescorer though, let me think about a better way to do that.
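To make the reply concrete, here is a small aside (an editor's sketch, not part of the diff) showing that Lucene queries request statistics from the searcher only while building a scoring Weight. With the Lucene 8-era, TermStates-based signatures used in this change, TermQuery's Weight asks for term statistics during createWeight only when the ScoreMode needs scores, so a recording IndexSearcher observes exactly the terms that will contribute to scoring. StatsRecordingSearcher and the field/term values are illustrative names, not code from this PR.

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermStatistics;

// Illustrative only: records every term whose statistics a query requests
// while its Weight is built.
class StatsRecordingSearcher extends IndexSearcher {
    final Map<Term, TermStatistics> seen = new HashMap<>();

    StatsRecordingSearcher(IndexReader reader) {
        super(reader);
    }

    @Override
    public TermStatistics termStatistics(Term term, TermStates states) throws IOException {
        TermStatistics ts = super.termStatistics(term, states);
        if (ts != null) {
            seen.put(term, ts); // only reached when the query actually asked for stats
        }
        return ts;
    }

    // Building a scoring Weight triggers the statistics lookup; a
    // non-scoring build does not, so 'seen' stays empty in that case.
    void demo() throws IOException {
        Query q = rewrite(new TermQuery(new Term("body", "elasticsearch")));
        createWeight(q, ScoreMode.COMPLETE, 1f);           // needs scores: stats recorded
        createWeight(q, ScoreMode.COMPLETE_NO_SCORES, 1f); // no scores: nothing recorded
    }
}

This is also why handing the same intercepting searcher to each RescoreContext in the diff below captures rescore-query terms without any separate bookkeeping.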
+            Map<Term, TermStatistics> stats = new HashMap<>();
+            IndexSearcher searcher = new IndexSearcher(context.searcher().getIndexReader()) {
+                @Override
+                public TermStatistics termStatistics(Term term, TermStates states) throws IOException {
+                    if (context.isCancelled()) {
+                        throw new TaskCancelledException("cancelled");
+                    }
+                    TermStatistics ts = super.termStatistics(term, states);
+                    if (ts != null) {
+                        stats.put(term, ts);
+                    }
+                    return ts;
+                }
+
+                @Override
+                public CollectionStatistics collectionStatistics(String field) throws IOException {
+                    if (context.isCancelled()) {
+                        throw new TaskCancelledException("cancelled");
+                    }
+                    CollectionStatistics cs = super.collectionStatistics(field);
+                    if (cs != null) {
+                        fieldStatistics.put(field, cs);
+                    }
+                    return cs;
+                }
+            };
+
+            searcher.createWeight(context.searcher().rewrite(context.query()), ScoreMode.COMPLETE, 1);
+            for (RescoreContext rescoreContext : context.rescore()) {
+                rescoreContext.rescorer().extractTerms(searcher, rescoreContext);
+            }
+
+            Term[] terms = stats.keySet().toArray(new Term[0]);
+            TermStatistics[] termStatistics = new TermStatistics[terms.length];
+            for (int i = 0; i < terms.length; i++) {
+                termStatistics[i] = stats.get(terms[i]);
+            }

             context.dfsResult().termsStatistics(terms, termStatistics)
                 .fieldStatistics(fieldStatistics)
                 .maxDoc(context.searcher().getIndexReader().maxDoc());
         } catch (Exception e) {
             throw new DfsPhaseExecutionException(context, "Exception during dfs phase", e);
-        } finally {
-            termsSet.clear(); // don't hold on to terms
         }
     }
-
-    // We need to bridge to JCF world, b/c of Query#extractTerms
-    private static class DelegateSet extends AbstractSet<Term> {
-
-        private final ObjectHashSet<Term> delegate;
-
-        private DelegateSet(ObjectHashSet<Term> delegate) {
-            this.delegate = delegate;
-        }
-
-        @Override
-        public boolean add(Term term) {
-            return delegate.add(term);
-        }
-
-        @Override
-        public boolean addAll(Collection<? extends Term> terms) {
-            boolean result = false;
-            for (Term term : terms) {
-                result = delegate.add(term);
-            }
-            return result;
-        }
-
-        @Override
-        public Iterator<Term> iterator() {
-            final Iterator<ObjectCursor<Term>> iterator = delegate.iterator();
-            return new Iterator<Term>() {
-                @Override
-                public boolean hasNext() {
-                    return iterator.hasNext();
-                }
-
-                @Override
-                public Term next() {
-                    return iterator.next().value;
-                }
-
-                @Override
-                public void remove() {
-                    throw new UnsupportedOperationException();
-                }
-            };
-        }
-
-        @Override
-        public int size() {
-            return delegate.size();
-        }
-    }
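For background on the javadoc's claim that the DFS phase makes scoring "100% accurate": a toy illustration (an editor's sketch, not Elasticsearch code) of how shard-local document frequencies skew ranking, using Lucene's BM25 idf formula. The term and shard numbers are invented.

// Toy numbers: a term that is rare on shard 0 but common on shard 1.
public class DfsMotivation {
    // Lucene's BM25 idf: log(1 + (docCount - docFreq + 0.5) / (docFreq + 0.5))
    static double idf(long docFreq, long docCount) {
        return Math.log(1 + (docCount - docFreq + 0.5d) / (docFreq + 0.5d));
    }

    public static void main(String[] args) {
        System.out.println(idf(1, 100));   // shard 0 alone:  ~4.21 (term looks very rare)
        System.out.println(idf(99, 100));  // shard 1 alone:  ~0.015 (term looks ubiquitous)
        System.out.println(idf(100, 200)); // merged stats both shards use after DFS: ~0.69
    }
}

Without the DFS round-trip, each shard scores with its local idf and the merged top hits are incomparable; with it, every shard scores against the same global statistics.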
[Review comment] The new signature is a bit weird, the only option is to call createWeight on the searcher but it's obfuscated so you need to check an actual implementation to realize that.
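For anyone tripped by the same obfuscation, a sketch of what an implementation presumably does under the new signature (based on the types visible in this diff; QueryRescoreContext and its query() accessor are assumptions here, and this is not guaranteed to be the PR's exact code):

// In a Rescorer implementation (e.g. QueryRescorer). Assumed imports:
// org.apache.lucene.search.{IndexSearcher, Query, ScoreMode}, java.io.IOException.
@Override
public void extractTerms(IndexSearcher searcher, RescoreContext rescoreContext) throws IOException {
    // The handed-in searcher is DfsPhase's intercepting searcher; building
    // a Weight against it records term/collection statistics as a side effect.
    Query rescoreQuery = ((QueryRescoreContext) rescoreContext).query();
    searcher.createWeight(searcher.rewrite(rescoreQuery), ScoreMode.COMPLETE, 1f);
}

The side-effect-through-createWeight design is what the reviewer calls obfuscated: nothing in the method signature says that building the Weight is the point.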