diff --git a/plugins/examples/rescore/src/main/java/org/elasticsearch/example/rescore/ExampleRescoreBuilder.java b/plugins/examples/rescore/src/main/java/org/elasticsearch/example/rescore/ExampleRescoreBuilder.java index 358d2cb00ab14..7f4855f8880b7 100644 --- a/plugins/examples/rescore/src/main/java/org/elasticsearch/example/rescore/ExampleRescoreBuilder.java +++ b/plugins/examples/rescore/src/main/java/org/elasticsearch/example/rescore/ExampleRescoreBuilder.java @@ -20,7 +20,6 @@ package org.elasticsearch.example.rescore; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.ScoreDoc; @@ -46,7 +45,6 @@ import java.util.Arrays; import java.util.Iterator; import java.util.Objects; -import java.util.Set; import static java.util.Collections.singletonList; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; @@ -224,9 +222,5 @@ public Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreCon return Explanation.match(context.factor, "test", singletonList(sourceExplanation)); } - @Override - public void extractTerms(IndexSearcher searcher, RescoreContext rescoreContext, Set termsSet) { - // Since we don't use queries there are no terms to extract. - } } } diff --git a/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java b/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java index 0b7d8da481c62..3cbc47173c6b4 100644 --- a/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java +++ b/server/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java @@ -19,14 +19,12 @@ package org.elasticsearch.search.dfs; -import com.carrotsearch.hppc.ObjectHashSet; import com.carrotsearch.hppc.ObjectObjectHashMap; -import com.carrotsearch.hppc.cursors.ObjectCursor; - -import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermStates; import org.apache.lucene.search.CollectionStatistics; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.TermStatistics; import org.elasticsearch.common.collect.HppcMaps; @@ -36,9 +34,8 @@ import org.elasticsearch.tasks.TaskCancelledException; import java.io.IOException; -import java.util.AbstractSet; -import java.util.Collection; -import java.util.Iterator; +import java.util.HashMap; +import java.util.Map; /** * Dfs phase of a search request, used to make scoring 100% accurate by collecting additional info from each shard before the query phase. @@ -52,42 +49,46 @@ public void preProcess(SearchContext context) { @Override public void execute(SearchContext context) { - final ObjectHashSet termsSet = new ObjectHashSet<>(); try { - context.searcher().createWeight(context.searcher().rewrite(context.query()), ScoreMode.COMPLETE, 1f) - .extractTerms(new DelegateSet(termsSet)); + ObjectObjectHashMap fieldStatistics = HppcMaps.newNoNullKeysMap(); + Map stats = new HashMap<>(); + IndexSearcher searcher = new IndexSearcher(context.searcher().getIndexReader()) { + @Override + public TermStatistics termStatistics(Term term, TermStates states) throws IOException { + if (context.isCancelled()) { + throw new TaskCancelledException("cancelled"); + } + TermStatistics ts = super.termStatistics(term, states); + if (ts != null) { + stats.put(term, ts); + } + return ts; + } + + @Override + public CollectionStatistics collectionStatistics(String field) throws IOException { + if (context.isCancelled()) { + throw new TaskCancelledException("cancelled"); + } + CollectionStatistics cs = super.collectionStatistics(field); + if (cs != null) { + fieldStatistics.put(field, cs); + } + return cs; + } + }; + + searcher.createWeight(context.searcher().rewrite(context.query()), ScoreMode.COMPLETE, 1); for (RescoreContext rescoreContext : context.rescore()) { - try { - rescoreContext.rescorer().extractTerms(context.searcher(), rescoreContext, new DelegateSet(termsSet)); - } catch (IOException e) { - throw new IllegalStateException("Failed to extract terms", e); + for (Query query : rescoreContext.getQueries()) { + searcher.createWeight(context.searcher().rewrite(query), ScoreMode.COMPLETE, 1); } } - Term[] terms = termsSet.toArray(Term.class); + Term[] terms = stats.keySet().toArray(new Term[0]); TermStatistics[] termStatistics = new TermStatistics[terms.length]; - IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext(); for (int i = 0; i < terms.length; i++) { - if(context.isCancelled()) { - throw new TaskCancelledException("cancelled"); - } - // LUCENE 4 UPGRADE: cache TermStates? - TermStates termContext = TermStates.build(indexReaderContext, terms[i], true); - termStatistics[i] = context.searcher().termStatistics(terms[i], termContext); - } - - ObjectObjectHashMap fieldStatistics = HppcMaps.newNoNullKeysMap(); - for (Term term : terms) { - assert term.field() != null : "field is null"; - if (fieldStatistics.containsKey(term.field()) == false) { - final CollectionStatistics collectionStatistics = context.searcher().collectionStatistics(term.field()); - if (collectionStatistics != null) { - fieldStatistics.put(term.field(), collectionStatistics); - } - if(context.isCancelled()) { - throw new TaskCancelledException("cancelled"); - } - } + termStatistics[i] = stats.get(terms[i]); } context.dfsResult().termsStatistics(terms, termStatistics) @@ -95,58 +96,6 @@ public void execute(SearchContext context) { .maxDoc(context.searcher().getIndexReader().maxDoc()); } catch (Exception e) { throw new DfsPhaseExecutionException(context, "Exception during dfs phase", e); - } finally { - termsSet.clear(); // don't hold on to terms - } - } - - // We need to bridge to JCF world, b/c of Query#extractTerms - private static class DelegateSet extends AbstractSet { - - private final ObjectHashSet delegate; - - private DelegateSet(ObjectHashSet delegate) { - this.delegate = delegate; - } - - @Override - public boolean add(Term term) { - return delegate.add(term); - } - - @Override - public boolean addAll(Collection terms) { - boolean result = false; - for (Term term : terms) { - result = delegate.add(term); - } - return result; - } - - @Override - public Iterator iterator() { - final Iterator> iterator = delegate.iterator(); - return new Iterator() { - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public Term next() { - return iterator.next().value; - } - - @Override - public void remove() { - throw new UnsupportedOperationException(); - } - }; - } - - @Override - public int size() { - return delegate.size(); } } diff --git a/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java b/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java index d70ec62c7af60..6e236d3e8143d 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/QueryRescorer.java @@ -19,19 +19,19 @@ package org.elasticsearch.search.rescore; -import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.TopDocs; import java.io.IOException; import java.util.Arrays; +import java.util.Collections; import java.util.Comparator; +import java.util.List; import java.util.Set; -import java.util.Collections; + import static java.util.stream.Collectors.toSet; public final class QueryRescorer implements Rescorer { @@ -170,6 +170,11 @@ public void setQuery(Query query) { this.query = query; } + @Override + public List getQueries() { + return Collections.singletonList(query); + } + public Query query() { return query; } @@ -203,10 +208,4 @@ public void setScoreMode(String scoreMode) { } } - @Override - public void extractTerms(IndexSearcher searcher, RescoreContext rescoreContext, Set termsSet) throws IOException { - Query query = ((QueryRescoreContext) rescoreContext).query(); - searcher.createWeight(searcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f).extractTerms(termsSet); - } - } diff --git a/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java b/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java index 2401b9ff32900..4f44af6321791 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/RescoreContext.java @@ -19,6 +19,10 @@ package org.elasticsearch.search.rescore; +import org.apache.lucene.search.Query; + +import java.util.Collections; +import java.util.List; import java.util.Set; /** @@ -29,7 +33,7 @@ public class RescoreContext { private final int windowSize; private final Rescorer rescorer; - private Set resroredDocs; //doc Ids for which rescoring was applied + private Set rescoredDocs; //doc Ids for which rescoring was applied /** * Build the context. @@ -55,10 +59,17 @@ public int getWindowSize() { } public void setRescoredDocs(Set docIds) { - resroredDocs = docIds; + rescoredDocs = docIds; } public boolean isRescored(int docId) { - return resroredDocs.contains(docId); + return rescoredDocs.contains(docId); + } + + /** + * Returns queries associated with the rescorer + */ + public List getQueries() { + return Collections.emptyList(); } } diff --git a/server/src/main/java/org/elasticsearch/search/rescore/Rescorer.java b/server/src/main/java/org/elasticsearch/search/rescore/Rescorer.java index 023c9340e4b1d..36c77d2df4124 100644 --- a/server/src/main/java/org/elasticsearch/search/rescore/Rescorer.java +++ b/server/src/main/java/org/elasticsearch/search/rescore/Rescorer.java @@ -19,14 +19,11 @@ package org.elasticsearch.search.rescore; -import org.apache.lucene.index.Term; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.TopDocs; -import org.elasticsearch.action.search.SearchType; import java.io.IOException; -import java.util.Set; /** * A query rescorer interface used to re-rank the Top-K results of a previously @@ -61,10 +58,4 @@ public interface Rescorer { Explanation explain(int topLevelDocId, IndexSearcher searcher, RescoreContext rescoreContext, Explanation sourceExplanation) throws IOException; - /** - * Extracts all terms needed to execute this {@link Rescorer}. This method - * is executed in a distributed frequency collection roundtrip for - * {@link SearchType#DFS_QUERY_THEN_FETCH} - */ - void extractTerms(IndexSearcher searcher, RescoreContext rescoreContext, Set termsSet) throws IOException; }