diff --git a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java index dd3ac992475b9..1700979c32d64 100644 --- a/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java +++ b/server/src/main/java/org/apache/lucene/queries/BlendedTermQuery.java @@ -113,23 +113,17 @@ protected void blend(final TermStates[] contexts, int maxDoc, IndexReader reader // TODO: Maybe it could also make sense to assume independent distributions of documents and eg. have: // df = df1 + df2 - (df1 * df2 / maxDoc)? max = Math.max(df, max); - if (minSumTTF != -1 && ctx.totalTermFreq() != -1) { + if (ctx.totalTermFreq() > 0) { // we need to find out the minimum sumTTF to adjust the statistics // otherwise the statistics don't match minSumTTF = Math.min(minSumTTF, reader.getSumTotalTermFreq(terms[i].field())); - } else { - minSumTTF = -1; } } - if (minSumTTF != -1 && maxDoc > minSumTTF) { - maxDoc = (int)minSumTTF; - } - if (max == 0) { return; // we are done that term doesn't exist at all } - long sumTTF = minSumTTF == -1 ? -1 : 0; + long sumTTF = 0; final int[] tieBreak = new int[contexts.length]; for (int i = 0; i < tieBreak.length; ++i) { tieBreak[i] = i; @@ -165,11 +159,7 @@ protected int compare(int i, int j) { } contexts[i] = ctx = adjustDF(reader.getContext(), ctx, Math.min(maxDoc, actualDf)); prev = current; - if (sumTTF >= 0 && ctx.totalTermFreq() >= 0) { - sumTTF += ctx.totalTermFreq(); - } else { - sumTTF = -1; // omit once TF is omitted anywhere! - } + sumTTF += ctx.totalTermFreq(); } sumTTF = Math.min(sumTTF, minSumTTF); for (int i = 0; i < contexts.length; i++) { @@ -177,17 +167,12 @@ protected int compare(int i, int j) { if (df == 0) { continue; } - // the blended sumTTF can't be greater than the sumTTTF on the field - final long fixedTTF = sumTTF == -1 ? -1 : sumTTF; - contexts[i] = adjustTTF(reader.getContext(), contexts[i], fixedTTF); + contexts[i] = adjustTTF(reader.getContext(), contexts[i], sumTTF); } } private TermStates adjustTTF(IndexReaderContext readerContext, TermStates termContext, long sumTTF) throws IOException { assert termContext.wasBuiltFor(readerContext); - if (sumTTF == -1 && termContext.totalTermFreq() == -1) { - return termContext; - } TermStates newTermContext = new TermStates(readerContext); List leaves = readerContext.leaves(); final int len; @@ -213,12 +198,7 @@ private TermStates adjustTTF(IndexReaderContext readerContext, TermStates termCo private static TermStates adjustDF(IndexReaderContext readerContext, TermStates ctx, int newDocFreq) throws IOException { assert ctx.wasBuiltFor(readerContext); // Use a value of ttf that is consistent with the doc freq (ie. gte) - long newTTF; - if (ctx.totalTermFreq() < 0) { - newTTF = -1; - } else { - newTTF = Math.max(ctx.totalTermFreq(), newDocFreq); - } + long newTTF = Math.max(ctx.totalTermFreq(), newDocFreq); List leaves = readerContext.leaves(); final int len; if (leaves == null) { diff --git a/server/src/test/java/org/apache/lucene/queries/BlendedTermQueryTests.java b/server/src/test/java/org/apache/lucene/queries/BlendedTermQueryTests.java index 1ad067a7e2b36..ce33c247a3337 100644 --- a/server/src/test/java/org/apache/lucene/queries/BlendedTermQueryTests.java +++ b/server/src/test/java/org/apache/lucene/queries/BlendedTermQueryTests.java @@ -28,10 +28,12 @@ import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.MultiReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermStates; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryUtils; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreMode; @@ -52,6 +54,8 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.instanceOf; public class BlendedTermQueryTests extends ESTestCase { public void testDismaxQuery() throws IOException { @@ -114,6 +118,61 @@ public void testDismaxQuery() throws IOException { assertEquals(Integer.toString(1), reader.document(scoreDocs[0].doc).getField("id").stringValue()); } + { + // test with an unknown field + String[] fields = new String[] {"username", "song", "unknown_field"}; + Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "foo"), 1.0f); + Query rewrite = searcher.rewrite(query); + assertThat(rewrite, instanceOf(BooleanQuery.class)); + for (BooleanClause clause : (BooleanQuery) rewrite) { + assertThat(clause.getQuery(), instanceOf(TermQuery.class)); + TermQuery termQuery = (TermQuery) clause.getQuery(); + TermStates termStates = termQuery.getTermStates(); + if (termQuery.getTerm().field().equals("unknown_field")) { + assertThat(termStates.docFreq(), equalTo(0)); + assertThat(termStates.totalTermFreq(), equalTo(0L)); + } else { + assertThat(termStates.docFreq(), greaterThan(0)); + assertThat(termStates.totalTermFreq(), greaterThan(0L)); + } + } + assertThat(searcher.search(query, 10).totalHits.value, equalTo((long) iters + username.length)); + } + { + // test with an unknown field and an unknown term + String[] fields = new String[] {"username", "song", "unknown_field"}; + Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "unknown_term"), 1.0f); + Query rewrite = searcher.rewrite(query); + assertThat(rewrite, instanceOf(BooleanQuery.class)); + for (BooleanClause clause : (BooleanQuery) rewrite) { + assertThat(clause.getQuery(), instanceOf(TermQuery.class)); + TermQuery termQuery = (TermQuery) clause.getQuery(); + TermStates termStates = termQuery.getTermStates(); + assertThat(termStates.docFreq(), equalTo(0)); + assertThat(termStates.totalTermFreq(), equalTo(0L)); + } + assertThat(searcher.search(query, 10).totalHits.value, equalTo(0L)); + } + { + // test with an unknown field and a term that is present in only one field + String[] fields = new String[] {"username", "song", "id", "unknown_field"}; + Query query = BlendedTermQuery.dismaxBlendedQuery(toTerms(fields, "fan"), 1.0f); + Query rewrite = searcher.rewrite(query); + assertThat(rewrite, instanceOf(BooleanQuery.class)); + for (BooleanClause clause : (BooleanQuery) rewrite) { + assertThat(clause.getQuery(), instanceOf(TermQuery.class)); + TermQuery termQuery = (TermQuery) clause.getQuery(); + TermStates termStates = termQuery.getTermStates(); + if (termQuery.getTerm().field().equals("username")) { + assertThat(termStates.docFreq(), equalTo(1)); + assertThat(termStates.totalTermFreq(), equalTo(1L)); + } else { + assertThat(termStates.docFreq(), equalTo(0)); + assertThat(termStates.totalTermFreq(), equalTo(0L)); + } + } + assertThat(searcher.search(query, 10).totalHits.value, equalTo(1L)); + } reader.close(); w.close(); dir.close();