diff --git a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java index 362c8870f652d..f08600cdfd0e9 100644 --- a/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java +++ b/modules/percolator/src/main/java/org/elasticsearch/percolator/QueryAnalyzer.java @@ -232,7 +232,7 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { List conjunctions = conjunctionsWithUnknowns.stream().filter(r -> r.isUnknown() == false).collect(Collectors.toList()); if (conjunctions.isEmpty()) { if (conjunctionsWithUnknowns.isEmpty()) { - throw new IllegalArgumentException("Must have at least on conjunction sub result"); + throw new IllegalArgumentException("Must have at least one conjunction sub result"); } return conjunctionsWithUnknowns.get(0); // all conjunctions are unknown, so just return the first one } @@ -247,47 +247,53 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { int msm = 0; boolean verified = conjunctionsWithUnknowns.size() == conjunctions.size(); boolean matchAllDocs = true; - boolean hasDuplicateTerms = false; Set extractions = new HashSet<>(); Set seenRangeFields = new HashSet<>(); for (Result result : conjunctions) { - // In case that there are duplicate query extractions we need to be careful with - // incrementing msm, - // because that could lead to valid matches not becoming candidate matches: - // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3) - // doc: field: val1 val2 val3 - // So lets be protective and decrease the msm: + int resultMsm = result.minimumShouldMatch; for (QueryExtraction queryExtraction : result.extractions) { if (queryExtraction.range != null) { // In case of range queries each extraction does not simply increment the - // minimum_should_match - // for that percolator query like for a term based extraction, so that can lead - // to more false - // positives for percolator queries with range queries than term based queries. - // The is because the way number fields are extracted from the document to be - // percolated. - // Per field a single range is extracted and if a percolator query has two or - // more range queries - // on the same field, then the minimum should match can be higher than clauses - // in the CoveringQuery. - // Therefore right now the minimum should match is incremented once per number - // field when processing - // the percolator query at index time. - if (seenRangeFields.add(queryExtraction.range.fieldName)) { - resultMsm = 1; - } else { - resultMsm = 0; + // minimum_should_match for that percolator query like for a term based extraction, + // so that can lead to more false positives for percolator queries with range queries + // than term based queries. + // This is because the way number fields are extracted from the document to be + // percolated. Per field a single range is extracted and if a percolator query has two or + // more range queries on the same field, then the minimum should match can be higher than clauses + // in the CoveringQuery. Therefore right now the minimum should match is only incremented once per + // number field when processing the percolator query at index time. + // For multiple ranges within a single extraction (ie from an existing conjunction or disjunction) + // then this will already have been taken care of, so we only check against fieldnames from + // previously processed extractions, and don't add to the seenRangeFields list until all + // extractions from this result are processed + if (seenRangeFields.contains(queryExtraction.range.fieldName)) { + resultMsm = Math.max(0, resultMsm - 1); + verified = false; } } - - if (extractions.contains(queryExtraction)) { - resultMsm = Math.max(0, resultMsm - 1); - verified = false; + else { + // In case that there are duplicate term query extractions we need to be careful with + // incrementing msm, because that could lead to valid matches not becoming candidate matches: + // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3) + // doc: field: val1 val2 val3 + // So lets be protective and decrease the msm: + if (extractions.contains(queryExtraction)) { + resultMsm = Math.max(0, resultMsm - 1); + verified = false; + } } } msm += resultMsm; + // add range fields from this Result to the seenRangeFields set so that minimumShouldMatch is correctly + // calculated for subsequent Results + result.extractions.stream() + .map(e -> e.range) + .filter(Objects::nonNull) + .map(e -> e.fieldName) + .forEach(seenRangeFields::add); + if (result.verified == false // If some inner extractions are optional, the result can't be verified || result.minimumShouldMatch < result.extractions.size()) { @@ -299,7 +305,7 @@ private static Result handleConjunction(List conjunctionsWithUnknowns) { if (matchAllDocs) { return new Result(matchAllDocs, verified); } else { - return new Result(verified, extractions, hasDuplicateTerms ? 1 : msm); + return new Result(verified, extractions, msm); } } diff --git a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java index 91c815c40322e..1c00d0555b41a 100644 --- a/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java +++ b/modules/percolator/src/test/java/org/elasticsearch/percolator/QueryAnalyzerTests.java @@ -78,6 +78,7 @@ import static org.elasticsearch.percolator.QueryAnalyzer.analyze; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.is; +import static org.hamcrest.collection.IsCollectionWithSize.hasSize; public class QueryAnalyzerTests extends ESTestCase { @@ -1208,4 +1209,135 @@ public void testIntervalQueries() { assertTermsEqual(result.extractions, new Term("field", "a")); } + public void testCombinedRangeAndTermWithMinimumShouldMatch() { + + Query disj = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 0, 10), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .setMinimumNumberShouldMatch(2) + .build(); + + Result r = analyze(disj, Version.CURRENT); + assertThat(r.minimumShouldMatch, equalTo(1)); + assertThat(r.extractions, hasSize(2)); + assertFalse(r.matchAllDocs); + assertFalse(r.verified); + + Query q = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 0, 10), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.FILTER) + .setMinimumNumberShouldMatch(2) + .build(); + + Result result = analyze(q, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.extractions.size(), equalTo(2)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + q = new BooleanQuery.Builder() + .add(q, Occur.MUST) + .add(q, Occur.MUST) + .build(); + + result = analyze(q, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(1)); + assertThat(result.extractions.size(), equalTo(2)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + Query q2 = new BooleanQuery.Builder() + .add(new TermQuery(new Term("f", "v1")), Occur.FILTER) + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + + result = analyze(q2, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(3)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple range queries on different fields + Query q3 = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i2", 15, 20), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + result = analyze(q3, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple disjoint range queries on the same field + Query q4 = new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i", 25, 30), Occur.SHOULD) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v1")), Occur.SHOULD) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .setMinimumNumberShouldMatch(1) + .build(); + result = analyze(q4, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(5)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple conjunction range queries on the same field + Query q5 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 25, 30), Occur.MUST) + .build(), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.MUST) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .build(); + result = analyze(q5, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(2)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // multiple conjunction range queries on different fields + Query q6 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 15, 20), Occur.MUST) + .add(IntPoint.newRangeQuery("i2", 25, 30), Occur.MUST) + .build(), Occur.MUST) + .add(IntPoint.newRangeQuery("i", 35, 40), Occur.MUST) + .add(new TermQuery(new Term("f", "v2")), Occur.MUST) + .build(); + result = analyze(q6, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertThat(result.extractions, hasSize(4)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + + // mixed term and range conjunctions + Query q7 = new BooleanQuery.Builder() + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 1, 2), Occur.MUST) + .add(new TermQuery(new Term("f", "1")), Occur.MUST) + .build(), Occur.MUST) + .add(new BooleanQuery.Builder() + .add(IntPoint.newRangeQuery("i", 1, 2), Occur.MUST) + .add(new TermQuery(new Term("f", "2")), Occur.MUST) + .build(), Occur.MUST) + .build(); + result = analyze(q7, Version.CURRENT); + assertThat(result.minimumShouldMatch, equalTo(3)); + assertThat(result.extractions, hasSize(3)); + assertFalse(result.verified); + assertFalse(result.matchAllDocs); + } + }