5454import java .util .ArrayList ;
5555import java .util .Arrays ;
5656import java .util .Collections ;
57+ import java .util .Comparator ;
5758import java .util .HashMap ;
5859import java .util .HashSet ;
5960import java .util .List ;
6061import java .util .Map ;
6162import java .util .Objects ;
6263import java .util .Set ;
6364import java .util .function .BiFunction ;
65+ import java .util .stream .Collectors ;
6466
6567import static java .util .stream .Collectors .toSet ;
6668
@@ -366,36 +368,37 @@ private static BiFunction<Query, Version, Result> booleanQuery() {
366368 Set <QueryExtraction > extractions = new HashSet <>();
367369 Set <String > seenRangeFields = new HashSet <>();
368370 for (Result result : results ) {
369- QueryExtraction [] t = result .extractions .toArray (new QueryExtraction [1 ]);
370- if (result .extractions .size () == 1 && t [0 ].range != null ) {
371- // In case of range queries each extraction does not simply increment the minimum_should_match
372- // for that percolator query like for a term based extraction, so that can lead to more false
373- // positives for percolator queries with range queries than term based queries.
374- // The is because the way number fields are extracted from the document to be percolated.
375- // Per field a single range is extracted and if a percolator query has two or more range queries
376- // on the same field than the the minimum should match can be higher than clauses in the CoveringQuery.
377- // Therefore right now the minimum should match is incremented once per number field when processing
378- // the percolator query at index time.
379- if (seenRangeFields .add (t [0 ].range .fieldName )) {
380- msm += 1 ;
381- }
382- } else {
383- // In case that there are duplicate query extractions we need to be careful with incrementing msm,
384- // because that could lead to valid matches not becoming candidate matches:
385- // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
386- // doc: field: val1 val2 val3
387- // So lets be protective and decrease the msm:
388- int resultMsm = result .minimumShouldMatch ;
389- for (QueryExtraction queryExtraction : result .extractions ) {
390- if (extractions .contains (queryExtraction )) {
391- // To protect against negative msm:
392- // (sub results could consist out of disjunction and conjunction and
393- // then we do not know which extraction contributed to msm)
394- resultMsm = Math .max (0 , resultMsm - 1 );
371+ // In case that there are duplicate query extractions we need to be careful with incrementing msm,
372+ // because that could lead to valid matches not becoming candidate matches:
373+ // query: (field:val1 AND field:val2) AND (field:val2 AND field:val3)
374+ // doc: field: val1 val2 val3
375+ // So lets be protective and decrease the msm:
376+ int resultMsm = result .minimumShouldMatch ;
377+ for (QueryExtraction queryExtraction : result .extractions ) {
378+ if (queryExtraction .range != null ) {
379+ // In case of range queries each extraction does not simply increment the minimum_should_match
380+ // for that percolator query like for a term based extraction, so that can lead to more false
381+ // positives for percolator queries with range queries than term based queries.
382+ // The is because the way number fields are extracted from the document to be percolated.
383+ // Per field a single range is extracted and if a percolator query has two or more range queries
384+ // on the same field than the the minimum should match can be higher than clauses in the CoveringQuery.
385+ // Therefore right now the minimum should match is incremented once per number field when processing
386+ // the percolator query at index time.
387+ if (seenRangeFields .add (queryExtraction .range .fieldName )) {
388+ resultMsm = 1 ;
389+ } else {
390+ resultMsm = 0 ;
395391 }
396392 }
397- msm += resultMsm ;
393+
394+ if (extractions .contains (queryExtraction )) {
395+ // To protect against negative msm:
396+ // (sub results could consist out of disjunction and conjunction and
397+ // then we do not know which extraction contributed to msm)
398+ resultMsm = Math .max (0 , resultMsm - 1 );
399+ }
398400 }
401+ msm += resultMsm ;
399402 verified &= result .verified ;
400403 matchAllDocs &= result .matchAllDocs ;
401404 extractions .addAll (result .extractions );
@@ -518,8 +521,7 @@ private static BiFunction<Query, Version, Result> toParentBlockJoinQuery() {
518521 private static Result handleDisjunction (List <Query > disjunctions , int requiredShouldClauses , boolean otherClauses ,
519522 Version version ) {
520523 // Keep track of the msm for each clause:
521- int [] msmPerClause = new int [disjunctions .size ()];
522- String [] rangeFieldNames = new String [disjunctions .size ()];
524+ List <DisjunctionClause > clauses = new ArrayList <>(disjunctions .size ());
523525 boolean verified = otherClauses == false ;
524526 if (version .before (Version .V_6_1_0 )) {
525527 verified &= requiredShouldClauses <= 1 ;
@@ -535,17 +537,14 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
535537 }
536538 int resultMsm = subResult .minimumShouldMatch ;
537539 for (QueryExtraction extraction : subResult .extractions ) {
538- if (terms .contains (extraction )) {
539- resultMsm = Math .max (1 , resultMsm - 1 );
540+ if (terms .add (extraction ) == false ) {
541+ resultMsm = Math .max (0 , resultMsm - 1 );
540542 }
541543 }
542- msmPerClause [i ] = resultMsm ;
543- terms .addAll (subResult .extractions );
544-
545- QueryExtraction [] t = subResult .extractions .toArray (new QueryExtraction [1 ]);
546- if (subResult .extractions .size () == 1 && t [0 ].range != null ) {
547- rangeFieldNames [i ] = t [0 ].range .fieldName ;
548- }
544+ clauses .add (new DisjunctionClause (resultMsm , subResult .extractions .stream ()
545+ .filter (extraction -> extraction .range != null )
546+ .map (extraction -> extraction .range .fieldName )
547+ .collect (toSet ())));
549548 }
550549 boolean matchAllDocs = numMatchAllClauses > 0 && numMatchAllClauses >= requiredShouldClauses ;
551550
@@ -554,15 +553,20 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
554553 Set <String > seenRangeFields = new HashSet <>();
555554 // Figure out what the combined msm is for this disjunction:
556555 // (sum the lowest required clauses, otherwise we're too strict and queries may not match)
557- Arrays .sort (msmPerClause );
558- int limit = Math .min (msmPerClause .length , Math .max (1 , requiredShouldClauses ));
556+ clauses = clauses .stream ()
557+ .filter (o -> o .msm > 0 )
558+ .sorted (Comparator .comparingInt (o -> o .msm ))
559+ .collect (Collectors .toList ());
560+ int limit = Math .min (clauses .size (), Math .max (1 , requiredShouldClauses ));
559561 for (int i = 0 ; i < limit ; i ++) {
560- if (rangeFieldNames [i ] != null ) {
561- if (seenRangeFields .add (rangeFieldNames [i ])) {
562- msm += 1 ;
562+ if (clauses .get (i ).rangeFieldNames .isEmpty () == false ) {
563+ for (String rangeField : clauses .get (i ).rangeFieldNames ) {
564+ if (seenRangeFields .add (rangeField )) {
565+ msm += 1 ;
566+ }
563567 }
564568 } else {
565- msm += msmPerClause [ i ] ;
569+ msm += clauses . get ( i ). msm ;
566570 }
567571 }
568572 } else {
@@ -575,6 +579,17 @@ private static Result handleDisjunction(List<Query> disjunctions, int requiredSh
575579 }
576580 }
577581
582+ static class DisjunctionClause {
583+
584+ final int msm ;
585+ final Set <String > rangeFieldNames ;
586+
587+ DisjunctionClause (int msm , Set <String > rangeFieldNames ) {
588+ this .msm = msm ;
589+ this .rangeFieldNames = rangeFieldNames ;
590+ }
591+ }
592+
578593 static Set <QueryExtraction > selectBestExtraction (Set <QueryExtraction > extractions1 , Set <QueryExtraction > extractions2 ) {
579594 assert extractions1 != null || extractions2 != null ;
580595 if (extractions1 == null ) {
0 commit comments