Skip to content

Commit 0e2ec6f

Browse files
peteralfonsiPeter Alfonsi
andauthored
Boolean must to filter rewrite for queries with constant scores (#18541)
* Add must -> filter boolean rewrite Signed-off-by: Peter Alfonsi <petealft@amazon.com> * fix comment wording Signed-off-by: Peter Alfonsi <petealft@amazon.com> * changelog Signed-off-by: Peter Alfonsi <petealft@amazon.com> * changelog fix Signed-off-by: Peter Alfonsi <petealft@amazon.com> * fix WrapperQueryBuilderTests Signed-off-by: Peter Alfonsi <petealft@amazon.com> * fix PercolatorQuerySearchIT Signed-off-by: Peter Alfonsi <petealft@amazon.com> * rerun gradle check Signed-off-by: Peter Alfonsi <petealft@amazon.com> * rerun gradle check Signed-off-by: Peter Alfonsi <petealft@amazon.com> --------- Signed-off-by: Peter Alfonsi <petealft@amazon.com> Signed-off-by: Peter Alfonsi <peter.alfonsi@gmail.com> Co-authored-by: Peter Alfonsi <petealft@amazon.com>
1 parent 8fdc6b3 commit 0e2ec6f

File tree

6 files changed

+142
-11
lines changed

6 files changed

+142
-11
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
1414
- Add last index request timestamp columns to the `_cat/indices` API. ([10766](https://github.com/opensearch-project/OpenSearch/issues/10766))
1515
- Introduce a new pull-based ingestion plugin for file-based indexing (for local testing) ([#18591](https://github.com/opensearch-project/OpenSearch/pull/18591))
1616
- Add support for search pipeline in search and msearch template ([#18564](https://github.com/opensearch-project/OpenSearch/pull/18564))
17+
- Add BooleanQuery rewrite moving constant-scoring must clauses to filter clauses ([#18510](https://github.com/opensearch-project/OpenSearch/issues/18510))
1718

1819
### Changed
1920
- Update Subject interface to use CheckedRunnable ([#18570](https://github.com/opensearch-project/OpenSearch/issues/18570))

modules/percolator/src/internalClusterTest/java/org/opensearch/percolator/PercolatorQuerySearchIT.java

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -291,43 +291,40 @@ public void testPercolatorRangeQueries() throws Exception {
291291
.get();
292292
logger.info("response={}", response);
293293
assertHitCount(response, 2);
294-
assertThat(response.getHits().getAt(0).getId(), equalTo("3"));
295-
assertThat(response.getHits().getAt(1).getId(), equalTo("1"));
294+
assertSearchHits(response, "3", "1");
296295

297296
source = BytesReference.bytes(jsonBuilder().startObject().field("field1", 11).endObject());
298297
response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get();
299298
assertHitCount(response, 1);
300-
assertThat(response.getHits().getAt(0).getId(), equalTo("1"));
299+
assertSearchHits(response, "1");
301300

302301
// Test double range:
303302
source = BytesReference.bytes(jsonBuilder().startObject().field("field2", 12).endObject());
304303
response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get();
305304
assertHitCount(response, 2);
306-
assertThat(response.getHits().getAt(0).getId(), equalTo("6"));
307-
assertThat(response.getHits().getAt(1).getId(), equalTo("4"));
305+
assertSearchHits(response, "6", "4");
308306

309307
source = BytesReference.bytes(jsonBuilder().startObject().field("field2", 11).endObject());
310308
response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get();
311309
assertHitCount(response, 1);
312-
assertThat(response.getHits().getAt(0).getId(), equalTo("4"));
310+
assertSearchHits(response, "4");
313311

314312
// Test IP range:
315313
source = BytesReference.bytes(jsonBuilder().startObject().field("field3", "192.168.1.5").endObject());
316314
response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get();
317315
assertHitCount(response, 2);
318-
assertThat(response.getHits().getAt(0).getId(), equalTo("9"));
319-
assertThat(response.getHits().getAt(1).getId(), equalTo("7"));
316+
assertSearchHits(response, "9", "7");
320317

321318
source = BytesReference.bytes(jsonBuilder().startObject().field("field3", "192.168.1.4").endObject());
322319
response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get();
323320
assertHitCount(response, 1);
324-
assertThat(response.getHits().getAt(0).getId(), equalTo("7"));
321+
assertSearchHits(response, "7");
325322

326323
// Test date range:
327324
source = BytesReference.bytes(jsonBuilder().startObject().field("field4", "2016-05-15").endObject());
328325
response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get();
329326
assertHitCount(response, 1);
330-
assertThat(response.getHits().getAt(0).getId(), equalTo("10"));
327+
assertSearchHits(response, "10");
331328
}
332329

333330
public void testPercolatorGeoQueries() throws Exception {

server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,6 +229,26 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception {
229229
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs);
230230
}
231231

232+
public void testMustToFilterRewrite() throws Exception {
233+
// Check we still get expected behavior after rewriting must clauses --> filter clauses.
234+
String intField = "int_field";
235+
createIndex("test");
236+
int numDocs = 100;
237+
238+
for (int i = 0; i < numDocs; i++) {
239+
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get();
240+
}
241+
ensureGreen();
242+
waitForRelocation();
243+
forceMerge();
244+
refresh();
245+
246+
int gt = 22;
247+
int lt = 92;
248+
int expectedHitCount = lt - gt - 1;
249+
assertHitCount(client().prepareSearch().setQuery(boolQuery().must(rangeQuery(intField).lt(lt).gt(gt))).get(), expectedHitCount);
250+
}
251+
232252
private String padZeros(int value, int length) {
233253
// String.format() not allowed
234254
String ret = Integer.toString(value);

server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,8 @@
4949
import org.opensearch.core.xcontent.ObjectParser;
5050
import org.opensearch.core.xcontent.XContentBuilder;
5151
import org.opensearch.core.xcontent.XContentParser;
52+
import org.opensearch.index.mapper.MappedFieldType;
53+
import org.opensearch.index.mapper.NumberFieldMapper;
5254

5355
import java.io.IOException;
5456
import java.util.ArrayList;
@@ -401,6 +403,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws
401403
}
402404

403405
changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext);
406+
changed |= rewriteMustClausesToFilter(newBuilder, queryRewriteContext);
404407

405408
if (changed) {
406409
newBuilder.adjustPureNegative = adjustPureNegative;
@@ -550,4 +553,53 @@ private boolean checkAllDocsHaveOneValue(List<LeafReaderContext> contexts, Strin
550553
}
551554
return true;
552555
}
556+
557+
private boolean rewriteMustClausesToFilter(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) {
558+
// If we have must clauses which return the same score for all matching documents, like numeric term queries or ranges,
559+
// moving them from must clauses to filter clauses improves performance in some cases.
560+
// This works because it can let Lucene use MaxScoreCache to skip non-competitive docs.
561+
boolean changed = false;
562+
Set<QueryBuilder> mustClausesToMove = new HashSet<>();
563+
564+
QueryShardContext shardContext;
565+
if (queryRewriteContext == null) {
566+
shardContext = null;
567+
} else {
568+
shardContext = queryRewriteContext.convertToShardContext(); // can still be null
569+
}
570+
571+
for (QueryBuilder clause : mustClauses) {
572+
if (isClauseIrrelevantToScoring(clause, shardContext)) {
573+
mustClausesToMove.add(clause);
574+
changed = true;
575+
}
576+
}
577+
578+
newBuilder.mustClauses.removeAll(mustClausesToMove);
579+
newBuilder.filterClauses.addAll(mustClausesToMove);
580+
return changed;
581+
}
582+
583+
private boolean isClauseIrrelevantToScoring(QueryBuilder clause, QueryShardContext context) {
584+
// This is an incomplete list of clauses this might apply for; it can be expanded in future.
585+
586+
// If a clause is purely numeric, for example a date range, its score is unimportant as
587+
// it'll be the same for all returned docs
588+
if (clause instanceof RangeQueryBuilder) return true;
589+
if (clause instanceof GeoBoundingBoxQueryBuilder) return true;
590+
591+
// Further optimizations depend on knowing whether the field is numeric.
592+
// QueryBuilder.doRewrite() is called several times in the search flow, and the shard context telling us this
593+
// is only available the last time, when it's called from SearchService.executeQueryPhase().
594+
// Skip moving these clauses if we don't have the shard context.
595+
if (context == null) return false;
596+
if (!(clause instanceof WithFieldName wfn)) return false;
597+
MappedFieldType fieldType = context.fieldMapper(wfn.fieldName());
598+
if (!(fieldType instanceof NumberFieldMapper.NumberFieldType)) return false;
599+
600+
if (clause instanceof MatchQueryBuilder) return true;
601+
if (clause instanceof TermQueryBuilder) return true;
602+
if (clause instanceof TermsQueryBuilder) return true;
603+
return false;
604+
}
553605
}

server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@
7373
import static org.hamcrest.CoreMatchers.equalTo;
7474
import static org.hamcrest.CoreMatchers.instanceOf;
7575
import static org.mockito.Mockito.mock;
76+
import static org.mockito.Mockito.when;
7677

7778
public class BoolQueryBuilderTests extends AbstractQueryTestCase<BoolQueryBuilder> {
7879
@Override
@@ -630,6 +631,65 @@ public void testMustNotRewriteDisabledWithoutExactlyOneValuePerDoc() throws Exce
630631
IOUtils.close(w, reader, dir);
631632
}
632633

634+
public void testMustClausesRewritten() throws Exception {
635+
BoolQueryBuilder qb = new BoolQueryBuilder();
636+
637+
// Should be moved
638+
QueryBuilder intTermQuery = new TermQueryBuilder(INT_FIELD_NAME, 200);
639+
QueryBuilder rangeQuery = new RangeQueryBuilder(INT_FIELD_NAME).gt(10).lt(20);
640+
// Should be moved to filter clause, the boost applies equally to all matched docs
641+
QueryBuilder rangeQueryWithBoost = new RangeQueryBuilder(DATE_FIELD_NAME).gt(10).lt(20).boost(2);
642+
QueryBuilder intTermsQuery = new TermsQueryBuilder(INT_FIELD_NAME, new int[] { 1, 4, 100 });
643+
QueryBuilder boundingBoxQuery = new GeoBoundingBoxQueryBuilder(GEO_POINT_FIELD_NAME);
644+
QueryBuilder doubleMatchQuery = new MatchQueryBuilder(DOUBLE_FIELD_NAME, 5.5);
645+
646+
// Should not be moved
647+
QueryBuilder textTermQuery = new TermQueryBuilder(TEXT_FIELD_NAME, "bar");
648+
QueryBuilder textTermsQuery = new TermsQueryBuilder(TEXT_FIELD_NAME, "foo", "bar");
649+
QueryBuilder textMatchQuery = new MatchQueryBuilder(TEXT_FIELD_NAME, "baz");
650+
651+
qb.must(intTermQuery);
652+
qb.must(rangeQuery);
653+
qb.must(rangeQueryWithBoost);
654+
qb.must(intTermsQuery);
655+
qb.must(boundingBoxQuery);
656+
qb.must(doubleMatchQuery);
657+
658+
qb.must(textTermQuery);
659+
qb.must(textTermsQuery);
660+
qb.must(textMatchQuery);
661+
662+
BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext());
663+
for (QueryBuilder clause : List.of(
664+
intTermQuery,
665+
rangeQuery,
666+
rangeQueryWithBoost,
667+
intTermsQuery,
668+
boundingBoxQuery,
669+
doubleMatchQuery
670+
)) {
671+
assertFalse(rewritten.must().contains(clause));
672+
assertTrue(rewritten.filter().contains(clause));
673+
}
674+
for (QueryBuilder clause : List.of(textTermQuery, textTermsQuery, textMatchQuery)) {
675+
assertTrue(rewritten.must().contains(clause));
676+
assertFalse(rewritten.filter().contains(clause));
677+
}
678+
679+
// If we have null QueryShardContext, match/term/terms queries should not be moved as we can't determine if they're numeric.
680+
QueryRewriteContext nullContext = mock(QueryRewriteContext.class);
681+
when(nullContext.convertToShardContext()).thenReturn(null);
682+
rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, nullContext);
683+
for (QueryBuilder clause : List.of(rangeQuery, rangeQueryWithBoost, boundingBoxQuery)) {
684+
assertFalse(rewritten.must().contains(clause));
685+
assertTrue(rewritten.filter().contains(clause));
686+
}
687+
for (QueryBuilder clause : List.of(textTermQuery, textTermsQuery, textMatchQuery, intTermQuery, intTermsQuery, doubleMatchQuery)) {
688+
assertTrue(rewritten.must().contains(clause));
689+
assertFalse(rewritten.filter().contains(clause));
690+
}
691+
}
692+
633693
private QueryBuilder getRangeQueryBuilder(String fieldName, Integer lower, Integer upper, boolean includeLower, boolean includeUpper) {
634694
RangeQueryBuilder rq = new RangeQueryBuilder(fieldName);
635695
if (lower != null) {

server/src/test/java/org/opensearch/index/query/WrapperQueryBuilderTests.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,8 @@ protected WrapperQueryBuilder doCreateTestQueryBuilder() {
9393

9494
@Override
9595
protected void doAssertLuceneQuery(WrapperQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException {
96-
QueryBuilder innerQuery = queryBuilder.rewrite(createShardContext());
96+
// Must rewrite recursively so innerQuery matches query
97+
QueryBuilder innerQuery = Rewriteable.rewrite(queryBuilder, createShardContext());
9798
Query expected = rewrite(innerQuery.toQuery(context));
9899
assertEquals(rewrite(query), expected);
99100
}

0 commit comments

Comments
 (0)