From c9eeffefc1bad6af359f363a9c611e336d31bd3f Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 11 Jun 2025 12:37:03 -0700 Subject: [PATCH 1/8] Add must -> filter boolean rewrite Signed-off-by: Peter Alfonsi --- .../search/query/BooleanQueryIT.java | 20 +++++++ .../index/query/BoolQueryBuilder.java | 52 ++++++++++++++++ .../index/query/BoolQueryBuilderTests.java | 60 +++++++++++++++++++ 3 files changed, 132 insertions(+) diff --git a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java index ff55889e041d5..0a1255004499f 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/query/BooleanQueryIT.java @@ -229,6 +229,26 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception { assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs); } + public void testMustToFilterRewrite() throws Exception { + // Check we still get expected behavior after rewriting must clauses --> filter clauses. + String intField = "int_field"; + createIndex("test"); + int numDocs = 100; + + for (int i = 0; i < numDocs; i++) { + client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get(); + } + ensureGreen(); + waitForRelocation(); + forceMerge(); + refresh(); + + int gt = 22; + int lt = 92; + int expectedHitCount = lt - gt - 1; + assertHitCount(client().prepareSearch().setQuery(boolQuery().must(rangeQuery(intField).lt(lt).gt(gt))).get(), expectedHitCount); + } + private String padZeros(int value, int length) { // String.format() not allowed String ret = Integer.toString(value); diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index 85b555cc612ab..ed7a3cce7629f 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -49,6 +49,8 @@ import org.opensearch.core.xcontent.ObjectParser; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.core.xcontent.XContentParser; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.NumberFieldMapper; import java.io.IOException; import java.util.ArrayList; @@ -401,6 +403,7 @@ protected QueryBuilder doRewrite(QueryRewriteContext queryRewriteContext) throws } changed |= rewriteMustNotRangeClausesToShould(newBuilder, queryRewriteContext); + changed |= rewriteMustClausesToFilter(newBuilder, queryRewriteContext); if (changed) { newBuilder.adjustPureNegative = adjustPureNegative; @@ -550,4 +553,53 @@ private boolean checkAllDocsHaveOneValue(List contexts, Strin } return true; } + + private boolean rewriteMustClausesToFilter(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) { + // If we have must clauses which return the same score for all matching documents, like term queries or numeric ranges, + // moving them from must clauses to filter clauses improves performance in some cases. + // This works because it can let Lucene use MaxScoreCache to skip non-competitive docs. + boolean changed = false; + Set mustClausesToMove = new HashSet<>(); + + QueryShardContext shardContext; + if (queryRewriteContext == null) { + shardContext = null; + } else { + shardContext = queryRewriteContext.convertToShardContext(); // can still be null + } + + for (QueryBuilder clause : mustClauses) { + if (isClauseIrrelevantToScoring(clause, shardContext)) { + mustClausesToMove.add(clause); + changed = true; + } + } + + newBuilder.mustClauses.removeAll(mustClausesToMove); + newBuilder.filterClauses.addAll(mustClausesToMove); + return changed; + } + + private boolean isClauseIrrelevantToScoring(QueryBuilder clause, QueryShardContext context) { + // This is an incomplete list of clauses this might apply for; it can be expanded in future. + + // If a clause is purely numeric, for example a date range, its score is unimportant as + // it'll be the same for all returned docs + if (clause instanceof RangeQueryBuilder) return true; + if (clause instanceof GeoBoundingBoxQueryBuilder) return true; + + // Further optimizations depend on knowing whether the field is numeric. + // QueryBuilder.doRewrite() is called several times in the search flow, and the shard context telling us this + // is only available the last time, when it's called from SearchService.executeQueryPhase(). + // Skip moving these clauses if we don't have the shard context. + if (context == null) return false; + if (!(clause instanceof WithFieldName wfn)) return false; + MappedFieldType fieldType = context.fieldMapper(wfn.fieldName()); + if (!(fieldType instanceof NumberFieldMapper.NumberFieldType)) return false; + + if (clause instanceof MatchQueryBuilder) return true; + if (clause instanceof TermQueryBuilder) return true; + if (clause instanceof TermsQueryBuilder) return true; + return false; + } } diff --git a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java index f08b7786c22af..c9e988117ee9f 100644 --- a/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/BoolQueryBuilderTests.java @@ -73,6 +73,7 @@ import static org.hamcrest.CoreMatchers.equalTo; import static org.hamcrest.CoreMatchers.instanceOf; import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class BoolQueryBuilderTests extends AbstractQueryTestCase { @Override @@ -630,6 +631,65 @@ public void testMustNotRewriteDisabledWithoutExactlyOneValuePerDoc() throws Exce IOUtils.close(w, reader, dir); } + public void testMustClausesRewritten() throws Exception { + BoolQueryBuilder qb = new BoolQueryBuilder(); + + // Should be moved + QueryBuilder intTermQuery = new TermQueryBuilder(INT_FIELD_NAME, 200); + QueryBuilder rangeQuery = new RangeQueryBuilder(INT_FIELD_NAME).gt(10).lt(20); + // Should be moved to filter clause, the boost applies equally to all matched docs + QueryBuilder rangeQueryWithBoost = new RangeQueryBuilder(DATE_FIELD_NAME).gt(10).lt(20).boost(2); + QueryBuilder intTermsQuery = new TermsQueryBuilder(INT_FIELD_NAME, new int[] { 1, 4, 100 }); + QueryBuilder boundingBoxQuery = new GeoBoundingBoxQueryBuilder(GEO_POINT_FIELD_NAME); + QueryBuilder doubleMatchQuery = new MatchQueryBuilder(DOUBLE_FIELD_NAME, 5.5); + + // Should not be moved + QueryBuilder textTermQuery = new TermQueryBuilder(TEXT_FIELD_NAME, "bar"); + QueryBuilder textTermsQuery = new TermsQueryBuilder(TEXT_FIELD_NAME, "foo", "bar"); + QueryBuilder textMatchQuery = new MatchQueryBuilder(TEXT_FIELD_NAME, "baz"); + + qb.must(intTermQuery); + qb.must(rangeQuery); + qb.must(rangeQueryWithBoost); + qb.must(intTermsQuery); + qb.must(boundingBoxQuery); + qb.must(doubleMatchQuery); + + qb.must(textTermQuery); + qb.must(textTermsQuery); + qb.must(textMatchQuery); + + BoolQueryBuilder rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, createShardContext()); + for (QueryBuilder clause : List.of( + intTermQuery, + rangeQuery, + rangeQueryWithBoost, + intTermsQuery, + boundingBoxQuery, + doubleMatchQuery + )) { + assertFalse(rewritten.must().contains(clause)); + assertTrue(rewritten.filter().contains(clause)); + } + for (QueryBuilder clause : List.of(textTermQuery, textTermsQuery, textMatchQuery)) { + assertTrue(rewritten.must().contains(clause)); + assertFalse(rewritten.filter().contains(clause)); + } + + // If we have null QueryShardContext, match/term/terms queries should not be moved as we can't determine if they're numeric. + QueryRewriteContext nullContext = mock(QueryRewriteContext.class); + when(nullContext.convertToShardContext()).thenReturn(null); + rewritten = (BoolQueryBuilder) Rewriteable.rewrite(qb, nullContext); + for (QueryBuilder clause : List.of(rangeQuery, rangeQueryWithBoost, boundingBoxQuery)) { + assertFalse(rewritten.must().contains(clause)); + assertTrue(rewritten.filter().contains(clause)); + } + for (QueryBuilder clause : List.of(textTermQuery, textTermsQuery, textMatchQuery, intTermQuery, intTermsQuery, doubleMatchQuery)) { + assertTrue(rewritten.must().contains(clause)); + assertFalse(rewritten.filter().contains(clause)); + } + } + private QueryBuilder getRangeQueryBuilder(String fieldName, Integer lower, Integer upper, boolean includeLower, boolean includeUpper) { RangeQueryBuilder rq = new RangeQueryBuilder(fieldName); if (lower != null) { From 0cd16101754f17be00807688ed383e7f29aa0bd0 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Mon, 16 Jun 2025 11:10:33 -0700 Subject: [PATCH 2/8] fix comment wording Signed-off-by: Peter Alfonsi --- .../main/java/org/opensearch/index/query/BoolQueryBuilder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java index ed7a3cce7629f..82f24b6288cde 100644 --- a/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/BoolQueryBuilder.java @@ -555,7 +555,7 @@ private boolean checkAllDocsHaveOneValue(List contexts, Strin } private boolean rewriteMustClausesToFilter(BoolQueryBuilder newBuilder, QueryRewriteContext queryRewriteContext) { - // If we have must clauses which return the same score for all matching documents, like term queries or numeric ranges, + // If we have must clauses which return the same score for all matching documents, like numeric term queries or ranges, // moving them from must clauses to filter clauses improves performance in some cases. // This works because it can let Lucene use MaxScoreCache to skip non-competitive docs. boolean changed = false; From 5a5ca5ff6bcc56fa50d684bc7be6c616a6be1036 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Tue, 17 Jun 2025 10:38:56 -0700 Subject: [PATCH 3/8] changelog Signed-off-by: Peter Alfonsi --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 273c4c3164b71..42acf5636b815 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -41,6 +41,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Added File Cache Pinning ([#17617](https://github.com/opensearch-project/OpenSearch/issues/13648)) - Support consumer reset in Resume API for pull-based ingestion. This PR includes a breaking change for the experimental pull-based ingestion feature. ([#18332](https://github.com/opensearch-project/OpenSearch/pull/18332)) - Add FIPS build tooling ([#4254](https://github.com/opensearch-project/security/issues/4254)) +- Add BooleanQuery rewrite moving constant-scoring must clauses to filter clases ([#18510](https://github.com/opensearch-project/OpenSearch/issues/18510)) ### Changed - Create generic DocRequest to better categorize ActionRequests ([#18269](https://github.com/opensearch-project/OpenSearch/pull/18269))) From 216c4610ad2140775ab9fc60910250ef5357247a Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Tue, 17 Jun 2025 10:41:14 -0700 Subject: [PATCH 4/8] changelog fix Signed-off-by: Peter Alfonsi --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index d4440308899b6..a30e7cc36fc9b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -53,7 +53,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Support system generated ingest pipelines for bulk update operations ([#18277](https://github.com/opensearch-project/OpenSearch/pull/18277))) - Added FS Health Check Failure metric ([#18435](https://github.com/opensearch-project/OpenSearch/pull/18435)) - Ability to run Code Coverage with Gradle and produce the jacoco reports locally ([#18509](https://github.com/opensearch-project/OpenSearch/issues/18509)) -- Add BooleanQuery rewrite moving constant-scoring must clauses to filter clases ([#18510](https://github.com/opensearch-project/OpenSearch/issues/18510)) +- Add BooleanQuery rewrite moving constant-scoring must clauses to filter clauses ([#18510](https://github.com/opensearch-project/OpenSearch/issues/18510)) ### Changed - Create generic DocRequest to better categorize ActionRequests ([#18269](https://github.com/opensearch-project/OpenSearch/pull/18269))) From 60c073bfdcd7a1cb8e99eed32f80f4f99e43413d Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 18 Jun 2025 11:47:23 -0700 Subject: [PATCH 5/8] fix WrapperQueryBuilderTests Signed-off-by: Peter Alfonsi --- .../org/opensearch/index/query/WrapperQueryBuilderTests.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/test/java/org/opensearch/index/query/WrapperQueryBuilderTests.java b/server/src/test/java/org/opensearch/index/query/WrapperQueryBuilderTests.java index 1786517c1aa1d..286c1487b2cd8 100644 --- a/server/src/test/java/org/opensearch/index/query/WrapperQueryBuilderTests.java +++ b/server/src/test/java/org/opensearch/index/query/WrapperQueryBuilderTests.java @@ -93,7 +93,8 @@ protected WrapperQueryBuilder doCreateTestQueryBuilder() { @Override protected void doAssertLuceneQuery(WrapperQueryBuilder queryBuilder, Query query, QueryShardContext context) throws IOException { - QueryBuilder innerQuery = queryBuilder.rewrite(createShardContext()); + // Must rewrite recursively so innerQuery matches query + QueryBuilder innerQuery = Rewriteable.rewrite(queryBuilder, createShardContext()); Query expected = rewrite(innerQuery.toQuery(context)); assertEquals(rewrite(query), expected); } From 8628d750ba541e51dfee204ddcd59d3168c9bd32 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 18 Jun 2025 11:56:40 -0700 Subject: [PATCH 6/8] fix PercolatorQuerySearchIT Signed-off-by: Peter Alfonsi --- .../percolator/PercolatorQuerySearchIT.java | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/modules/percolator/src/internalClusterTest/java/org/opensearch/percolator/PercolatorQuerySearchIT.java b/modules/percolator/src/internalClusterTest/java/org/opensearch/percolator/PercolatorQuerySearchIT.java index d03173b6b37fe..b141e4865e04c 100644 --- a/modules/percolator/src/internalClusterTest/java/org/opensearch/percolator/PercolatorQuerySearchIT.java +++ b/modules/percolator/src/internalClusterTest/java/org/opensearch/percolator/PercolatorQuerySearchIT.java @@ -291,43 +291,40 @@ public void testPercolatorRangeQueries() throws Exception { .get(); logger.info("response={}", response); assertHitCount(response, 2); - assertThat(response.getHits().getAt(0).getId(), equalTo("3")); - assertThat(response.getHits().getAt(1).getId(), equalTo("1")); + assertSearchHits(response, "3", "1"); source = BytesReference.bytes(jsonBuilder().startObject().field("field1", 11).endObject()); response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get(); assertHitCount(response, 1); - assertThat(response.getHits().getAt(0).getId(), equalTo("1")); + assertSearchHits(response, "1"); // Test double range: source = BytesReference.bytes(jsonBuilder().startObject().field("field2", 12).endObject()); response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get(); assertHitCount(response, 2); - assertThat(response.getHits().getAt(0).getId(), equalTo("6")); - assertThat(response.getHits().getAt(1).getId(), equalTo("4")); + assertSearchHits(response, "6", "4"); source = BytesReference.bytes(jsonBuilder().startObject().field("field2", 11).endObject()); response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get(); assertHitCount(response, 1); - assertThat(response.getHits().getAt(0).getId(), equalTo("4")); + assertSearchHits(response, "4"); // Test IP range: source = BytesReference.bytes(jsonBuilder().startObject().field("field3", "192.168.1.5").endObject()); response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get(); assertHitCount(response, 2); - assertThat(response.getHits().getAt(0).getId(), equalTo("9")); - assertThat(response.getHits().getAt(1).getId(), equalTo("7")); + assertSearchHits(response, "9", "7"); source = BytesReference.bytes(jsonBuilder().startObject().field("field3", "192.168.1.4").endObject()); response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get(); assertHitCount(response, 1); - assertThat(response.getHits().getAt(0).getId(), equalTo("7")); + assertSearchHits(response, "7"); // Test date range: source = BytesReference.bytes(jsonBuilder().startObject().field("field4", "2016-05-15").endObject()); response = client().prepareSearch().setQuery(new PercolateQueryBuilder("query", source, MediaTypeRegistry.JSON)).get(); assertHitCount(response, 1); - assertThat(response.getHits().getAt(0).getId(), equalTo("10")); + assertSearchHits(response, "10"); } public void testPercolatorGeoQueries() throws Exception { From f866e676f128636cfcfc882a261ba732a977fba3 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 18 Jun 2025 12:14:44 -0700 Subject: [PATCH 7/8] rerun gradle check Signed-off-by: Peter Alfonsi From 049c59fad86cc8bde60854fcfb9389d048101f67 Mon Sep 17 00:00:00 2001 From: Peter Alfonsi Date: Wed, 18 Jun 2025 14:57:10 -0700 Subject: [PATCH 8/8] rerun gradle check Signed-off-by: Peter Alfonsi