Skip to content
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [Star-Tree] Add star-tree search related stats ([#18707](https://github.com/opensearch-project/OpenSearch/pull/18707))
- Add support for plugins to profile information ([#18656](https://github.com/opensearch-project/OpenSearch/pull/18656))
- Add support for Combined Fields query ([#18724](https://github.com/opensearch-project/OpenSearch/pull/18724))
- Terminate eligible non-scoring boolean queries early for performance ([#18842](https://github.com/opensearch-project/OpenSearch/pull/18842))
- Added approximation support for range queries with now in date field ([#18511](https://github.com/opensearch-project/OpenSearch/pull/18511))

### Changed
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,21 @@

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;

import org.opensearch.action.search.SearchResponse;
import org.opensearch.cluster.metadata.IndexMetadata;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.unit.TimeValue;
import org.opensearch.search.SearchHit;
import org.opensearch.search.sort.SortOrder;
import org.opensearch.search.suggest.SuggestBuilder;
import org.opensearch.test.OpenSearchIntegTestCase;
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;
import org.opensearch.transport.client.Client;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

Expand All @@ -26,8 +35,14 @@
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.index.query.QueryBuilders.termsQuery;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.search.aggregations.AggregationBuilders.cardinality;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertAcked;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertOrderedSearchHits;
import static org.apache.lucene.search.TotalHits.Relation.EQUAL_TO;
import static org.apache.lucene.search.TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;

@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.TEST, numDataNodes = 1)
public class BooleanQueryIT extends ParameterizedStaticSettingsOpenSearchIntegTestCase {

public BooleanQueryIT(Settings staticSettings) {
Expand Down Expand Up @@ -69,10 +84,7 @@ public void testMustNotRangeRewrite() throws Exception {
.setSource(intField, i, termField1, termValue1, termField2, termValue2)
.get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();
afterIndexing();

int lt = 80;
int gte = 20;
Expand Down Expand Up @@ -118,10 +130,7 @@ public void testMustNotDateRangeRewrite() throws Exception {
for (int day = minDay; day <= maxDay; day++) {
client().prepareIndex("test").setSource(dateField, getDate(day, 0)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();
afterIndexing();

int minExcludedDay = 15;
int maxExcludedDay = 25;
Expand All @@ -147,10 +156,7 @@ public void testMustNotDateRangeWithFormatAndTimeZoneRewrite() throws Exception
for (int hour = 0; hour < 24; hour++) {
client().prepareIndex("test").setSource(dateField, getDate(1, hour)).get();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();
afterIndexing();

int zoneOffset = 3;
assertHitCount(
Expand Down Expand Up @@ -185,10 +191,7 @@ public void testMustNotRangeRewriteWithMissingValues() throws Exception {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(termField, termValue).get();
}
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();
afterIndexing();

// Search excluding range 30 to 50

Expand Down Expand Up @@ -217,10 +220,7 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception {
client().prepareIndex("test").setId(Integer.toString(i)).setSource(intField, i).get();
}
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();
afterIndexing();

// Range queries will match if ANY of the doc's values are within the range.
// So if we exclude the range 0 to 20, we shouldn't see any of those documents returned,
Expand All @@ -234,6 +234,145 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception {
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs);
}

public void testFilterEarlyTermination() throws Exception {
int numDocs = 6_000;
String intField = "int_field";
String textField1 = "text_field_1";
List<String> textField1Values = List.of("even", "odd");
String textField2 = "text_field_2";
List<String> textField2Values = List.of("a", "b", "c");
Client client = client();
String indexName = "test";
// Set trackTotalHitsUpTo to 500 rather than default 10k, so we have to index fewer docs
int trackTotalHitsUpTo = 500;

// Enforce 1 shard per node, so that no shard has < trackTotalHitsUpTo matching docs and cannot actually terminate early
assertAcked(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where are we validating multi shard behaviour?

client.admin()
.indices()
.prepareCreate(indexName)
.setMapping(intField, "type=integer", textField1, "type=text", textField2, "type=text")
.setSettings(
Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1).put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0)
)
.get()
);

for (int i = 0; i < numDocs; i++) {
String textValue1 = textField1Values.get(i % 2);
String textValue2 = textField2Values.get(i % 3);
client.prepareIndex(indexName)
.setId(Integer.toString(i))
.setSource(intField, i, textField1, textValue1, textField2, textValue2)
.get();
}
afterIndexing();
int lte = 3000;

// A query with only filter or must_not clauses should be able to terminate early
SearchResponse response = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.get();
assertTrue(response.isTerminatedEarly());
// Note: queries that have finished early with terminate_after will return "eq" for hit relation
assertHitCount(response, trackTotalHitsUpTo, EQUAL_TO);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we validate the correctness of the returned documents as well?


// Force the same query to not terminate early by setting terminateAfter to some non-default high value, so we can check the hits
// are identical
SearchResponse originalQueryResponse = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.setTerminateAfter(1_000_000)
.get();
assertFalse(originalQueryResponse.isTerminatedEarly()); // Returns false not null when TA was set but not reached
assertHitCount(originalQueryResponse, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we also test for mixed clause types that look like filters?

List<String> terminatedEarlyIds = new ArrayList<>();
for (Iterator<SearchHit> it = response.getHits().iterator(); it.hasNext();) {
SearchHit terminatedEarlyHit = it.next();
terminatedEarlyIds.add(terminatedEarlyHit.getId());
}
assertOrderedSearchHits(originalQueryResponse, terminatedEarlyIds.toArray(new String[0]));

// Queries with other clauses should not terminate early
response = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).must(termQuery(textField2, "a")))
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);

response = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().must(rangeQuery(intField).lte(lte)).should(termQuery(textField1, "odd")))
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);

// Queries with aggregations shouldn't terminate early
response = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.addAggregation(cardinality("cardinality").field(intField))
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);

// Queries with sorting shouldn't terminate early
response = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.addSort(intField, SortOrder.DESC)
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);

// Queries with pagination shouldn't terminate early
response = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.setFrom(10)
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);

response = client().prepareSearch()
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.searchAfter(new Object[] { 0 })
.addSort(intField, SortOrder.DESC)
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);

// Scroll queries shouldn't terminate early
response = client().prepareSearch()
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.setScroll(TimeValue.timeValueSeconds(30))
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, lte / 2);

response = client().prepareSearchScroll(response.getScrollId()).setScroll(TimeValue.timeValueSeconds(30)).get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, lte / 2);
clearScroll(response.getScrollId());

// Suggest queries shouldn't terminate early
response = client().prepareSearch()
.suggest(new SuggestBuilder())
.setTrackTotalHitsUpTo(trackTotalHitsUpTo)
.setQuery(boolQuery().mustNot(termQuery(textField1, "even")).filter(rangeQuery(intField).lte(lte)))
.get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);

// Search responses missing a query shouldn't terminate early or throw an error
response = client().prepareSearch().setTrackTotalHitsUpTo(trackTotalHitsUpTo).get();
assertNull(response.isTerminatedEarly());
assertHitCount(response, trackTotalHitsUpTo, GREATER_THAN_OR_EQUAL_TO);
}

public void testMustNotNumericMatchOrTermQueryRewrite() throws Exception {
Map<Integer, Integer> statusToDocCountMap = Map.of(200, 1000, 404, 30, 500, 1, 400, 1293);
String statusField = "status";
Expand Down Expand Up @@ -313,4 +452,11 @@ private String padZeros(int value, int length) {
private String getDate(int day, int hour) {
return "2016-01-" + padZeros(day, 2) + "T" + padZeros(hour, 2) + ":00:00.000";
}

private void afterIndexing() {
ensureGreen();
waitForRelocation();
forceMerge();
refresh();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1207,4 +1207,34 @@ public boolean evaluateKeywordIndexOrDocValuesEnabled() {
}
return false;
}

/**
* Checks if early termination can be enabled for this search, and enables it if so.
* @return whether early termination was applied
*/
boolean tryEnablingEarlyTermination() {
// This method should only be called after queries are rewritten and parsed, and terminateAfter and size have already been set.
assert size != -1 : "Cannot call `tryEnablingEarlyTermination` until after `size` has been set";
assert from != -1 : "Cannot call `tryEnablingEarlyTermination` until after `from` has been set";
if (query == null) return false;

if (terminateAfter != DEFAULT_TERMINATE_AFTER) return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we check for deeper queries, such as

  1. Nested BooleanQueries inside FILTER clauses
  2. ConstantScoreQuery wrapping (common in filter contexts)
  3. MinimumShouldMatch requirements not exposed in BooleanQuery API

if (!(query instanceof BooleanQuery bq)) return false;

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No check for search profiling. Early termination will produce misleading profiler output showing only partial query execution.

if (aggregations() != null) return false;
if (from > 0 || searchAfter != null) return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why cant early termination not work with search_after?

if (sort != null) return false;
if (sliceBuilder != null || scrollContext() != null) return false;
if (suggest != null) return false;

// We can only set terminateAfter to trackTotalHitsUpTo if we only have filter and must_not clauses
if (bq.getClauses(Occur.MUST).isEmpty() && bq.getClauses(Occur.SHOULD).isEmpty()) {
terminateAfter = Math.max(size, trackTotalHitsUpTo);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we not be checking for null query here? Returning false for null query is wrong. A null query is equivalent to MatchAllQuery in OpenSearch, which IS a constant-scoring query suitable for early termination.

return true;
// TODO: Disabling concurrent segment search can speed this up even further for default trackTotalHitsUpTo of 10k,
// but at this point the CSS logic already assumes it can't be changed. We can revisit this in future.
}
// TODO: In future we can do the same for any constant-score query
return false;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we just disable CSS when early termination is enabled?

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -1142,6 +1142,8 @@ final SearchContext createContext(

// pre process
queryPhase.preProcess(context);

context.tryEnablingEarlyTermination();
} catch (Exception e) {
context.close();
throw e;
Expand Down
Loading
Loading