From b21d00fedd1ad4d071f5ef8c11817a4ccd3015bb Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Mon, 19 Aug 2024 17:49:26 -0700 Subject: [PATCH 1/2] Backport/backport 15194 to 2.x Signed-off-by: Finn Carroll --- CHANGELOG.md | 1 + .../search.aggregation/360_date_histogram.yml | 87 ++++++++++++ .../test/search.aggregation/40_range.yml | 75 ++++++++++ .../bucket/range/RangeAggregator.java | 6 +- .../bucket/range/RangeAggregatorTests.java | 128 +++++++++++++++++- 5 files changed, 293 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 506e7b1f35f0c..531a914410df8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -52,6 +52,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix array_index_out_of_bounds_exception when indexing documents with field name containing only dot ([#15126](https://github.com/opensearch-project/OpenSearch/pull/15126)) - Fixed array field name omission in flat_object function for nested JSON ([#13620](https://github.com/opensearch-project/OpenSearch/pull/13620)) - Fix incorrect parameter names in MinHash token filter configuration handling ([#15233](https://github.com/opensearch-project/OpenSearch/pull/15233)) +- Fix range aggregation optimization ignoring top level queries ([#15287](https://github.com/opensearch-project/OpenSearch/pull/15287)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml index 0ea9d3de00926..00499f3e9def8 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml @@ -61,3 +61,90 @@ setup: - match: { aggregations.histo.buckets.8.doc_count: 1 } - match: { aggregations.histo.buckets.12.key_as_string: "2016-06-01T00:00:00.000Z" } - match: { aggregations.histo.buckets.12.doc_count: 1 } + +--- +"Date histogram aggregation w/ filter query test": + - do: + bulk: + refresh: true + index: dhisto-agg-w-query + body: + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-12", "dow": "monday"}' + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-14", "dow": "wednesday"}' + - '{"index": {}}' + - '{"routing": "route1", "date": "2024-08-19", "dow": "monday"}' + - '{"index": {}}' + - '{"routing": "route2", "date": "2024-08-13", "dow": "tuesday"}' + - '{"index": {}}' + - '{"routing": "route2", "date": "2024-08-15", "dow": "thursday"}' + + - do: + search: + index: dhisto-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + weekHisto: + date_histogram: + field: date + calendar_interval: week + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.weekHisto.buckets.0.doc_count: 2 } + - match: { aggregations.weekHisto.buckets.1.doc_count: 1 } + +--- +"Date histogram aggregation w/ shared field range test": + - do: + bulk: + refresh: true + index: dhisto-agg-w-query + body: + - '{"index": {}}' + - '{"date": "2024-10-31"}' + - '{"index": {}}' + - '{"date": "2024-11-11"}' + - '{"index": {}}' + - '{"date": "2024-11-28"}' + - '{"index": {}}' + - '{"date": "2024-12-25"}' + - '{"index": {}}' + - '{"date": "2025-01-01"}' + - '{"index": {}}' + - '{"date": "2025-02-14"}' + + - do: + search: + index: dhisto-agg-w-query + body: + profile: true + query: + range: + date: + gte: "2024-01-01" + lt: "2025-01-01" + aggregations: + monthHisto: + date_histogram: + field: date + calendar_interval: month + _source: false + + - match: { hits.total.value: 4 } + - match: { aggregations.monthHisto.buckets.0.doc_count: 1 } + - match: { aggregations.monthHisto.buckets.1.doc_count: 2 } + - match: { aggregations.monthHisto.buckets.2.doc_count: 1 } + - match: { profile.shards.0.aggregations.0.debug.optimized_segments: 1 } + - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } + - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 0 } + - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml index 80aad96ce1f6b..7a93a61aa8d1f 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml @@ -673,3 +673,78 @@ setup: - match: { aggregations.my_range.buckets.3.from: 1.5 } - is_false: aggregations.my_range.buckets.3.to - match: { aggregations.my_range.buckets.3.doc_count: 2 } + +--- +"Filter query w/ aggregation test": + - do: + bulk: + refresh: true + index: range-agg-w-query + body: + - '{"index": {}}' + - '{"routing": "route1", "v": -10, "date": "2024-10-29"}' + - '{"index": {}}' + - '{"routing": "route1", "v": -5, "date": "2024-10-30"}' + - '{"index": {}}' + - '{"routing": "route1", "v": 10, "date": "2024-10-31"}' + - '{"index": {}}' + - '{"routing": "route2", "v": 15, "date": "2024-11-01"}' + - '{"index": {}}' + - '{"routing": "route2", "v": 20, "date": "2024-11-02"}' + + - do: + search: + index: range-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + NegPosAgg: + range: + field: v + keyed: true + ranges: + - to: 0 + key: "0" + - from: 0 + key: "1" + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.NegPosAgg.buckets.0.doc_count: 2 } + - match: { aggregations.NegPosAgg.buckets.1.doc_count: 1 } + + - do: + search: + index: range-agg-w-query + body: + query: + bool: + must: + match_all: {} + filter: + - terms: + routing: + - "route1" + aggregations: + HalloweenAgg: + date_range: + field: date + format: "yyyy-MM-dd" + keyed: true + ranges: + - to: "2024-11-01" + key: "to-october" + - from: "2024-11-01" + key: "from-september" + _source: false + + - match: { hits.total.value: 3 } + - match: { aggregations.HalloweenAgg.buckets.to-october.doc_count: 3 } + - match: { aggregations.HalloweenAgg.buckets.from-september.doc_count: 0 } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 17461f228e993..39dc1c36e8895 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -70,6 +70,7 @@ import java.util.function.Function; import static org.opensearch.core.xcontent.ConstructingObjectParser.optionalConstructorArg; +import static org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll; /** * Aggregate all docs that match given ranges. @@ -310,8 +311,9 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false); - if (optimized) throw new CollectionTerminatedException(); + if (segmentMatchAll(context, ctx) && filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false)) { + throw new CollectionTerminatedException(); + } final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx); return new LeafBucketCollectorBase(sub, values) { diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java index 7e796b684e869..f43fcd037de4f 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/range/RangeAggregatorTests.java @@ -32,6 +32,9 @@ package org.opensearch.search.aggregations.bucket.range; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntPoint; +import org.apache.lucene.document.KeywordField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.document.SortedNumericDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; @@ -39,9 +42,13 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; import org.apache.lucene.store.Directory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.TestUtil; @@ -54,6 +61,7 @@ import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper.NumberFieldType; import org.opensearch.index.mapper.NumberFieldMapper.NumberType; +import org.opensearch.index.mapper.ParseContext.Document; import org.opensearch.search.aggregations.AggregationBuilder; import org.opensearch.search.aggregations.AggregatorTestCase; import org.opensearch.search.aggregations.CardinalityUpperBound; @@ -65,6 +73,7 @@ import java.io.IOException; import java.time.ZoneOffset; import java.time.ZonedDateTime; +import java.util.ArrayList; import java.util.Collections; import java.util.LinkedHashMap; import java.util.LinkedList; @@ -457,6 +466,29 @@ public void testFloatType() throws IOException { ); } + public void testTopLevelRangeQuery() throws IOException { + NumberFieldType fieldType = new NumberFieldType(NumberType.INTEGER.typeName(), NumberType.INTEGER); + String fieldName = fieldType.numberType().typeName(); + Query query = IntPoint.newRangeQuery(fieldName, 5, 20); + + testRewriteOptimizationCase( + fieldType, + new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, + query, + new Number[] { 0.1, 4.0, 9, 11, 12, 19 }, + range -> { + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(3, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, + false + ); + } + public void testUnsignedLongType() throws IOException { testRewriteOptimizationCase( new NumberFieldType(NumberType.UNSIGNED_LONG.typeName(), NumberType.UNSIGNED_LONG), @@ -493,6 +525,76 @@ public void testUnsignedLongType() throws IOException { ); } + /** + * Expect no optimization as top level query excludes documents. + */ + public void testTopLevelFilterTermQuery() throws IOException { + final String KEYWORD_FIELD_NAME = "route"; + final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE); + final NumberType numType = NUM_FIELD_TYPE.numberType(); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(0); + builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST); + Query boolQuery = builder.build(); + + List docList = new ArrayList<>(); + for (int i = 0; i < 3; i++) + docList.add(new Document()); + + docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false)); + docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false)); + docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false)); + docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route2", Field.Store.NO)); + + testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> { + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(1, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, false); + } + + /** + * Expect optimization as top level query is effective match all. + */ + public void testTopLevelEffectiveMatchAll() throws IOException { + final String KEYWORD_FIELD_NAME = "route"; + final NumberFieldType NUM_FIELD_TYPE = new NumberFieldType(NumberType.DOUBLE.typeName(), NumberType.DOUBLE); + final NumberType numType = NUM_FIELD_TYPE.numberType(); + + BooleanQuery.Builder builder = new BooleanQuery.Builder(); + builder.setMinimumNumberShouldMatch(0); + builder.add(new TermQuery(new Term(KEYWORD_FIELD_NAME, "route1")), BooleanClause.Occur.MUST); + Query boolQuery = builder.build(); + + List docList = new ArrayList<>(); + for (int i = 0; i < 3; i++) + docList.add(new Document()); + + docList.get(0).addAll(numType.createFields(numType.typeName(), 3.0, true, true, false)); + docList.get(1).addAll(numType.createFields(numType.typeName(), 11.0, true, true, false)); + docList.get(2).addAll(numType.createFields(numType.typeName(), 15.0, true, true, false)); + docList.get(0).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(1).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + docList.get(2).add(new KeywordField(KEYWORD_FIELD_NAME, "route1", Field.Store.NO)); + + testRewriteOptimizationCase(NUM_FIELD_TYPE, new double[][] { { 0.0, 10.0 }, { 10.0, 20.0 } }, boolQuery, docList, range -> { + List ranges = range.getBuckets(); + assertEquals(2, ranges.size()); + assertEquals("0.0-10.0", ranges.get(0).getKeyAsString()); + assertEquals(1, ranges.get(0).getDocCount()); + assertEquals("10.0-20.0", ranges.get(1).getKeyAsString()); + assertEquals(2, ranges.get(1).getDocCount()); + assertTrue(AggregationInspectionHelper.hasValue(range)); + }, true); + } + private void testCase( Query query, CheckedConsumer buildIndex, @@ -556,11 +658,33 @@ private void testRewriteOptimizationCase( ) throws IOException { NumberType numberType = fieldType.numberType(); String fieldName = numberType.typeName(); + List docs = new ArrayList<>(); + + for (Number dataPoint : dataPoints) { + Document doc = new Document(); + List fieldList = numberType.createFields(fieldName, dataPoint, true, true, false); + for (Field fld : fieldList) + doc.add(fld); + docs.add(doc); + } + + testRewriteOptimizationCase(fieldType, ranges, query, docs, verify, optimized); + } + private void testRewriteOptimizationCase( + NumberFieldType fieldType, + double[][] ranges, + Query query, + List documents, + Consumer> verify, + boolean optimized + ) throws IOException { + NumberType numberType = fieldType.numberType(); + String fieldName = numberType.typeName(); try (Directory directory = newDirectory()) { try (IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig().setCodec(TestUtil.getDefaultCodec()))) { - for (Number dataPoint : dataPoints) { - indexWriter.addDocument(numberType.createFields(fieldName, dataPoint, true, true, false)); + for (Document doc : documents) { + indexWriter.addDocument(doc); } } From 23201dbb1ab33c638e504b284d42616bf987eff1 Mon Sep 17 00:00:00 2001 From: Finn Carroll Date: Tue, 20 Aug 2024 05:17:35 +0000 Subject: [PATCH 2/2] Rest test skip 2.16. Bugfix not backported. Signed-off-by: Finn Carroll --- .../test/search.aggregation/360_date_histogram.yml | 4 ++++ .../rest-api-spec/test/search.aggregation/40_range.yml | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml index 00499f3e9def8..52d68f4e32812 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/360_date_histogram.yml @@ -105,6 +105,10 @@ setup: --- "Date histogram aggregation w/ shared field range test": + - skip: + version: " - 2.16.99" + reason: "Bugfix not backported to 2.16" + - do: bulk: refresh: true diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml index 7a93a61aa8d1f..daba12e5fb753 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/40_range.yml @@ -676,6 +676,10 @@ setup: --- "Filter query w/ aggregation test": + - skip: + version: " - 2.16.99" + reason: "Bugfix not backported to 2.16" + - do: bulk: refresh: true