From 8d5752e79b6580d6ac42bb15276371394c775679 Mon Sep 17 00:00:00 2001 From: Austin Lee Date: Wed, 28 Jun 2023 17:58:11 -0700 Subject: [PATCH] Implement buildEmptyAggregations for MultiTermsAggregator (#7089) (#7318) * Implement buildEmptyAggregations for MultiTermsAggregator (#7089) Signed-off-by: Austin Lee * Address Spotless check issue Signed-off-by: Austin Lee * Add a unit test for MultiTermsAggregator.buildEmptyAggregations (#7089) Signed-off-by: Austin Lee * Update changelog Update version check and reason Signed-off-by: Austin Lee --------- Signed-off-by: Austin Lee --- CHANGELOG.md | 1 + .../search.aggregation/370_multi_terms.yml | 48 +++++++++++++ .../bucket/terms/MultiTermsAggregator.java | 15 +++- .../terms/MultiTermsAggregatorTests.java | 69 +++++++++++++++++++ 4 files changed, 132 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 32253849c446c..faed56bfea5ce 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -152,6 +152,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Enforce 512 byte document ID limit in bulk updates ([#8039](https://github.com/opensearch-project/OpenSearch/pull/8039)) - With only GlobalAggregation in request causes unnecessary wrapping with MultiCollector ([#8125](https://github.com/opensearch-project/OpenSearch/pull/8125)) - Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591)) +- Fix NPE in multiterms aggregations involving empty buckets ([#7318](https://github.com/opensearch-project/OpenSearch/pull/7318)) ### Security diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml index 0f897866fcb9d..eeab8e78bf830 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/370_multi_terms.yml @@ -712,3 +712,51 @@ setup: - match: { aggregations.m_terms.buckets.0.key: ["a", 1] } - match: { aggregations.m_terms.buckets.0.key_as_string: "a|1" } - match: { aggregations.m_terms.buckets.0.doc_count: 4 } + +--- +"aggregate over multi-terms test": + - skip: + version: "- 2.9.99" + reason: "multi_terms aggregation was introduced in 2.1.0, NPE bug checked by this test case will manifest in any version < 3.0" + + - do: + bulk: + index: test_1 + refresh: true + body: + - '{"index": {}}' + - '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-23"}' + - '{"index": {}}' + - '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-25"}' + - '{"index": {}}' + - '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-23"}' + - '{"index": {}}' + - '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-25"}' + + - do: + search: + index: test_1 + size: 0 + body: + aggs: + histo: + date_histogram: + field: date + calendar_interval: day + aggs: + m_terms: + multi_terms: + terms: + - field: str + - field: ip + + - match: { hits.total.value: 4 } + - length: { aggregations.histo.buckets: 3 } + - match: { aggregations.histo.buckets.0.key_as_string: "2022-03-23T00:00:00.000Z" } + - match: { aggregations.histo.buckets.0.m_terms.buckets.0.key: ["a", "127.0.0.1"] } + - match: { aggregations.histo.buckets.0.m_terms.buckets.1.key: ["b", "127.0.0.1"] } + - match: { aggregations.histo.buckets.1.key_as_string: "2022-03-24T00:00:00.000Z" } + - length: { aggregations.histo.buckets.1.m_terms.buckets: 0 } + - match: { aggregations.histo.buckets.2.key_as_string: "2022-03-25T00:00:00.000Z" } + - match: { aggregations.histo.buckets.2.m_terms.buckets.0.key: [ "a", "127.0.0.1" ] } + - match: { aggregations.histo.buckets.2.m_terms.buckets.1.key: [ "b", "127.0.0.1" ] } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java index c810ba8f38624..fccb9c3af5986 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregator.java @@ -196,7 +196,20 @@ InternalMultiTerms buildResult(long owningBucketOrd, long otherDocCount, Interna @Override public InternalAggregation buildEmptyAggregation() { - return null; + return new InternalMultiTerms( + name, + order, + order, + bucketCountThresholds.getRequiredSize(), + bucketCountThresholds.getMinDocCount(), + metadata(), + bucketCountThresholds.getShardSize(), + showTermDocCountError, + 0, + 0, + formats, + Collections.emptyList() + ); } @Override diff --git a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java index 75ad9e12e0776..a2792114e9529 100644 --- a/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java +++ b/server/src/test/java/org/opensearch/search/aggregations/bucket/terms/MultiTermsAggregatorTests.java @@ -28,6 +28,10 @@ import org.opensearch.common.network.InetAddresses; import org.opensearch.common.settings.Settings; import org.opensearch.common.time.DateFormatter; +import org.opensearch.common.util.BigArrays; +import org.opensearch.common.util.MockPageCacheRecycler; +import org.opensearch.index.IndexService; +import org.opensearch.index.cache.IndexCache; import org.opensearch.index.mapper.BooleanFieldMapper; import org.opensearch.index.mapper.DateFieldMapper; import org.opensearch.index.mapper.GeoPointFieldMapper; @@ -35,22 +39,32 @@ import org.opensearch.index.mapper.KeywordFieldMapper; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.breaker.NoneCircuitBreakerService; import org.opensearch.script.MockScriptEngine; import org.opensearch.script.Script; import org.opensearch.script.ScriptEngine; import org.opensearch.script.ScriptModule; import org.opensearch.script.ScriptService; import org.opensearch.script.ScriptType; +import org.opensearch.search.DocValueFormat; import org.opensearch.search.aggregations.AggregationBuilder; +import org.opensearch.search.aggregations.Aggregator; +import org.opensearch.search.aggregations.AggregatorFactories; import org.opensearch.search.aggregations.AggregatorTestCase; import org.opensearch.search.aggregations.BucketOrder; +import org.opensearch.search.aggregations.CardinalityUpperBound; +import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.metrics.InternalMax; import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder; import org.opensearch.search.aggregations.support.CoreValuesSourceType; import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig; import org.opensearch.search.aggregations.support.ValueType; import org.opensearch.search.aggregations.support.ValuesSourceType; +import org.opensearch.search.internal.SearchContext; import org.opensearch.search.lookup.LeafDocLookup; +import org.opensearch.test.TestSearchContext; import java.io.IOException; import java.util.ArrayList; @@ -58,6 +72,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.UUID; import java.util.function.Consumer; import java.util.function.Function; @@ -68,8 +83,12 @@ import static java.util.stream.Collectors.toList; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.contains; +import static org.hamcrest.Matchers.empty; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.hasSize; +import static org.hamcrest.Matchers.instanceOf; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; public class MultiTermsAggregatorTests extends AggregatorTestCase { private static final String FIELD_NAME = "field"; @@ -852,6 +871,56 @@ public void testIncludeExclude() throws IOException { ); } + public void testEmptyAggregations() throws IOException { + QueryShardContext queryShardContext = mock(QueryShardContext.class); + IndexShard indexShard = mock(IndexShard.class); + BigArrays bigArrays = new BigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService(), ""); + IndexService indexService = mock(IndexService.class); + when(indexService.getShardOrNull(0)).thenReturn(indexShard); + IndexCache cache = mock(IndexCache.class); + when(cache.bitsetFilterCache()).thenReturn(null); + when(indexService.cache()).thenReturn(cache); + SearchContext context = new TestSearchContext(bigArrays, indexService); + when(indexService.newQueryShardContext(0, null, () -> 0L, null)).thenReturn(queryShardContext); + AggregatorFactories factories = AggregatorFactories.EMPTY; + boolean showTermDocCountError = true; + MultiTermsAggregator.InternalValuesSource internalValuesSources = mock(MultiTermsAggregator.InternalValuesSource.class); + DocValueFormat format = mock(DocValueFormat.class); + BucketOrder order = mock(BucketOrder.class); + Aggregator.SubAggCollectionMode collectMode = Aggregator.SubAggCollectionMode.BREADTH_FIRST; + TermsAggregator.BucketCountThresholds bucketCountThresholds = mock(TermsAggregator.BucketCountThresholds.class); + Aggregator parent = mock(Aggregator.class); + CardinalityUpperBound cardinality = CardinalityUpperBound.ONE; + Map metadata = new HashMap<>(); + String k1 = UUID.randomUUID().toString(); + String v1 = UUID.randomUUID().toString(); + metadata.put(k1, v1); + + MultiTermsAggregator mAgg = new MultiTermsAggregator( + AGG_NAME, + factories, + showTermDocCountError, + List.of(internalValuesSources), + List.of(format), + order, + collectMode, + bucketCountThresholds, + context, + parent, + cardinality, + metadata + ); + InternalAggregation emptyAgg = mAgg.buildEmptyAggregation(); + + MatcherAssert.assertThat(emptyAgg.getName(), equalTo(AGG_NAME)); + MatcherAssert.assertThat(emptyAgg, instanceOf(InternalMultiTerms.class)); + + InternalMultiTerms mt = (InternalMultiTerms) emptyAgg; + MatcherAssert.assertThat(mt.getMetadata().keySet(), contains(k1)); + MatcherAssert.assertThat(mt.getMetadata().get(k1), equalTo(v1)); + MatcherAssert.assertThat(mt.getBuckets(), empty()); + } + private void testAggregation( Query query, List terms,