Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement buildEmptyAggregations for MultiTermsAggregator (#7089) #7318

Merged
merged 4 commits into from
Jun 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- Enforce 512 byte document ID limit in bulk updates ([#8039](https://github.com/opensearch-project/OpenSearch/pull/8039))
- With only GlobalAggregation in request causes unnecessary wrapping with MultiCollector ([#8125](https://github.com/opensearch-project/OpenSearch/pull/8125))
- Fix mapping char_filter when mapping a hashtag ([#7591](https://github.com/opensearch-project/OpenSearch/pull/7591))
- Fix NPE in multiterms aggregations involving empty buckets ([#7318](https://github.com/opensearch-project/OpenSearch/pull/7318))

### Security

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -712,3 +712,51 @@ setup:
- match: { aggregations.m_terms.buckets.0.key: ["a", 1] }
- match: { aggregations.m_terms.buckets.0.key_as_string: "a|1" }
- match: { aggregations.m_terms.buckets.0.doc_count: 4 }

---
"aggregate over multi-terms test":
- skip:
version: "- 2.9.99"
reason: "multi_terms aggregation was introduced in 2.1.0, NPE bug checked by this test case will manifest in any version < 3.0"

- do:
bulk:
index: test_1
refresh: true
body:
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "a", "ip": "127.0.0.1", "date": "2022-03-25"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-23"}'
- '{"index": {}}'
- '{"str": "b", "ip": "127.0.0.1", "date": "2022-03-25"}'

- do:
search:
index: test_1
size: 0
body:
aggs:
histo:
date_histogram:
field: date
calendar_interval: day
aggs:
m_terms:
multi_terms:
terms:
- field: str
- field: ip

- match: { hits.total.value: 4 }
- length: { aggregations.histo.buckets: 3 }
- match: { aggregations.histo.buckets.0.key_as_string: "2022-03-23T00:00:00.000Z" }
- match: { aggregations.histo.buckets.0.m_terms.buckets.0.key: ["a", "127.0.0.1"] }
- match: { aggregations.histo.buckets.0.m_terms.buckets.1.key: ["b", "127.0.0.1"] }
- match: { aggregations.histo.buckets.1.key_as_string: "2022-03-24T00:00:00.000Z" }
- length: { aggregations.histo.buckets.1.m_terms.buckets: 0 }
- match: { aggregations.histo.buckets.2.key_as_string: "2022-03-25T00:00:00.000Z" }
- match: { aggregations.histo.buckets.2.m_terms.buckets.0.key: [ "a", "127.0.0.1" ] }
- match: { aggregations.histo.buckets.2.m_terms.buckets.1.key: [ "b", "127.0.0.1" ] }
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,20 @@ InternalMultiTerms buildResult(long owningBucketOrd, long otherDocCount, Interna

@Override
public InternalAggregation buildEmptyAggregation() {
return null;
return new InternalMultiTerms(
name,
order,
order,
bucketCountThresholds.getRequiredSize(),
bucketCountThresholds.getMinDocCount(),
metadata(),
bucketCountThresholds.getShardSize(),
showTermDocCountError,
0,
0,
formats,
Collections.emptyList()
);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,36 +28,51 @@
import org.opensearch.common.network.InetAddresses;
import org.opensearch.common.settings.Settings;
import org.opensearch.common.time.DateFormatter;
import org.opensearch.common.util.BigArrays;
import org.opensearch.common.util.MockPageCacheRecycler;
import org.opensearch.index.IndexService;
import org.opensearch.index.cache.IndexCache;
import org.opensearch.index.mapper.BooleanFieldMapper;
import org.opensearch.index.mapper.DateFieldMapper;
import org.opensearch.index.mapper.GeoPointFieldMapper;
import org.opensearch.index.mapper.IpFieldMapper;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.NumberFieldMapper;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.shard.IndexShard;
import org.opensearch.indices.breaker.NoneCircuitBreakerService;
import org.opensearch.script.MockScriptEngine;
import org.opensearch.script.Script;
import org.opensearch.script.ScriptEngine;
import org.opensearch.script.ScriptModule;
import org.opensearch.script.ScriptService;
import org.opensearch.script.ScriptType;
import org.opensearch.search.DocValueFormat;
import org.opensearch.search.aggregations.AggregationBuilder;
import org.opensearch.search.aggregations.Aggregator;
import org.opensearch.search.aggregations.AggregatorFactories;
import org.opensearch.search.aggregations.AggregatorTestCase;
import org.opensearch.search.aggregations.BucketOrder;
import org.opensearch.search.aggregations.CardinalityUpperBound;
import org.opensearch.search.aggregations.InternalAggregation;
import org.opensearch.search.aggregations.metrics.InternalMax;
import org.opensearch.search.aggregations.metrics.MaxAggregationBuilder;
import org.opensearch.search.aggregations.support.CoreValuesSourceType;
import org.opensearch.search.aggregations.support.MultiTermsValuesSourceConfig;
import org.opensearch.search.aggregations.support.ValueType;
import org.opensearch.search.aggregations.support.ValuesSourceType;
import org.opensearch.search.internal.SearchContext;
import org.opensearch.search.lookup.LeafDocLookup;
import org.opensearch.test.TestSearchContext;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.function.Consumer;
import java.util.function.Function;

Expand All @@ -68,8 +83,12 @@
import static java.util.stream.Collectors.toList;
import static org.hamcrest.Matchers.closeTo;
import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasSize;
import static org.hamcrest.Matchers.instanceOf;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;

public class MultiTermsAggregatorTests extends AggregatorTestCase {
private static final String FIELD_NAME = "field";
Expand Down Expand Up @@ -852,6 +871,56 @@ public void testIncludeExclude() throws IOException {
);
}

public void testEmptyAggregations() throws IOException {
QueryShardContext queryShardContext = mock(QueryShardContext.class);
IndexShard indexShard = mock(IndexShard.class);
BigArrays bigArrays = new BigArrays(new MockPageCacheRecycler(Settings.EMPTY), new NoneCircuitBreakerService(), "");
IndexService indexService = mock(IndexService.class);
when(indexService.getShardOrNull(0)).thenReturn(indexShard);
IndexCache cache = mock(IndexCache.class);
when(cache.bitsetFilterCache()).thenReturn(null);
when(indexService.cache()).thenReturn(cache);
SearchContext context = new TestSearchContext(bigArrays, indexService);
when(indexService.newQueryShardContext(0, null, () -> 0L, null)).thenReturn(queryShardContext);
AggregatorFactories factories = AggregatorFactories.EMPTY;
boolean showTermDocCountError = true;
MultiTermsAggregator.InternalValuesSource internalValuesSources = mock(MultiTermsAggregator.InternalValuesSource.class);
DocValueFormat format = mock(DocValueFormat.class);
BucketOrder order = mock(BucketOrder.class);
Aggregator.SubAggCollectionMode collectMode = Aggregator.SubAggCollectionMode.BREADTH_FIRST;
TermsAggregator.BucketCountThresholds bucketCountThresholds = mock(TermsAggregator.BucketCountThresholds.class);
Aggregator parent = mock(Aggregator.class);
CardinalityUpperBound cardinality = CardinalityUpperBound.ONE;
Map<String, Object> metadata = new HashMap<>();
String k1 = UUID.randomUUID().toString();
String v1 = UUID.randomUUID().toString();
metadata.put(k1, v1);

MultiTermsAggregator mAgg = new MultiTermsAggregator(
AGG_NAME,
factories,
showTermDocCountError,
List.of(internalValuesSources),
List.of(format),
order,
collectMode,
bucketCountThresholds,
context,
parent,
cardinality,
metadata
);
InternalAggregation emptyAgg = mAgg.buildEmptyAggregation();

MatcherAssert.assertThat(emptyAgg.getName(), equalTo(AGG_NAME));
MatcherAssert.assertThat(emptyAgg, instanceOf(InternalMultiTerms.class));

InternalMultiTerms mt = (InternalMultiTerms) emptyAgg;
MatcherAssert.assertThat(mt.getMetadata().keySet(), contains(k1));
MatcherAssert.assertThat(mt.getMetadata().get(k1), equalTo(v1));
MatcherAssert.assertThat(mt.getBuckets(), empty());
}

private void testAggregation(
Query query,
List<MultiTermsValuesSourceConfig> terms,
Expand Down