Skip to content

Commit

Permalink
feat: Make max aggregation bucket size configurable
Browse files Browse the repository at this point in the history
  • Loading branch information
Kerem Sahin committed Nov 11, 2020
1 parent b8f5dbb commit f31107a
Show file tree
Hide file tree
Showing 4 changed files with 38 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,13 +55,13 @@
@Slf4j
public class ESSearchDAO<DOCUMENT extends RecordTemplate> extends BaseSearchDAO<DOCUMENT> {

private static final Integer DEFAULT_TERM_BUCKETS_SIZE_100 = 100;
private static final String URN_FIELD = "urn";

private RestHighLevelClient _client;
private BaseSearchConfig<DOCUMENT> _config;
private BaseESAutoCompleteQuery _autoCompleteQueryForLowCardFields;
private BaseESAutoCompleteQuery _autoCompleteQueryForHighCardFields;
private int _maxTermBucketSize = 100;

// TODO: Currently takes elastic search client, in future, can take other clients such as galene
// TODO: take params and settings needed to create the client
Expand Down Expand Up @@ -254,7 +254,7 @@ private void buildAggregations(@Nonnull SearchSourceBuilder searchSourceBuilder,
@Nullable Filter filter) {
Set<String> facetFields = _config.getFacetFields();
for (String facet : facetFields) {
AggregationBuilder aggBuilder = AggregationBuilders.terms(facet).field(facet).size(DEFAULT_TERM_BUCKETS_SIZE_100);
AggregationBuilder aggBuilder = AggregationBuilders.terms(facet).field(facet).size(_maxTermBucketSize);
Optional.ofNullable(filter).map(Filter::getCriteria).ifPresent(criteria -> {
for (Criterion criterion : criteria) {
if (!facetFields.contains(criterion.getField()) || criterion.getField().equals(facet)) {
Expand Down Expand Up @@ -446,4 +446,16 @@ private Urn getUrnFromSearchHit(@Nonnull SearchHit hit) {
throw new RuntimeException("Invalid urn in search document " + e);
}
}

/**
* Sets max term bucket size in the aggregation results.
*
* <p>The default value might not always be good enough when aggregation happens on a high cardinality field.
* Using a high default instead is also not ideal because of potential query performance degradation.
* Instead, entities which have a rare use case of aggregating over high cardinality fields can use this method
* to configure the aggregation behavior.
*/
public void setMaxTermBucketSize(int maxTermBucketSize) {
_maxTermBucketSize = maxTermBucketSize;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.aggregations.AggregationBuilders;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

Expand Down Expand Up @@ -212,6 +213,23 @@ public void testPreferenceInSearchQuery() {
assertEquals(searchRequest.preference(), preference);
}

@Test
public void testSetMaxTermBucketSize() {
String facetFieldName = "value";
Filter filter = QueryUtils.newFilter(Collections.singletonMap(facetFieldName, "dummy"));

// Default max term bucket size
SearchRequest searchRequest = _searchDAO.constructSearchQuery("dummy", filter, null, null, 0, 10);
assertEquals(searchRequest.source().aggregations().getAggregatorFactories().get(0),
AggregationBuilders.terms(facetFieldName).field(facetFieldName).size(100));

// Modified max term bucket size
_searchDAO.setMaxTermBucketSize(5);
searchRequest = _searchDAO.constructSearchQuery("dummy", filter, null, null, 0, 10);
assertEquals(searchRequest.source().aggregations().getAggregatorFactories().get(0),
AggregationBuilders.terms(facetFieldName).field(facetFieldName).size(5));
}

private static SearchHit makeSearchHit(int id) {
SearchHit hit = mock(SearchHit.class);
Map<String, Object> sourceMap = new HashMap<>();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@

import com.linkedin.testing.EntityDocument;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import javax.annotation.Nonnull;

public class TestSearchConfig extends BaseSearchConfig<EntityDocument> {
@Override
@Nonnull
public Set<String> getFacetFields() {
return Collections.unmodifiableSet(new HashSet<>());
return Collections.singleton("value");
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,9 @@ record EntityDocument {
* For unit tests
*/
urn: Urn

/**
* For unit tests
*/
value: optional string
}

0 comments on commit f31107a

Please sign in to comment.