Skip to content

Commit 73223b3

Browse files
committed
Aggs: fix handling of the same child doc id being processed multiple times in the reverse_nested aggregation.
Closes elastic#9263 Closes elastic#9345
1 parent d9c2377 commit 73223b3

File tree

2 files changed

+174
-21
lines changed

2 files changed

+174
-21
lines changed

src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -20,12 +20,11 @@
2020

2121
import com.carrotsearch.hppc.LongIntOpenHashMap;
2222
import org.apache.lucene.index.AtomicReaderContext;
23-
import org.apache.lucene.search.DocIdSet;
2423
import org.apache.lucene.search.DocIdSetIterator;
2524
import org.apache.lucene.search.Filter;
25+
import org.apache.lucene.util.FixedBitSet;
2626
import org.elasticsearch.common.lease.Releasables;
2727
import org.elasticsearch.common.lucene.ReaderContextAware;
28-
import org.elasticsearch.common.lucene.docset.DocIdSets;
2928
import org.elasticsearch.common.recycler.Recycler;
3029
import org.elasticsearch.index.cache.fixedbitset.FixedBitSetFilter;
3130
import org.elasticsearch.index.mapper.MapperService;
@@ -35,7 +34,6 @@
3534
import org.elasticsearch.search.aggregations.*;
3635
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator;
3736
import org.elasticsearch.search.aggregations.support.AggregationContext;
38-
import org.elasticsearch.search.internal.SearchContext;
3937

4038
import java.io.IOException;
4139

@@ -45,8 +43,9 @@
4543
public class ReverseNestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
4644

4745
private final FixedBitSetFilter parentFilter;
46+
// It is ok to use bitset from bitset cache, because in this agg the path always to a nested parent path.
47+
private FixedBitSet parentDocs;
4848
private final String nestedPath;
49-
private DocIdSetIterator parentDocs;
5049

5150
// TODO: Add LongIntPagedHashMap?
5251
private final Recycler.V<LongIntOpenHashMap> bucketOrdToLastCollectedParentDocRecycler;
@@ -62,9 +61,9 @@ public ReverseNestedAggregator(String name, AggregatorFactories factories, Strin
6261
throw new SearchParseException(context.searchContext(), "Reverse nested aggregation [" + name + "] can only be used inside a [nested] aggregation");
6362
}
6463
if (nestedPath == null) {
65-
parentFilter = SearchContext.current().fixedBitSetFilterCache().getFixedBitSetFilter(NonNestedDocsFilter.INSTANCE);
64+
parentFilter = context.searchContext().fixedBitSetFilterCache().getFixedBitSetFilter(NonNestedDocsFilter.INSTANCE);
6665
} else {
67-
MapperService.SmartNameObjectMapper mapper = SearchContext.current().smartNameObjectMapper(nestedPath);
66+
MapperService.SmartNameObjectMapper mapper = context.searchContext().smartNameObjectMapper(nestedPath);
6867
if (mapper == null) {
6968
throw new AggregationExecutionException("[reverse_nested] nested path [" + nestedPath + "] not found");
7069
}
@@ -75,7 +74,7 @@ public ReverseNestedAggregator(String name, AggregatorFactories factories, Strin
7574
if (!objectMapper.nested().isNested()) {
7675
throw new AggregationExecutionException("[reverse_nested] nested path [" + nestedPath + "] is not nested");
7776
}
78-
parentFilter = SearchContext.current().fixedBitSetFilterCache().getFixedBitSetFilter(objectMapper.nestedTypeFilter());
77+
parentFilter = context.searchContext().fixedBitSetFilterCache().getFixedBitSetFilter(objectMapper.nestedTypeFilter());
7978
}
8079
bucketOrdToLastCollectedParentDocRecycler = aggregationContext.searchContext().cacheRecycler().longIntMap(32);
8180
bucketOrdToLastCollectedParentDoc = bucketOrdToLastCollectedParentDocRecycler.v();
@@ -88,12 +87,7 @@ public void setNextReader(AtomicReaderContext reader) {
8887
try {
8988
// In ES if parent is deleted, then also the children are deleted, so the child docs this agg receives
9089
// must belong to parent docs that is alive. For this reason acceptedDocs can be null here.
91-
DocIdSet docIdSet = parentFilter.getDocIdSet(reader, null);
92-
if (DocIdSets.isEmpty(docIdSet)) {
93-
parentDocs = null;
94-
} else {
95-
parentDocs = docIdSet.iterator();
96-
}
90+
parentDocs = parentFilter.getDocIdSet(reader, null);
9791
} catch (IOException ioe) {
9892
throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe);
9993
}
@@ -106,12 +100,7 @@ public void collect(int childDoc, long bucketOrd) throws IOException {
106100
}
107101

108102
// fast forward to retrieve the parentDoc this childDoc belongs to
109-
final int parentDoc;
110-
if (parentDocs.docID() < childDoc) {
111-
parentDoc = parentDocs.advance(childDoc);
112-
} else {
113-
parentDoc = parentDocs.docID();
114-
}
103+
final int parentDoc = parentDocs.nextSetBit(childDoc);
115104
assert childDoc <= parentDoc && parentDoc != DocIdSetIterator.NO_MORE_DOCS;
116105
if (bucketOrdToLastCollectedParentDoc.containsKey(bucketOrd)) {
117106
int lastCollectedParentDoc = bucketOrdToLastCollectedParentDoc.lget();

src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java

Lines changed: 166 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,22 +20,27 @@
2020

2121
import org.elasticsearch.action.search.SearchPhaseExecutionException;
2222
import org.elasticsearch.action.search.SearchResponse;
23+
import org.elasticsearch.common.settings.ImmutableSettings;
2324
import org.elasticsearch.common.xcontent.XContentBuilder;
2425
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
26+
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
2527
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
2628
import org.elasticsearch.search.aggregations.bucket.nested.ReverseNested;
2729
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
30+
import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount;
2831
import org.elasticsearch.test.ElasticsearchIntegrationTest;
2932
import org.junit.Test;
3033

3134
import java.util.ArrayList;
3235
import java.util.Arrays;
3336
import java.util.List;
3437

38+
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
39+
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
3540
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
41+
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
3642
import static org.elasticsearch.search.aggregations.AggregationBuilders.*;
37-
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
38-
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
43+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
3944
import static org.hamcrest.Matchers.equalTo;
4045
import static org.hamcrest.Matchers.is;
4146
import static org.hamcrest.core.IsNull.notNullValue;
@@ -448,4 +453,163 @@ public void testReverseNestedAggWithoutNestedAgg() throws Exception {
448453
)
449454
).get();
450455
}
456+
457+
@Test
458+
public void testSameParentDocHavingMultipleBuckets() throws Exception {
459+
XContentBuilder mapping = jsonBuilder().startObject().startObject("product").field("dynamic", "strict").startObject("properties")
460+
.startObject("id").field("type", "long").endObject()
461+
.startObject("category")
462+
.field("type", "nested")
463+
.startObject("properties")
464+
.startObject("name").field("type", "string").endObject()
465+
.endObject()
466+
.endObject()
467+
.startObject("sku")
468+
.field("type", "nested")
469+
.startObject("properties")
470+
.startObject("sku_type").field("type", "string").endObject()
471+
.startObject("colors")
472+
.field("type", "nested")
473+
.startObject("properties")
474+
.startObject("name").field("type", "string").endObject()
475+
.endObject()
476+
.endObject()
477+
.endObject()
478+
.endObject()
479+
.endObject().endObject().endObject();
480+
assertAcked(
481+
prepareCreate("idx3")
482+
.setSettings(ImmutableSettings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0))
483+
.addMapping("product", mapping)
484+
);
485+
486+
client().prepareIndex("idx3", "product", "1").setRefresh(true).setSource(
487+
jsonBuilder().startObject()
488+
.startArray("sku")
489+
.startObject()
490+
.field("sku_type", "bar1")
491+
.startArray("colors")
492+
.startObject().field("name", "red").endObject()
493+
.startObject().field("name", "green").endObject()
494+
.startObject().field("name", "yellow").endObject()
495+
.endArray()
496+
.endObject()
497+
.startObject()
498+
.field("sku_type", "bar1")
499+
.startArray("colors")
500+
.startObject().field("name", "red").endObject()
501+
.startObject().field("name", "blue").endObject()
502+
.startObject().field("name", "white").endObject()
503+
.endArray()
504+
.endObject()
505+
.startObject()
506+
.field("sku_type", "bar1")
507+
.startArray("colors")
508+
.startObject().field("name", "black").endObject()
509+
.startObject().field("name", "blue").endObject()
510+
.endArray()
511+
.endObject()
512+
.startObject()
513+
.field("sku_type", "bar2")
514+
.startArray("colors")
515+
.startObject().field("name", "orange").endObject()
516+
.endArray()
517+
.endObject()
518+
.startObject()
519+
.field("sku_type", "bar2")
520+
.startArray("colors")
521+
.startObject().field("name", "pink").endObject()
522+
.endArray()
523+
.endObject()
524+
.endArray()
525+
.startArray("category")
526+
.startObject().field("name", "abc").endObject()
527+
.startObject().field("name", "klm").endObject()
528+
.startObject().field("name", "xyz").endObject()
529+
.endArray()
530+
.endObject()
531+
).get();
532+
533+
SearchResponse response = client().prepareSearch("idx3")
534+
.addAggregation(
535+
nested("nested_0").path("category").subAggregation(
536+
terms("group_by_category").field("category.name").subAggregation(
537+
reverseNested("to_root").subAggregation(
538+
nested("nested_1").path("sku").subAggregation(
539+
filter("filter_by_sku").filter(termFilter("sku.sku_type", "bar1")).subAggregation(
540+
count("sku_count").field("sku_type")
541+
)
542+
)
543+
)
544+
)
545+
)
546+
).get();
547+
assertNoFailures(response);
548+
assertHitCount(response, 1);
549+
550+
Nested nested0 = response.getAggregations().get("nested_0");
551+
assertThat(nested0.getDocCount(), equalTo(3l));
552+
Terms terms = nested0.getAggregations().get("group_by_category");
553+
assertThat(terms.getBuckets().size(), equalTo(3));
554+
for (String bucketName : new String[]{"abc", "klm", "xyz"}) {
555+
logger.info("Checking results for bucket {}", bucketName);
556+
Terms.Bucket bucket = terms.getBucketByKey(bucketName);
557+
assertThat(bucket.getDocCount(), equalTo(1l));
558+
ReverseNested toRoot = bucket.getAggregations().get("to_root");
559+
assertThat(toRoot.getDocCount(), equalTo(1l));
560+
Nested nested1 = toRoot.getAggregations().get("nested_1");
561+
assertThat(nested1.getDocCount(), equalTo(5l));
562+
Filter filterByBar = nested1.getAggregations().get("filter_by_sku");
563+
assertThat(filterByBar.getDocCount(), equalTo(3l));
564+
ValueCount barCount = filterByBar.getAggregations().get("sku_count");
565+
assertThat(barCount.getValue(), equalTo(3l));
566+
}
567+
568+
response = client().prepareSearch("idx3")
569+
.addAggregation(
570+
nested("nested_0").path("category").subAggregation(
571+
terms("group_by_category").field("category.name").subAggregation(
572+
reverseNested("to_root").subAggregation(
573+
nested("nested_1").path("sku").subAggregation(
574+
filter("filter_by_sku").filter(termFilter("sku.sku_type", "bar1")).subAggregation(
575+
nested("nested_2").path("sku.colors").subAggregation(
576+
filter("filter_sku_color").filter(termFilter("sku.colors.name", "red")).subAggregation(
577+
reverseNested("reverse_to_sku").path("sku").subAggregation(
578+
count("sku_count").field("sku_type")
579+
)
580+
)
581+
)
582+
)
583+
)
584+
)
585+
)
586+
)
587+
).get();
588+
assertNoFailures(response);
589+
assertHitCount(response, 1);
590+
591+
nested0 = response.getAggregations().get("nested_0");
592+
assertThat(nested0.getDocCount(), equalTo(3l));
593+
terms = nested0.getAggregations().get("group_by_category");
594+
assertThat(terms.getBuckets().size(), equalTo(3));
595+
for (String bucketName : new String[]{"abc", "klm", "xyz"}) {
596+
logger.info("Checking results for bucket {}", bucketName);
597+
Terms.Bucket bucket = terms.getBucketByKey(bucketName);
598+
assertThat(bucket.getDocCount(), equalTo(1l));
599+
ReverseNested toRoot = bucket.getAggregations().get("to_root");
600+
assertThat(toRoot.getDocCount(), equalTo(1l));
601+
Nested nested1 = toRoot.getAggregations().get("nested_1");
602+
assertThat(nested1.getDocCount(), equalTo(5l));
603+
Filter filterByBar = nested1.getAggregations().get("filter_by_sku");
604+
assertThat(filterByBar.getDocCount(), equalTo(3l));
605+
Nested nested2 = filterByBar.getAggregations().get("nested_2");
606+
assertThat(nested2.getDocCount(), equalTo(8l));
607+
Filter filterBarColor = nested2.getAggregations().get("filter_sku_color");
608+
assertThat(filterBarColor.getDocCount(), equalTo(2l));
609+
ReverseNested reverseToBar = filterBarColor.getAggregations().get("reverse_to_sku");
610+
assertThat(reverseToBar.getDocCount(), equalTo(2l));
611+
ValueCount barCount = reverseToBar.getAggregations().get("sku_count");
612+
assertThat(barCount.getValue(), equalTo(2l));
613+
}
614+
}
451615
}

0 commit comments

Comments
 (0)