Skip to content

Commit a645994

Browse files
committed
Aggs: fix handling of the same child doc id being processed multiple times in the reverse_nested aggregation.
Closes #9263 Closes #9345
1 parent 385c43c commit a645994

File tree

2 files changed

+177
-16
lines changed

2 files changed

+177
-16
lines changed

src/main/java/org/elasticsearch/search/aggregations/bucket/nested/ReverseNestedAggregator.java

Lines changed: 9 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,11 @@
2020

2121
import com.carrotsearch.hppc.LongIntOpenHashMap;
2222
import org.apache.lucene.index.LeafReaderContext;
23-
import org.apache.lucene.search.DocIdSet;
2423
import org.apache.lucene.search.DocIdSetIterator;
2524
import org.apache.lucene.search.Filter;
2625
import org.apache.lucene.search.join.BitDocIdSetFilter;
26+
import org.apache.lucene.util.BitDocIdSet;
27+
import org.apache.lucene.util.BitSet;
2728
import org.elasticsearch.common.lucene.ReaderContextAware;
2829
import org.elasticsearch.common.lucene.docset.DocIdSets;
2930
import org.elasticsearch.index.mapper.MapperService;
@@ -33,7 +34,6 @@
3334
import org.elasticsearch.search.aggregations.*;
3435
import org.elasticsearch.search.aggregations.bucket.SingleBucketAggregator;
3536
import org.elasticsearch.search.aggregations.support.AggregationContext;
36-
import org.elasticsearch.search.internal.SearchContext;
3737

3838
import java.io.IOException;
3939
import java.util.Map;
@@ -44,17 +44,17 @@
4444
public class ReverseNestedAggregator extends SingleBucketAggregator implements ReaderContextAware {
4545

4646
private final BitDocIdSetFilter parentFilter;
47-
private DocIdSetIterator parentDocs;
47+
private BitSet parentDocs;
4848

4949
// TODO: Add LongIntPagedHashMap?
5050
private final LongIntOpenHashMap bucketOrdToLastCollectedParentDoc;
5151

5252
public ReverseNestedAggregator(String name, AggregatorFactories factories, ObjectMapper objectMapper, AggregationContext aggregationContext, Aggregator parent, Map<String, Object> metaData) throws IOException {
5353
super(name, factories, aggregationContext, parent, metaData);
5454
if (objectMapper == null) {
55-
parentFilter = SearchContext.current().bitsetFilterCache().getBitDocIdSetFilter(NonNestedDocsFilter.INSTANCE);
55+
parentFilter = context.searchContext().bitsetFilterCache().getBitDocIdSetFilter(NonNestedDocsFilter.INSTANCE);
5656
} else {
57-
parentFilter = SearchContext.current().bitsetFilterCache().getBitDocIdSetFilter(objectMapper.nestedTypeFilter());
57+
parentFilter = context.searchContext().bitsetFilterCache().getBitDocIdSetFilter(objectMapper.nestedTypeFilter());
5858
}
5959
bucketOrdToLastCollectedParentDoc = new LongIntOpenHashMap(32);
6060
}
@@ -65,11 +65,11 @@ public void setNextReader(LeafReaderContext reader) {
6565
try {
6666
// In ES if parent is deleted, then also the children are deleted, so the child docs this agg receives
6767
// must belong to parent docs that is alive. For this reason acceptedDocs can be null here.
68-
DocIdSet docIdSet = parentFilter.getDocIdSet(reader, null);
68+
BitDocIdSet docIdSet = parentFilter.getDocIdSet(reader);
6969
if (DocIdSets.isEmpty(docIdSet)) {
7070
parentDocs = null;
7171
} else {
72-
parentDocs = docIdSet.iterator();
72+
parentDocs = docIdSet.bits();
7373
}
7474
} catch (IOException ioe) {
7575
throw new AggregationExecutionException("Failed to aggregate [" + name + "]", ioe);
@@ -83,12 +83,7 @@ public void collect(int childDoc, long bucketOrd) throws IOException {
8383
}
8484

8585
// fast forward to retrieve the parentDoc this childDoc belongs to
86-
final int parentDoc;
87-
if (parentDocs.docID() < childDoc) {
88-
parentDoc = parentDocs.advance(childDoc);
89-
} else {
90-
parentDoc = parentDocs.docID();
91-
}
86+
final int parentDoc = parentDocs.nextSetBit(childDoc);
9287
assert childDoc <= parentDoc && parentDoc != DocIdSetIterator.NO_MORE_DOCS;
9388
if (bucketOrdToLastCollectedParentDoc.containsKey(bucketOrd)) {
9489
int lastCollectedParentDoc = bucketOrdToLastCollectedParentDoc.lget();
@@ -148,7 +143,7 @@ public Aggregator createInternal(AggregationContext context, Aggregator parent,
148143

149144
final ObjectMapper objectMapper;
150145
if (path != null) {
151-
MapperService.SmartNameObjectMapper mapper = SearchContext.current().smartNameObjectMapper(path);
146+
MapperService.SmartNameObjectMapper mapper = context.searchContext().smartNameObjectMapper(path);
152147
if (mapper == null) {
153148
return new Unmapped(name, context, parent, metaData);
154149
}

src/test/java/org/elasticsearch/search/aggregations/bucket/ReverseNestedTests.java

Lines changed: 168 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,14 @@
2020

2121
import org.elasticsearch.action.search.SearchPhaseExecutionException;
2222
import org.elasticsearch.action.search.SearchResponse;
23+
import org.elasticsearch.common.settings.ImmutableSettings;
2324
import org.elasticsearch.common.xcontent.XContentBuilder;
2425
import org.elasticsearch.search.aggregations.Aggregator.SubAggCollectionMode;
26+
import org.elasticsearch.search.aggregations.bucket.filter.Filter;
2527
import org.elasticsearch.search.aggregations.bucket.nested.Nested;
2628
import org.elasticsearch.search.aggregations.bucket.nested.ReverseNested;
2729
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
30+
import org.elasticsearch.search.aggregations.metrics.valuecount.ValueCount;
2831
import org.elasticsearch.test.ElasticsearchIntegrationTest;
2932
import org.hamcrest.Matchers;
3033
import org.junit.Test;
@@ -33,11 +36,15 @@
3336
import java.util.Arrays;
3437
import java.util.List;
3538

39+
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
40+
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
3641
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
42+
import static org.elasticsearch.index.query.FilterBuilders.termFilter;
3743
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
3844
import static org.elasticsearch.search.aggregations.AggregationBuilders.*;
39-
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
40-
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse;
45+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
46+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
47+
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
4148
import static org.hamcrest.Matchers.*;
4249
import static org.hamcrest.core.IsNull.notNullValue;
4350

@@ -466,4 +473,163 @@ public void nonExistingNestedField() throws Exception {
466473
ReverseNested reverseNested = nested.getAggregations().get("incorrect");
467474
assertThat(reverseNested.getDocCount(), is(0l));
468475
}
476+
477+
@Test
478+
public void testSameParentDocHavingMultipleBuckets() throws Exception {
479+
XContentBuilder mapping = jsonBuilder().startObject().startObject("product").field("dynamic", "strict").startObject("properties")
480+
.startObject("id").field("type", "long").endObject()
481+
.startObject("category")
482+
.field("type", "nested")
483+
.startObject("properties")
484+
.startObject("name").field("type", "string").endObject()
485+
.endObject()
486+
.endObject()
487+
.startObject("sku")
488+
.field("type", "nested")
489+
.startObject("properties")
490+
.startObject("sku_type").field("type", "string").endObject()
491+
.startObject("colors")
492+
.field("type", "nested")
493+
.startObject("properties")
494+
.startObject("name").field("type", "string").endObject()
495+
.endObject()
496+
.endObject()
497+
.endObject()
498+
.endObject()
499+
.endObject().endObject().endObject();
500+
assertAcked(
501+
prepareCreate("idx3")
502+
.setSettings(ImmutableSettings.builder().put(SETTING_NUMBER_OF_SHARDS, 1).put(SETTING_NUMBER_OF_REPLICAS, 0))
503+
.addMapping("product", mapping)
504+
);
505+
506+
client().prepareIndex("idx3", "product", "1").setRefresh(true).setSource(
507+
jsonBuilder().startObject()
508+
.startArray("sku")
509+
.startObject()
510+
.field("sku_type", "bar1")
511+
.startArray("colors")
512+
.startObject().field("name", "red").endObject()
513+
.startObject().field("name", "green").endObject()
514+
.startObject().field("name", "yellow").endObject()
515+
.endArray()
516+
.endObject()
517+
.startObject()
518+
.field("sku_type", "bar1")
519+
.startArray("colors")
520+
.startObject().field("name", "red").endObject()
521+
.startObject().field("name", "blue").endObject()
522+
.startObject().field("name", "white").endObject()
523+
.endArray()
524+
.endObject()
525+
.startObject()
526+
.field("sku_type", "bar1")
527+
.startArray("colors")
528+
.startObject().field("name", "black").endObject()
529+
.startObject().field("name", "blue").endObject()
530+
.endArray()
531+
.endObject()
532+
.startObject()
533+
.field("sku_type", "bar2")
534+
.startArray("colors")
535+
.startObject().field("name", "orange").endObject()
536+
.endArray()
537+
.endObject()
538+
.startObject()
539+
.field("sku_type", "bar2")
540+
.startArray("colors")
541+
.startObject().field("name", "pink").endObject()
542+
.endArray()
543+
.endObject()
544+
.endArray()
545+
.startArray("category")
546+
.startObject().field("name", "abc").endObject()
547+
.startObject().field("name", "klm").endObject()
548+
.startObject().field("name", "xyz").endObject()
549+
.endArray()
550+
.endObject()
551+
).get();
552+
553+
SearchResponse response = client().prepareSearch("idx3")
554+
.addAggregation(
555+
nested("nested_0").path("category").subAggregation(
556+
terms("group_by_category").field("category.name").subAggregation(
557+
reverseNested("to_root").subAggregation(
558+
nested("nested_1").path("sku").subAggregation(
559+
filter("filter_by_sku").filter(termFilter("sku.sku_type", "bar1")).subAggregation(
560+
count("sku_count").field("sku_type")
561+
)
562+
)
563+
)
564+
)
565+
)
566+
).get();
567+
assertNoFailures(response);
568+
assertHitCount(response, 1);
569+
570+
Nested nested0 = response.getAggregations().get("nested_0");
571+
assertThat(nested0.getDocCount(), equalTo(3l));
572+
Terms terms = nested0.getAggregations().get("group_by_category");
573+
assertThat(terms.getBuckets().size(), equalTo(3));
574+
for (String bucketName : new String[]{"abc", "klm", "xyz"}) {
575+
logger.info("Checking results for bucket {}", bucketName);
576+
Terms.Bucket bucket = terms.getBucketByKey(bucketName);
577+
assertThat(bucket.getDocCount(), equalTo(1l));
578+
ReverseNested toRoot = bucket.getAggregations().get("to_root");
579+
assertThat(toRoot.getDocCount(), equalTo(1l));
580+
Nested nested1 = toRoot.getAggregations().get("nested_1");
581+
assertThat(nested1.getDocCount(), equalTo(5l));
582+
Filter filterByBar = nested1.getAggregations().get("filter_by_sku");
583+
assertThat(filterByBar.getDocCount(), equalTo(3l));
584+
ValueCount barCount = filterByBar.getAggregations().get("sku_count");
585+
assertThat(barCount.getValue(), equalTo(3l));
586+
}
587+
588+
response = client().prepareSearch("idx3")
589+
.addAggregation(
590+
nested("nested_0").path("category").subAggregation(
591+
terms("group_by_category").field("category.name").subAggregation(
592+
reverseNested("to_root").subAggregation(
593+
nested("nested_1").path("sku").subAggregation(
594+
filter("filter_by_sku").filter(termFilter("sku.sku_type", "bar1")).subAggregation(
595+
nested("nested_2").path("sku.colors").subAggregation(
596+
filter("filter_sku_color").filter(termFilter("sku.colors.name", "red")).subAggregation(
597+
reverseNested("reverse_to_sku").path("sku").subAggregation(
598+
count("sku_count").field("sku_type")
599+
)
600+
)
601+
)
602+
)
603+
)
604+
)
605+
)
606+
)
607+
).get();
608+
assertNoFailures(response);
609+
assertHitCount(response, 1);
610+
611+
nested0 = response.getAggregations().get("nested_0");
612+
assertThat(nested0.getDocCount(), equalTo(3l));
613+
terms = nested0.getAggregations().get("group_by_category");
614+
assertThat(terms.getBuckets().size(), equalTo(3));
615+
for (String bucketName : new String[]{"abc", "klm", "xyz"}) {
616+
logger.info("Checking results for bucket {}", bucketName);
617+
Terms.Bucket bucket = terms.getBucketByKey(bucketName);
618+
assertThat(bucket.getDocCount(), equalTo(1l));
619+
ReverseNested toRoot = bucket.getAggregations().get("to_root");
620+
assertThat(toRoot.getDocCount(), equalTo(1l));
621+
Nested nested1 = toRoot.getAggregations().get("nested_1");
622+
assertThat(nested1.getDocCount(), equalTo(5l));
623+
Filter filterByBar = nested1.getAggregations().get("filter_by_sku");
624+
assertThat(filterByBar.getDocCount(), equalTo(3l));
625+
Nested nested2 = filterByBar.getAggregations().get("nested_2");
626+
assertThat(nested2.getDocCount(), equalTo(8l));
627+
Filter filterBarColor = nested2.getAggregations().get("filter_sku_color");
628+
assertThat(filterBarColor.getDocCount(), equalTo(2l));
629+
ReverseNested reverseToBar = filterBarColor.getAggregations().get("reverse_to_sku");
630+
assertThat(reverseToBar.getDocCount(), equalTo(2l));
631+
ValueCount barCount = reverseToBar.getAggregations().get("sku_count");
632+
assertThat(barCount.getValue(), equalTo(2l));
633+
}
634+
}
469635
}

0 commit comments

Comments
 (0)