Skip to content

Commit

Permalink
[ML] Fix frequent_item_sets aggregation on empty index (elastic#103116)
Browse files: browse the repository at this point in the history

Previously the `frequent_item_sets` aggregation would fail with an
internal server error if run against an empty index.

This change makes it return empty output, as expected.

Fixes elastic#103067
  • Loading branch information
droberts195 committed Dec 7, 2023
1 parent 47e2cc4 commit cafd993
Show file tree
Hide file tree
Showing 3 changed files with 65 additions and 11 deletions.
6 changes: 6 additions & 0 deletions docs/changelog/103116.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
pr: 103116
summary: Fix `frequent_item_sets` aggregation on empty index
area: Machine Learning
type: bug
issues:
- 103067
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.BiConsumer;

public abstract class ItemSetMapReduceAggregator<
Expand Down Expand Up @@ -76,7 +77,7 @@ protected ItemSetMapReduceAggregator(
List<ItemSetMapReduceValueSource> valueSources = new ArrayList<>();
List<Field> fields = new ArrayList<>();
IndexSearcher contextSearcher = context.searcher();
LeafReaderContext ctx = getLeafReaderForOrdinals(context);
Optional<LeafReaderContext> ctx = getLeafReaderForOrdinals(context);

int id = 0;
this.weightDocumentFilter = documentFilter != null
Expand All @@ -85,15 +86,17 @@ protected ItemSetMapReduceAggregator(

boolean rewriteBasedOnOrdinals = false;

for (var c : configsAndValueFilters) {
ItemSetMapReduceValueSource e = context.getValuesSourceRegistry()
.getAggregator(registryKey, c.v1())
.build(c.v1(), id++, c.v2(), ordinalOptimization, ctx);
if (e.getField().getName() != null) {
fields.add(e.getField());
valueSources.add(e);
if (ctx.isPresent()) {
for (var c : configsAndValueFilters) {
ItemSetMapReduceValueSource e = context.getValuesSourceRegistry()
.getAggregator(registryKey, c.v1())
.build(c.v1(), id++, c.v2(), ordinalOptimization, ctx.get());
if (e.getField().getName() != null) {
fields.add(e.getField());
valueSources.add(e);
}
rewriteBasedOnOrdinals |= e.usesOrdinals();
}
rewriteBasedOnOrdinals |= e.usesOrdinals();
}

this.rewriteBasedOnOrdinals = rewriteBasedOnOrdinals;
Expand Down Expand Up @@ -220,8 +223,8 @@ private InternalAggregation buildAggregation(long owningBucketOrdinal) throws IO
return new InternalItemSetMapReduceAggregation<>(name, metadata(), mapReducer, context, null, fields, profiling);
}

private static LeafReaderContext getLeafReaderForOrdinals(AggregationContext context) {
private static Optional<LeafReaderContext> getLeafReaderForOrdinals(AggregationContext context) {
IndexReader reader = context.searcher().getIndexReader();
return reader.leaves().get(0);
return reader.leaves().stream().findFirst();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -540,3 +540,48 @@ setup:
- match: { aggregations.fi.buckets.1.doc_count: 4 }
- match: { aggregations.fi.buckets.1.support: 0.4 }
- match: { aggregations.fi.buckets.1.key.error_message: ["engine overheated"] }

---
"Test frequent items on empty index":
- skip:
features: headers

- do:
headers:
Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
indices.create:
index: unavailable-data
body:
mappings:
properties:
features:
type: keyword
error_message:
type: keyword
timestamp:
type: date
geo_point:
type: geo_point
histogram:
type: histogram

- do:
search:
index: unavailable-data
body: >
{
"size": 0,
"aggs": {
"fi": {
"frequent_item_sets": {
"minimum_set_size": 3,
"minimum_support": 0.3,
"fields": [
{"field": "features"},
{"field": "error_message"}
]
}
}
}
}
- length: { aggregations.fi.buckets: 0 }

0 comments on commit cafd993

Please sign in to comment.