[ML] Fix frequent_item_sets aggregation on empty index (elastic#103116)

Previously the `frequent_item_sets` aggregation would fail with an internal server error if run against an empty index. This change makes it return empty output, as expected. Fixes elastic#103067.
droberts195 · Dec 7, 2023 · cafd993
1 parent 47e2cc4
commit cafd993
Show file tree

Hide file tree

Showing 3 changed files with 65 additions and 11 deletions.
diff --git a/docs/changelog/103116.yaml b/docs/changelog/103116.yaml
@@ -0,0 +1,6 @@
+pr: 103116
+summary: Fix `frequent_item_sets` aggregation on empty index
+area: Machine Learning
+type: bug
+issues:
+ - 103067
diff --git a/.../java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java b/.../java/org/elasticsearch/xpack/ml/aggs/frequentitemsets/mr/ItemSetMapReduceAggregator.java
@@ -42,6 +42,7 @@
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
+import java.util.Optional;
 import java.util.function.BiConsumer;
 
 public abstract class ItemSetMapReduceAggregator<
@@ -76,7 +77,7 @@ protected ItemSetMapReduceAggregator(
         List<ItemSetMapReduceValueSource> valueSources = new ArrayList<>();
         List<Field> fields = new ArrayList<>();
         IndexSearcher contextSearcher = context.searcher();
-        LeafReaderContext ctx = getLeafReaderForOrdinals(context);
+        Optional<LeafReaderContext> ctx = getLeafReaderForOrdinals(context);
 
         int id = 0;
         this.weightDocumentFilter = documentFilter != null
@@ -85,15 +86,17 @@ protected ItemSetMapReduceAggregator(
 
         boolean rewriteBasedOnOrdinals = false;
 
-        for (var c : configsAndValueFilters) {
-            ItemSetMapReduceValueSource e = context.getValuesSourceRegistry()
-                .getAggregator(registryKey, c.v1())
-                .build(c.v1(), id++, c.v2(), ordinalOptimization, ctx);
-            if (e.getField().getName() != null) {
-                fields.add(e.getField());
-                valueSources.add(e);
+        if (ctx.isPresent()) {
+            for (var c : configsAndValueFilters) {
+                ItemSetMapReduceValueSource e = context.getValuesSourceRegistry()
+                    .getAggregator(registryKey, c.v1())
+                    .build(c.v1(), id++, c.v2(), ordinalOptimization, ctx.get());
+                if (e.getField().getName() != null) {
+                    fields.add(e.getField());
+                    valueSources.add(e);
+                }
+                rewriteBasedOnOrdinals |= e.usesOrdinals();
             }
-            rewriteBasedOnOrdinals |= e.usesOrdinals();
         }
 
         this.rewriteBasedOnOrdinals = rewriteBasedOnOrdinals;
@@ -220,8 +223,8 @@ private InternalAggregation buildAggregation(long owningBucketOrdinal) throws IO
         return new InternalItemSetMapReduceAggregation<>(name, metadata(), mapReducer, context, null, fields, profiling);
     }
 
-    private static LeafReaderContext getLeafReaderForOrdinals(AggregationContext context) {
+    private static Optional<LeafReaderContext> getLeafReaderForOrdinals(AggregationContext context) {
         IndexReader reader = context.searcher().getIndexReader();
-        return reader.leaves().get(0);
+        return reader.leaves().stream().findFirst();
     }
 }
diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/ml/frequent_item_sets_agg.yml
@@ -540,3 +540,48 @@ setup:
   - match: { aggregations.fi.buckets.1.doc_count: 4 }
   - match: { aggregations.fi.buckets.1.support: 0.4 }
   - match: { aggregations.fi.buckets.1.key.error_message: ["engine overheated"] }
+
+---
+"Test frequent items on empty index":
+  - skip:
+      features: headers
+
+  - do:
+      headers:
+        Authorization: "Basic eF9wYWNrX3Jlc3RfdXNlcjp4LXBhY2stdGVzdC1wYXNzd29yZA==" # run as x_pack_rest_user, i.e. the test setup superuser
+      indices.create:
+        index: unavailable-data
+        body:
+          mappings:
+            properties:
+              features:
+                type: keyword
+              error_message:
+                type: keyword
+              timestamp:
+                type: date
+              geo_point:
+                type: geo_point
+              histogram:
+                type: histogram
+
+  - do:
+      search:
+        index: unavailable-data
+        body: >
+          {
+            "size": 0,
+            "aggs": {
+              "fi": {
+                "frequent_item_sets": {
+                  "minimum_set_size": 3,
+                  "minimum_support": 0.3,
+                  "fields": [
+                    {"field": "features"},
+                    {"field": "error_message"}
+                  ]
+                }
+              }
+            }
+          }
+  - length: { aggregations.fi.buckets: 0 }