elastic · inqueue · Aug 29, 2024 · Aug 29, 2024 · Aug 30, 2024 · Aug 30, 2024
diff --git a/http_logs/README.md b/http_logs/README.md
@@ -42,6 +42,8 @@ This track allows to overwrite the following parameters with Rally 0.8.0+ using
 * `number_of_shards` (default: 5)
 * `source_enabled` (default: true): A boolean defining whether the `_source` field is stored in the index.
 * `index_settings`: A list of index settings. Index settings defined elsewhere (e.g. `number_of_replicas`) need to be overridden explicitly.
+* `index_mode` (default: unset): Set to `logsdb` to index into [logs data streams](https://www.elastic.co/guide/en/elasticsearch/reference/master/logs-data-stream.html). When unset, the track does not use logs data streams.
+* `index_type` (default: unset): Set to `data_stream` to index into data streams instead of regular indices. Not required when `index_mode` is set to `logsdb`, which already implies data streams.
 * `cluster_health` (default: "green"): The minimum required cluster health.
 * `ingest_pipeline`: Only applicable for `--challenge=append-index-only-with-ingest-pipeline`, selects which ingest
 node pipeline to run. Valid options are `'baseline'` (default), `'grok'`  and `'geoip'`. For example: `--challenge=append-index-only-with-ingest-pipeline --track-params="ingest_pipeline:'baseline'" `

diff --git a/http_logs/challenges/common/default-schedule.json b/http_logs/challenges/common/default-schedule.json
@@ -1,11 +1,44 @@
         {
-          "operation": "delete-index"
+          "operation": {
+            "name": "delete-data-stream",
+            "operation-type": "delete-data-stream",
+            "only-if-exists": false,
+            "data-stream": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"]
+          },
+          "tags": ["setup"]
+        },
+        {
+          "operation": {
+            "name": "delete-index",
+            "operation-type": "delete-index",
+            "only-if-exists": false,
+            "index": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"]
+          },
+          "tags": ["setup"]
+        },
+        {
+          "operation": {
+            "name": "delete-all-index-templates",
+            "operation-type": "delete-composable-template"
+          },
+          "tags": ["setup"]
         },
         {
           "operation": {
-            "operation-type": "create-index",
+            "name": "create-all-templates",
+            "operation-type": "create-composable-template"
+          },
+          "tags": ["setup"]
+        },
+        {
+          {%- if index_mode == "logsdb" or index_type == "data_stream" %}
+            {%- set indexing_operation_type = "create-data-stream" %}
+          {%- endif %}
+          "operation": {
+            "operation-type": {{ indexing_operation_type | default("create-index") | tojson }},
             "settings": {{index_settings | default({}) | tojson}}
-          }
+          },
+          "tags": ["setup"]
         },
         {
           "name": "check-cluster-health",

diff --git a/http_logs/challenges/common/setup-schedule.json b/http_logs/challenges/common/setup-schedule.json
@@ -0,0 +1,32 @@
+{
+  "operation": {
+    "name": "delete-data-stream",
+    "operation-type": "delete-data-stream",
+    "only-if-exists": false,
+    "data-stream": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"]
+  },
+  "tags": ["setup"]
+},
+{
+  "operation": {
+    "name": "delete-index",
+    "operation-type": "delete-index",
+    "only-if-exists": false,
+    "index": ["logs-181998", "logs-191998", "logs-201998", "logs-211998", "logs-221998", "logs-231998", "logs-241998", "reindexed-logs"]
+  },
+  "tags": ["setup"]
+},
+{
+  "operation": {
+    "name": "delete-all-index-templates",
+    "operation-type": "delete-composable-template"
+  },
+  "tags": ["setup"]
+},
+{
+  "operation": {
+    "name": "create-all-templates",
+    "operation-type": "create-composable-template"
+  },
+  "tags": ["setup"]
+}
diff --git a/http_logs/challenges/default.json b/http_logs/challenges/default.json
@@ -17,14 +17,18 @@
       "name": "append-no-conflicts-index-only",
       "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only.",
       "schedule": [
+        {{ rally.collect(parts="common/setup-schedule.json") }},
         {
-          "operation": "delete-index"
-        },
-        {
+          {%- if index_mode == "logsdb" or index_type == "data_stream" %}
+            {%- set indexing_operation_type = "create-data-stream" %}
+          {%- endif %}
           "operation": {
-            "operation-type": "create-index",
+            "operation-type": {{ indexing_operation_type | default("create-index") | tojson }},
             "settings": {{index_settings | default({}) | tojson}}
-          }
+          },
+          "tags": [
+            "setup"
+          ]
         },
         {
           "name": "check-cluster-health",
@@ -77,17 +81,21 @@
       "name": "append-sorted-no-conflicts",
       "description": "Indexes the whole document corpus in an index sorted by timestamp field in descending order (most recent first) and using a setup that will lead to a lower indexing throughput than the default settings. Document ids are unique so all index operations are append only.",
       "schedule": [
+        {{ rally.collect(parts="common/setup-schedule.json") }},
         {
-          "operation": "delete-index"
-        },
-        {
+          {%- if index_mode == "logsdb" or index_type == "data_stream" %}
+            {%- set indexing_operation_type = "create-data-stream" %}
+          {%- endif %}
           "operation": {
-            "operation-type": "create-index",
+            "operation-type": {{ indexing_operation_type | default("create-index") | tojson }},
             "settings": {%- if index_settings is defined %} {{index_settings | tojson}} {%- else %} {
               "index.sort.field": "@timestamp",
               "index.sort.order": "desc"
             }{%- endif %}
-          }
+          },
+          "tags": [
+            "setup"
+          ]
         },
         {
           "name": "check-cluster-health",
@@ -140,14 +148,18 @@
       "name": "append-index-only-with-ingest-pipeline",
       "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only. Runs the documents through an ingest node pipeline to parse the http logs. May require --elasticsearch-plugins='ingest-geoip' ",
       "schedule": [
+        {{ rally.collect(parts="common/setup-schedule.json") }},
         {
-          "operation": "delete-index"
-        },
-        {
+          {%- if index_mode == "logsdb" or index_type == "data_stream" %}
+            {%- set indexing_operation_type = "create-data-stream" %}
+          {%- endif %}
           "operation": {
-            "operation-type": "create-index",
+            "operation-type": {{ indexing_operation_type | default("create-index") | tojson }},
             "settings": {{index_settings | default({}) | tojson}}
-          }
+          },
+          "tags": [
+            "setup"
+          ]
         },
         {
           "name": "check-cluster-health",
@@ -201,10 +213,9 @@
     },
     {
       "name": "update",
+      "description": "Perform bulk update operations. The update challenge is for standard index use only.",
       "schedule": [
-        {
-          "operation": "delete-index"
-        },
+        {{ rally.collect(parts="common/setup-schedule.json") }},
         {
           "operation": {
             "operation-type": "create-index",
@@ -268,14 +279,18 @@
       "name": "append-no-conflicts-index-reindex-only",
       "description": "Indexes the whole document corpus using Elasticsearch default settings. We only adjust the number of replicas as we benchmark a single node cluster and Rally will only start the benchmark if the cluster turns green. Document ids are unique so all index operations are append only. After indexing, same data are reindexed.",
       "schedule": [
+        {{ rally.collect(parts="common/setup-schedule.json") }},
         {
-          "operation": "delete-index"
-        },
-        {
+          {%- if index_mode == "logsdb" or index_type == "data_stream" %}
+            {%- set indexing_operation_type = "create-data-stream" %}
+          {%- endif %}
           "operation": {
-            "operation-type": "create-index",
+            "operation-type": {{ indexing_operation_type | default("create-index") | tojson }},
             "settings": {{index_settings | default({}) | tojson}}
-          }
+          },
+          "tags": [
+            "setup"
+          ]
         },
         {
           "name": "check-cluster-health",

diff --git a/http_logs/index-runtime-fields.json → http_logs/index-template.json b/http_logs/index-runtime-fields.json → http_logs/index-template.json
@@ -1,30 +1,83 @@
 {
-  "settings": {
-    {# non-serverless-index-settings-marker-start #}{%- if build_flavor != "serverless" or serverless_operator == true -%}
-    "index.number_of_shards": {{ number_of_shards | default(5) }},
-    "index.number_of_replicas": {{ number_of_replicas | default(0) }},
-    "index.requests.cache.enable": false
-    {%- endif -%}{# non-serverless-index-settings-marker-end #}
-  },
-  "mappings": {
-    "dynamic": "strict",
-    "_source": {
-      "enabled": {{ source_enabled | default(true) | tojson }}
+  "priority": 101,
+  "index_patterns": ["logs-*", "reindexed-logs"],
+  {%- if index_mode == "logsdb" or index_type == "data_stream" %}
+  "data_stream": {},
+  {%- endif %}
+  "template": {
+    "settings": {
+      {%- if index_mode %}
+      "index.mode": {{ index_mode | tojson }}{% if build_flavor != "serverless" or serverless_operator == true %},{% endif %}
+      {%- endif -%}
+      {# non-serverless-index-settings-marker-start -#}
+      {%- if build_flavor != "serverless" %}
+      "index.number_of_replicas": {{ number_of_replicas | default(0) | tojson }},
+      {%- endif -%}
+      {%- if build_flavor != "serverless" or serverless_operator == true %}
+      "index.number_of_shards": {{ number_of_shards | default(5) | tojson }},
+      "index.requests.cache.enable": false
+      {%- endif -%}
+      {# non-serverless-index-settings-marker-end #}
     },
-    "properties": {
-      "@timestamp": {
-        "format": "strict_date_optional_time",
-        "type": "date"
-      },
-      "message": {
-        "type": "wildcard",
-        "fields": {
-          "keyword": {
-            "type": "keyword"
+    "mappings": {
+      "dynamic": "strict",
+      {%- if index_mode != "logsdb" %}
+      "_source": {
+        "enabled": {{ source_enabled | default(true) | tojson }}
+      },
+      {%- endif %}
+      "properties": {
+        "@timestamp": {
+          {%- if (ingest_pipeline is defined and ingest_pipeline == "grok") or runtime_fields is defined %}
+          "format": "strict_date_optional_time",
+          {%- else %}
+          "format": "epoch_second",
+          {%- endif %}
+          "type": "date"
+        },
+        {%- if runtime_fields is defined %}
+        "message": {
+          "type": "wildcard",
+          "fields": {
+            "keyword": {
+              "type": "keyword"
+            }
+          }
+        },
+        {%- else %}
+        "message": {
+          "type": "keyword",
+          "index": false,
+          "doc_values": false
+        },
+        {%- endif %}
+        "clientip": {
+          "type": "ip"
+        },
+        "request": {
+          "type": "match_only_text",
+          "fields": {
+            "raw": {
+              "ignore_above": 256,
+              "type": "keyword"
+            }
+          }
+        },
+        "status": {
+          "type": "integer"
+        },
+        "size": {
+          "type": "integer"
+        },
+        "geoip" : {
+          "properties" : {
+            "country_name": { "type": "keyword" },
+            "city_name": { "type": "keyword" },
+            "location" : { "type" : "geo_point" }
           }
         }
       }
-    },
+      {%- if runtime_fields is defined %},
     "runtime": {
       {%- set sources = [('source', 'message.source'), ('wildcard', 'message'), ('keyword', 'message.keyword')] %}
       {%- for source_type, field in sources %}
@@ -97,6 +150,7 @@
         "type": "keyword",
         "script": "emit(params._source.message)"
       }
-    }
+    }{% endif %}
+    }
   }
 }
diff --git a/http_logs/index.json b/http_logs/index.json