From 3cbbcc57485ddc88145caefa7124164089b1dac0 Mon Sep 17 00:00:00 2001 From: Salvatore Campagna <93581129+salvatore-campagna@users.noreply.github.com> Date: Thu, 31 Oct 2024 15:54:42 +0100 Subject: [PATCH] Default LogsDB value for `ignore_dynamic_beyond_limit` (#115265) When ingesting logs, it's important to ensure that documents are not dropped due to mapping issues, also when dealing with dynamically mapped fields. Elasticsearch provides two key settings that help manage the total number of field mappings and handle situations where this limit might be exceeded: 1. **`index.mapping.total_fields.limit`**: This setting defines the maximum number of fields allowed in an index. If this limit is reached, any further mapped fields would cause indexing to fail. 2. **`index.mapping.total_fields.ignore_dynamic_beyond_limit`**: This setting determines whether Elasticsearch should ignore any dynamically mapped fields that exceed the limit defined by `index.mapping.total_fields.limit`. If set to `false`, indexing will fail once the limit is surpassed. However, if set to `true`, Elasticsearch will continue indexing the document but will silently ignore any additional dynamically mapped fields beyond the limit. To prevent indexing failures due to dynamic mapping issues, especially in logs where the schema might change frequently, we change the default value of **`index.mapping.total_fields.ignore_dynamic_beyond_limit` from `false` to `true` in LogsDB**. This change ensures that even when the number of dynamically mapped fields exceeds the set limit, documents will still be indexed, and additional fields will simply be ignored rather than causing an indexing failure. This adjustment is important for LogsDB, where dynamically mapped fields may be common, and we want to make sure to avoid documents from being dropped. --- .../indices.create/20_synthetic_source.yml | 1 + .../rest-api-spec/test/logsdb/10_settings.yml | 281 ++++++++++++++++++ .../elasticsearch/index/IndexVersions.java | 3 +- .../index/mapper/MapperFeatures.java | 3 +- .../index/mapper/MapperService.java | 17 +- 5 files changed, 302 insertions(+), 3 deletions(-) diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 258dfeb57e00c..cc5fd0e08e695 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1,3 +1,4 @@ +--- object with unmapped fields: - requires: cluster_features: ["mapper.track_ignored_source", "mapper.bwc_workaround_9_0"] diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml index 4439441efdd02..20c2ef63fc850 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml @@ -599,3 +599,284 @@ end time not allowed in logs mode: - match: { error.root_cause.0.type: "illegal_argument_exception" } - match: { error.type: "illegal_argument_exception" } - match: { error.reason: "[index.time_series.end_time] requires [index.mode=time_series]" } + +--- +ignore dynamic beyond limit logsdb default value: + - requires: + cluster_features: [ "mapper.logsdb_default_ignore_dynamic_beyond_limit" ] + reason: requires logsdb default value for `index.mapping.total_fields.ignore_dynamic_beyond_limit` + + - do: + indices.create: + index: test-ignore-dynamic-default + body: + settings: + index: + mode: logsdb + + - do: + indices.get_settings: + index: test-ignore-dynamic-default + include_defaults: true + + - match: { test-ignore-dynamic-default.settings.index.mode: "logsdb" } + - match: { test-ignore-dynamic-default.defaults.index.mapping.total_fields.limit: "1000" } + - match: { test-ignore-dynamic-default.defaults.index.mapping.total_fields.ignore_dynamic_beyond_limit: "true" } + +--- +ignore dynamic beyond limit logsdb override value: + - requires: + cluster_features: [ "mapper.logsdb_default_ignore_dynamic_beyond_limit" ] + reason: requires logsdb default value for `index.mapping.total_fields.ignore_dynamic_beyond_limit` + + - do: + indices.create: + index: test-ignore-dynamic-override + body: + settings: + index: + mode: logsdb + mapping: + total_fields: + ignore_dynamic_beyond_limit: false + + - do: + indices.get_settings: + index: test-ignore-dynamic-override + + - match: { test-ignore-dynamic-override.settings.index.mode: "logsdb" } + - match: { test-ignore-dynamic-override.settings.index.mapping.total_fields.ignore_dynamic_beyond_limit: "false" } + +--- +logsdb with default ignore dynamic beyond limit and default sorting: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + indices.create: + index: test-logsdb-default-sort + body: + settings: + index: + mode: logsdb + mapping: + # NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if + # sort settings are not overridden. + # With `subobjects` set to `true` (default), this creates a `host` object field and a nested `name` + # keyword field (`host.name`). + # + # As a result, there are always at least 4 statically mapped fields (`@timestamp`, `host`, `host.name` + # and `name`). We cannot use a field limit lower than 4 because these fields are always present. + # + # Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically + # mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically + # mapped fields are always counted. + total_fields: + limit: 4 + mappings: + properties: + "@timestamp": + type: date + name: + type: keyword + + - do: + indices.get_settings: + index: test-logsdb-default-sort + + - match: { test-logsdb-default-sort.settings.index.mode: "logsdb" } + + - do: + bulk: + index: test-logsdb-default-sort + refresh: true + body: + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }' + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }' + - match: { errors: false } + + - do: + search: + index: test-logsdb-default-sort + body: + query: + match_all: {} + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: "bar" } + - match: { hits.hits.0._source.value: 20 } + - match: { hits.hits.0._source.message: "jumps over the lazy dog" } + - match: { hits.hits.0._ignored: [ "message", "pid", "region", "value" ] } + - match: { hits.hits.1._source.name: "foo" } + - match: { hits.hits.1._source.value: 10 } + - match: { hits.hits.1._source.message: "the quick brown fox" } + - match: { hits.hits.1._ignored: [ "message", "pid", "region", "value" ] } + +--- +logsdb with default ignore dynamic beyond limit and non-default sorting: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + indices.create: + index: test-logsdb-non-default-sort + body: + settings: + index: + sort.field: [ "name" ] + sort.order: [ "desc" ] + mode: logsdb + mapping: + # NOTE: Here sort settings are overridden and we do not have any additional statically mapped field other + # than `name` and `timestamp`. As a result, there are only 2 statically mapped fields. + total_fields: + limit: 2 + mappings: + properties: + "@timestamp": + type: date + name: + type: keyword + + - do: + indices.get_settings: + index: test-logsdb-non-default-sort + + - match: { test-logsdb-non-default-sort.settings.index.mode: "logsdb" } + + - do: + bulk: + index: test-logsdb-non-default-sort + refresh: true + body: + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }' + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }' + - match: { errors: false } + + - do: + search: + index: test-logsdb-non-default-sort + body: + query: + match_all: {} + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: "foo" } + - match: { hits.hits.0._source.value: 10 } + - match: { hits.hits.0._source.message: "the quick brown fox" } + - match: { hits.hits.0._ignored: [ "host", "message", "pid", "region", "value" ] } + - match: { hits.hits.1._source.name: "bar" } + - match: { hits.hits.1._source.value: 20 } + - match: { hits.hits.1._source.message: "jumps over the lazy dog" } + - match: { hits.hits.1._ignored: [ "host", "message", "pid", "region", "value" ] } + +--- +logsdb with default ignore dynamic beyond limit and too low limit: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + catch: bad_request + indices.create: + index: test-logsdb-low-limit + body: + settings: + index: + mode: logsdb + mapping: + # NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if + # sort settings are not overridden. + # With `subobjects` set to `true` (default), this creates a `host` object field and a nested `name` + # keyword field (`host.name`). + # + # As a result, there are always at least 4 statically mapped fields (`@timestamp`, `host`, `host.name` + # and `name`). We cannot use a field limit lower than 4 because these fields are always present. + # + # Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically + # mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically + # mapped fields are always counted. + total_fields: + limit: 3 + mappings: + properties: + "@timestamp": + type: date + name: + type: keyword + - match: { error.type: "illegal_argument_exception" } + - match: { error.reason: "Limit of total fields [3] has been exceeded" } + +--- +logsdb with default ignore dynamic beyond limit and subobjects false: + - requires: + cluster_features: ["mapper.logsdb_default_ignore_dynamic_beyond_limit"] + reason: requires default value for ignore_dynamic_beyond_limit + + - do: + indices.create: + index: test-logsdb-subobjects-false + body: + settings: + index: + mode: logsdb + mapping: + # NOTE: When the index mode is set to `logsdb`, the `host.name` field is automatically injected if + # sort settings are not overridden. + # With `subobjects` set to `false` anyway, a single `host.name` keyword field is automatically mapped. + # + # As a result, there are just 3 statically mapped fields (`@timestamp`, `host.name` and `name`). + # We cannot use a field limit lower than 3 because these fields are always present. + # + # Indeed, if `index.mapping.total_fields.ignore_dynamic_beyond_limit` is `true`, any dynamically + # mapped fields beyond the limit `index.mapping.total_fields.limit` are ignored, but the statically + # mapped fields are always counted. + total_fields: + limit: 3 + mappings: + subobjects: false + properties: + "@timestamp": + type: date + name: + type: keyword + + - do: + indices.get_settings: + index: test-logsdb-subobjects-false + + - match: { test-logsdb-subobjects-false.settings.index.mode: "logsdb" } + + - do: + bulk: + index: test-logsdb-subobjects-false + refresh: true + body: + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:30:00Z", "name": "foo", "host.name": "92f4a67c", "value": 10, "message": "the quick brown fox", "region": "us-west", "pid": 153462 }' + - '{ "index": { } }' + - '{ "@timestamp": "2024-08-13T12:01:00Z", "name": "bar", "host.name": "24eea278", "value": 20, "message": "jumps over the lazy dog", "region": "us-central", "pid": 674972 }' + - match: { errors: false } + + - do: + search: + index: test-logsdb-subobjects-false + body: + query: + match_all: {} + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: "bar" } + - match: { hits.hits.0._source.value: 20 } + - match: { hits.hits.0._source.message: "jumps over the lazy dog" } + - match: { hits.hits.0._ignored: [ "message", "pid", "region", "value" ] } + - match: { hits.hits.1._source.name: "foo" } + - match: { hits.hits.1._source.value: 10 } + - match: { hits.hits.1._source.message: "the quick brown fox" } + - match: { hits.hits.1._ignored: [ "message", "pid", "region", "value" ] } diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 2919f98ee200e..440613263d441 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -129,8 +129,9 @@ private static Version parseUnchecked(String version) { public static final IndexVersion UPGRADE_TO_LUCENE_9_12 = def(8_516_00_0, Version.LUCENE_9_12_0); public static final IndexVersion ENABLE_IGNORE_ABOVE_LOGSDB = def(8_517_00_0, Version.LUCENE_9_12_0); public static final IndexVersion ADD_ROLE_MAPPING_CLEANUP_MIGRATION = def(8_518_00_0, Version.LUCENE_9_12_0); + public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT = def(8_519_00_0, Version.LUCENE_9_12_0); public static final IndexVersion UPGRADE_TO_LUCENE_10_0_0 = def(9_000_00_0, Version.LUCENE_10_0_0); - + public static final IndexVersion LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = def(9_001_00_0, Version.LUCENE_10_0_0); /* * STOP! READ THIS FIRST! No, really, * ____ _____ ___ ____ _ ____ _____ _ ____ _____ _ _ ___ ____ _____ ___ ____ ____ _____ _ diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index a5f173afffba2..4797857fc12f8 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -64,7 +64,8 @@ public Set getTestFeatures() { IgnoredSourceFieldMapper.DONT_EXPAND_DOTS_IN_IGNORED_SOURCE, SourceFieldMapper.REMOVE_SYNTHETIC_SOURCE_ONLY_VALIDATION, IgnoredSourceFieldMapper.IGNORED_SOURCE_AS_TOP_LEVEL_METADATA_ARRAY_FIELD, - IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS + IgnoredSourceFieldMapper.ALWAYS_STORE_OBJECT_ARRAYS_IN_NESTED_OBJECTS, + MapperService.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 08461525526b9..7f952153c6453 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -22,9 +22,12 @@ import org.elasticsearch.common.xcontent.LoggingDeprecationHandler; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Nullable; +import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.AbstractIndexComponent; +import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.analysis.AnalysisRegistry; import org.elasticsearch.index.analysis.IndexAnalyzers; import org.elasticsearch.index.analysis.NamedAnalyzer; @@ -121,9 +124,21 @@ public boolean isAutoUpdate() { Property.IndexScope, Property.ServerlessPublic ); + + public static final NodeFeature LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT = new NodeFeature( + "mapper.logsdb_default_ignore_dynamic_beyond_limit" + ); public static final Setting INDEX_MAPPING_IGNORE_DYNAMIC_BEYOND_LIMIT_SETTING = Setting.boolSetting( "index.mapping.total_fields.ignore_dynamic_beyond_limit", - false, + settings -> { + boolean isLogsDBIndexMode = IndexSettings.MODE.get(settings) == IndexMode.LOGSDB; + final IndexVersion indexVersionCreated = IndexMetadata.SETTING_INDEX_VERSION_CREATED.get(settings); + boolean isNewIndexVersion = indexVersionCreated.between( + IndexVersions.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT_BACKPORT, + IndexVersions.UPGRADE_TO_LUCENE_10_0_0 + ) || indexVersionCreated.onOrAfter(IndexVersions.LOGSDB_DEFAULT_IGNORE_DYNAMIC_BEYOND_LIMIT); + return String.valueOf(isLogsDBIndexMode && isNewIndexVersion); + }, Property.Dynamic, Property.IndexScope, Property.ServerlessPublic